diff -urN linux-2.4.27/CREDITS linux-2.4.28/CREDITS --- linux-2.4.27/CREDITS 2004-08-07 16:26:04.000000000 -0700 +++ linux-2.4.28/CREDITS 2004-11-17 03:54:20.892368866 -0800 @@ -458,6 +458,14 @@ D: REINER SCT cyberJack pinpad/e-com USB chipcard reader driver S: Germany +N: Adrian Bunk +E: bunk@stusta.de +P: 1024D/4F12B400 B29C E71E FE19 6755 5C8A 84D4 99FC EA98 4F12 B400 +D: misc kernel hacking and testing +S: Grasmeierstrasse 11 +S: 80805 Muenchen +S: Germany + N: Ray Burr E: ryb@nightmare.com D: Original author of Amiga FFS filesystem @@ -1485,18 +1493,16 @@ S: USA N: Dave Jones -E: davej@suse.de -W: http://www.suse.de/~davej +E: davej@redhat.com +W: http://www.codemonkey.org.uk D: Moved PCI bridge tuning to userspace (Powertweak). D: Various x86 (& clones) setup code hacking. D: AFFS fixes for 2.3.x D: Various Janitorial hacks. (kernel-janitor.sourceforge.net) -S: c/o SuSE Linux UK Ltd -S: The Kinetic Centre -S: Theobald Street -S: Borehamwood -S: Herts, WD6 4PJ -S: United Kingdom +S: Red Hat +S: 10, Technology Park +S: Westford, MA 01863 +S: USA N: Ani Joshi E: ajoshi@kernel.crashing.org @@ -1809,10 +1815,10 @@ S: Germany N: Achim Leubner -E: achim@vortex.de -D: GDT SCSI Disk Array Controller driver -S: ICP vortex Computersysteme GmbH -S: Flein +E: achim_leubner@adaptec.com +D: GDT Disk Array Controller/Storage RAID controller driver +S: ICP vortex GmbH +S: Neckarsulm S: Germany N: Phil Lewis @@ -2130,14 +2136,13 @@ S: Netherlands N: David S. Miller -E: davem@redhat.com +E: davem@davemloft.net D: Sparc and blue box hacker D: Vger Linux mailing list co-maintainer D: Linux Emacs elf/qmagic support + other libc/gcc things D: Yee bore de yee bore! ;-) -S: 750 N. Shoreline Blvd. -S: Apt. #111 -S: Mountain View, California 94043 +S: 575 Harrison St. #103 +S: San Francisco, California 94105 S: USA N: Rick Miller diff -urN linux-2.4.27/Documentation/Configure.help linux-2.4.28/Documentation/Configure.help --- linux-2.4.27/Documentation/Configure.help 2004-08-07 16:26:04.000000000 -0700 +++ linux-2.4.28/Documentation/Configure.help 2004-11-17 03:54:20.924370182 -0800 @@ -696,6 +696,24 @@ could say N here, and select the "Old hard disk driver" below instead to save about 13 KB of memory in the kernel. +Support for SATA (deprecated; conflicts with libata SATA driver) +CONFIG_BLK_DEV_IDE_SATA + There are two drivers for Serial ATA controllers. + + The main driver, "libata", exists inside the SCSI subsystem + and supports most modern SATA controllers. + + The IDE driver (which you are currently configuring) supports + a few first-generation SATA controllers. + + In order to eliminate conflicts between the two subsystems, + this config option enables the IDE driver's SATA support. + Normally this is disabled, as it is preferred that libata + supports SATA controllers, and this (IDE) driver supports + PATA controllers. + + If unsure, say N. + Old hard disk (MFM/RLL/IDE) driver CONFIG_BLK_DEV_HD_ONLY There are two drivers for MFM/RLL/IDE hard disks. Most people use @@ -763,6 +781,13 @@ . The module will be called ide-cs.o +Cardbus IDE support (Delkin/ASKA/Workbit) +CONFIG_BLK_DEV_DELKIN + Support for Delkin/ASKA/Workbit cardbus CompactFlash Adapters. + This may also work for similar SD and XD adapters. If you want + to be able to use one of these, then say M here. The module will + be called delkin_cb.o + Include IDE/ATAPI CD-ROM support CONFIG_BLK_DEV_IDECD If you have a CD-ROM drive using the ATAPI protocol, say Y. ATAPI is @@ -4678,12 +4703,6 @@ ld.so (check the file for location and latest version). - If you want to compile this as a module ( = code which can be - inserted in and removed from the running kernel whenever you want), - say M here and read . The module - will be called binfmt_elf.o. Saying M or N here is dangerous because - some crucial programs on your system might be in ELF format. - Kernel support for a.out binaries CONFIG_BINFMT_AOUT A.out (Assembler.OUTput) is a set of formats for libraries and @@ -7953,7 +7972,7 @@ QDIO base support for IBM S/390 and zSeries CONFIG_QDIO This driver provides the Queued Direct I/O base support for the - IBM S/390 (G5 and G6) and eServer zSeries (z800 and z900). + IBM S/390 (G5 and G6) and eServer zSeries (z800, z900 and z990). For details please refer to the documentation provided by IBM at @@ -7971,6 +7990,37 @@ If unsure, say N. +IBM S/390 and zSeries OSA-Express and HiperSockets device driver +CONFIG_QETH + This driver supports the IBM S/390 and zSeries OSA Express adapters + in QDIO mode (all media types), HiperSockets interfaces and VM GuestLAN + interfaces in QDIO and HIPER mode. + + For details please refer to the documentation provided by IBM at + + + This driver is also available as a module (code which can be + inserted in and removed from the running kernel whenever you + want). If you want to compile it as a module, say 'M' here and + read file Documentation/modules.txt. + +IPv6 support for qeth +CONFIG_QETH_IPV6 + If CONFIG_QETH is switched on, this option will include IPv6 + support in the qeth device driver. + +IEEE 802.1q VLAN support for qeth +CONFIG_QETH_VLAN + If CONFIG_QETH is switched on, this option will include IEEE + 802.1q VLAN support in the qeth device driver. + +Performance statistics for the qeth drivers +CONFIG_QETH_PERF_STATS + When switched on, this option will add a file in the proc-fs + (/proc/qeth_perf_stats) containing performance statistics. It + may slightly impact performance, so this is only recommended for + internal tuning of the device driver. + SGI WD93C93 SCSI Driver CONFIG_SCSI_SGIWD93 Say Y here to support the on-board WD93C93 SCSI controller found (a) @@ -9281,6 +9331,16 @@ If unsure, say N. +CONFIG_SCSI_ATA_PIIX + This option enables support for ICH5 Serial ATA. + + If unsure, say N. + +CONFIG_SCSI_SATA_NV + This option enables support for NVIDIA Serial ATA. + + If unsure, say N. + CONFIG_SCSI_SATA_PROMISE This option enables support for Promise Serial ATA TX2/TX4. @@ -9301,6 +9361,11 @@ If unsure, say N. +CONFIG_SCSI_SATA_ULI + This option enables support for ULi Electronics SATA. + + If unsure, say N. + CONFIG_SCSI_SATA_VIA This option enables support for VIA Serial ATA. @@ -10077,6 +10142,49 @@ compile it as a module, say M here and read . +Intersil 802.11(a/b/g) Prism GT/Duette/Indigo support +CONFIG_PRISM54 + Enable PCI and Cardbus support for the following chipset based cards: + + ISL3880 - Prism GT 802.11 b/g + ISL3877 - Prism Indigo 802.11 a + ISL3890 - Prism Duette 802.11 a/b/g + + For a complete list of supported cards visit . + Here is the latest confirmed list of supported cards: + + 3com OfficeConnect 11g Cardbus Card aka 3CRWE154G72 + Allnet ALL0271 PCI Card + Compex WL54G Cardbus Card + Corega CG-WLCB54GT Cardbus Card + D-Link Air Plus Xtreme G A1 Cardbus Card aka DWL-g650 + I-O Data WN-G54/CB Cardbus Card + Kobishi XG-300 aka Z-Com Cardbus Card + Netgear WG511 Cardbus Card + Ovislink WL-5400PCI PCI Card + Peabird WLG-PCI PCI Card + Sitecom WL-100i Cardbus Card + Sitecom WL-110i PCI Card + SMC2802W - EZ Connect g 2.4GHz 54 Mbps Wireless PCI Card + SMC2835W - EZ Connect g 2.4GHz 54 Mbps Wireless Cardbus Card + Z-Com XG-900 PCI Card + Zyxel G-100 Cardbus Card + + If you enable this, you require a firmware file as well. + You will need to copy this to /usr/lib/hotplug/firmware/isl3890. + You can get this non-GPL'd firmware file from the Prism54 project page: + . + You will also need the /etc/hotplug/firmware.agent script from + a current hotplug package. + + + Note: You need a motherboard with DMA support to use any of these cards + + If you want to compile the driver as a module ( = code which can be + inserted in and removed from the running kernel whenever you want), + say M here and read . The module + will be called prism54.o. + Aironet 4500/4800 PROC interface CONFIG_AIRONET4500_PROC If you say Y here (and to the "/proc file system" below), you will @@ -17211,10 +17319,11 @@ If you would like to include the NFSv3 server as well as the NFSv2 server, say Y here. If unsure, say Y. -Provide NFS over TCP server support EXPERIMENTAL +Provide NFS over TCP server support CONFIG_NFSD_TCP - Enable NFS service over TCP connections. This the officially - still experimental, but seems to work well. + If you want your NFS server to support TCP connections, say Y here. + TCP connections usually perform better than the default UDP when + the network is lossy or congested. If unsure, say Y. OS/2 HPFS file system support CONFIG_HPFS_FS @@ -25344,13 +25453,13 @@ Support for DASD hard disks CONFIG_DASD Enable this option if you want to access DASDs directly utilizing - S/390s channel subsystem commands. This is necessary for running + S/390's or zSeries' channel subsystem commands. This is necessary for running natively on a single image or an LPAR. Support for ECKD hard disks CONFIG_DASD_ECKD ECKD (Extended Count Key Data) devices are the most commonly used - devices on S/390s. You should enable this option unless you are + devices on zSeries and S/390. You should enable this option unless you are very sure you have no ECKD device. ECKD demand loading @@ -28864,6 +28973,15 @@ This code also includes SHA-384, a 384 bit hash with 192 bits of security against collision attacks. +CONFIG_CRYPTO_WP512 + Whirlpool hash algorithm 512, 384 and 256-bit hashes + + Whirlpool-512 is part of the NESSIE cryptographic primitives. + Whirlpool will be part of the ISO/IEC 10118-3:2003(E) standard + + See also: + http://planeta.terra.com.br/informatica/paulobarreto/WhirlpoolPage.html + CONFIG_CRYPTO_DES DES cipher algorithm (FIPS 46-2), and Triple DES EDE (FIPS 46-3). @@ -28891,7 +29009,8 @@ Serpent cipher algorithm, by Anderson, Biham & Knudsen. Keys are allowed to be from 0 to 256 bits in length, in steps - of 8 bits. + of 8 bits. Also includes the 'Tnepres' algorithm, a reversed + variant of Serpent for compatibility with old kerneli code. See also: http://www.cl.cam.ac.uk/~rja14/serpent.html @@ -28932,7 +29051,7 @@ many rounds for security. It is very fast and uses little memory. - Xtendend Tiny Encryption Algorithm is a modifcation to + Xtendend Tiny Encryption Algorithm is a modification to the TEA algorithm to address a potential key weakness in the TEA algorithm. @@ -28944,6 +29063,27 @@ WEP, but it should not be for other purposes because of the weakness of the algorithm. +CONFIG_CRYPTO_KHAZAD + Khazad cipher algorithm. + + Khazad was a finalist in the initial NESSIE competition. It is + an algorithm optimized for 64-bit processors with good performance + on 32-bit processors. Khazad uses an 128 bit key size. + + See also: + http://planeta.terra.com.br/informatica/paulobarreto/KhazadPage.html + +CONFIG_CRYPTO_ANUBIS + Anubis cipher algorithm. + + Anubis is a variable key length cipher which can use keys from + 128 bits to 320 bits in length. It was evaluated as a entrant + in the NESSIE competition. + + See also: + https://www.cosic.esat.kuleuven.ac.be/nessie/reports/ + http://planeta.terra.com.br/informatica/paulobarreto/AnubisPage.html + CONFIG_CRYPTO_DEFLATE This is the Deflate algorithm (RFC1951), specified for use in IPSec with the IPCOMP protocol (RFC3173, RFC2394). diff -urN linux-2.4.27/Documentation/crypto/api-intro.txt linux-2.4.28/Documentation/crypto/api-intro.txt --- linux-2.4.27/Documentation/crypto/api-intro.txt 2004-08-07 16:26:04.000000000 -0700 +++ linux-2.4.28/Documentation/crypto/api-intro.txt 2004-11-17 03:54:20.976372320 -0800 @@ -219,6 +219,19 @@ CAST5/CAST6 algorithm contributors: Kartikey Mahendra Bhatt (original developers unknown, FSF copyright). +TEA/XTEA algorithm contributors: + Aaron Grothe + +Khazad algorithm contributors: + Aaron Grothe + +Whirlpool algorithm contributors: + Aaron Grothe + Jean-Luc Cooke + +Anubis algorithm contributors: + Aaron Grothe + Generic scatterwalk code by Adam J. Richter Please send any credits updates or corrections to: diff -urN linux-2.4.27/Documentation/filesystems/xfs.txt linux-2.4.28/Documentation/filesystems/xfs.txt --- linux-2.4.27/Documentation/filesystems/xfs.txt 2004-08-07 16:26:04.000000000 -0700 +++ linux-2.4.28/Documentation/filesystems/xfs.txt 2004-11-17 03:54:20.976372320 -0800 @@ -120,19 +120,19 @@ fs.xfs.stats_clear (Min: 0 Default: 0 Max: 1) Setting this to "1" clears accumulated XFS statistics - in /proc/fs/xfs/stat. It then immediately reset to "0". + in /proc/fs/xfs/stat. It then immediately resets to "0". - fs.xfs.sync_interval (Min: HZ Default: 30*HZ Max: 60*HZ) + fs.xfs.xfssyncd_centisecs (Min: 100 Default: 3000 Max: 6000) The interval at which the xfssyncd thread flushes metadata out to disk. This thread will flush log activity out, and do some processing on unlinked inodes. - fs.xfs.age_buffer (Min: 1*HZ Default: 15*HZ Max: 300*HZ) - The age at which xfsbufd flushes dirty metadata buffers to disk. - - fs.xfs.flush_interval (Min: HZ/2 Default: HZ Max: 30*HZ) + fs.xfs.xfsbufd_centisecs (Min: 50 Default: 100 Max: 3000) The interval at which xfsbufd scans the dirty metadata buffers list. + fs.xfs.age_buffer_centisecs (Min: 100 Default: 1500 Max: 30000) + The age at which xfsbufd flushes dirty metadata buffers to disk. + fs.xfs.error_level (Min: 0 Default: 3 Max: 11) A volume knob for error reporting when internal errors occur. This will generate detailed messages & backtraces for filesystem diff -urN linux-2.4.27/Documentation/ftape.txt linux-2.4.28/Documentation/ftape.txt --- linux-2.4.27/Documentation/ftape.txt 1999-11-06 10:38:40.000000000 -0800 +++ linux-2.4.28/Documentation/ftape.txt 2004-11-17 03:54:20.977372361 -0800 @@ -15,7 +15,7 @@ ftape has a home page at -http://www-math.math.rwth-aachen.de/~LBFM/claus/ftape +http://www.instmath.rwth-aachen.de/~heine/ftape/ which contains further information about ftape. Please cross check this WWW address against the address given (if any) in the MAINTAINERS @@ -58,7 +58,7 @@ versions of ftape and useful links to related topics can be found at the ftape home page at -http://www-math.math.rwth-aachen.de/~LBFM/claus/ftape +http://www.instmath.rwth-aachen.de/~heine/ftape/ ******************************************************************************* @@ -132,7 +132,7 @@ or from the ftape home page at - http://www-math.math.rwth-aachen.de/~LBFM/claus/ftape + http://www.instmath.rwth-aachen.de/~heine/ftape/ `ftformat' is contained in the `./contrib/' subdirectory of that separate ftape package. diff -urN linux-2.4.27/Documentation/i2c/writing-clients linux-2.4.28/Documentation/i2c/writing-clients --- linux-2.4.27/Documentation/i2c/writing-clients 2004-02-18 05:36:30.000000000 -0800 +++ linux-2.4.28/Documentation/i2c/writing-clients 2004-11-17 03:54:20.978372402 -0800 @@ -24,24 +24,25 @@ routines, a client structure specific information like the actual I2C address. - struct i2c_driver foo_driver - { - /* name */ "Foo version 2.3 and later driver", - /* id */ I2C_DRIVERID_FOO, - /* flags */ I2C_DF_NOTIFY, - /* attach_adapter */ &foo_attach_adapter, - /* detach_client */ &foo_detach_client, - /* command */ &foo_command, /* May be NULL */ - /* inc_use */ &foo_inc_use, /* May be NULL */ - /* dec_use */ &foo_dec_use /* May be NULL */ - } +static struct i2c_driver foo_driver = { + .name = "Foo version 2.3 driver", + .id = I2C_DRIVERID_FOO, /* from i2c-id.h, optional */ + .flags = I2C_DF_NOTIFY, + .attach_adapter = &foo_attach_adapter, + .detach_client = &foo_detach_client, + .command = &foo_command, /* may be NULL */ + .inc_use = &foo_inc_use, /* May be NULL */ + .dec_use = &foo_dec_use, /* May be NULL */ +} The name can be chosen freely, and may be upto 40 characters long. Please use something descriptive here. -The id should be a unique ID. The range 0xf000 to 0xffff is reserved for -local use, and you can use one of those until you start distributing the -driver. Before you do that, contact the i2c authors to get your own ID(s). +If used, the id should be a unique ID. The range 0xf000 to 0xffff is +reserved for local use, and you can use one of those until you start +distributing the driver, at which time you should contact the i2c authors +to get your own ID(s). Note that most of the time you don't need an ID +at all so you can just omit it. Don't worry about the flags field; just put I2C_DF_NOTIFY into it. This means that your driver will be notified when new adapters are found. diff -urN linux-2.4.27/Documentation/isdn/CREDITS linux-2.4.28/Documentation/isdn/CREDITS --- linux-2.4.27/Documentation/isdn/CREDITS 2000-02-15 11:40:42.000000000 -0800 +++ linux-2.4.28/Documentation/isdn/CREDITS 2004-11-17 03:54:20.987372772 -0800 @@ -37,7 +37,7 @@ Andreas Kool (akool@Kool.f.EUnet.de) For contribution of the isdnlog/isdnrep-tool -Pedro Roque Marques (roque@di.fc.ul.pt) +Pedro Roque Marques (pedro_m@yahoo.com) For lot of new ideas and the pcbit driver. Eberhard Moenkeberg (emoenke@gwdg.de) diff -urN linux-2.4.27/Documentation/isdn/README.pcbit linux-2.4.28/Documentation/isdn/README.pcbit --- linux-2.4.27/Documentation/isdn/README.pcbit 1999-11-07 16:34:00.000000000 -0800 +++ linux-2.4.28/Documentation/isdn/README.pcbit 2004-11-17 03:54:20.999373265 -0800 @@ -37,4 +37,4 @@ regards, Pedro. - + diff -urN linux-2.4.27/Documentation/networking/proc_net_tcp.txt linux-2.4.28/Documentation/networking/proc_net_tcp.txt --- linux-2.4.27/Documentation/networking/proc_net_tcp.txt 1969-12-31 16:00:00.000000000 -0800 +++ linux-2.4.28/Documentation/networking/proc_net_tcp.txt 2004-11-17 03:54:21.000373307 -0800 @@ -0,0 +1,47 @@ +This document describes the interfaces /proc/net/tcp and /proc/net/tcp6. + +These /proc interfaces provide information about currently active TCP +connections, and are implemented by tcp_get_info() in net/ipv4/tcp_ipv4.c and +tcp6_get_info() in net/ipv6/tcp_ipv6.c, respectively. + +It will first list all listening TCP sockets, and next list all established +TCP connections. A typical entry of /proc/net/tcp would look like this (split +up into 3 parts because of the length of the line): + + 46: 010310AC:9C4C 030310AC:1770 01 + | | | | | |--> connection state + | | | | |------> remote TCP port number + | | | |-------------> remote IPv4 address + | | |--------------------> local TCP port number + | |---------------------------> local IPv4 address + |----------------------------------> number of entry + + 00000150:00000000 01:00000019 00000000 + | | | | |--> number of unrecovered RTO timeouts + | | | |----------> number of jiffies until timer expires + | | |----------------> timer_active (see below) + | |----------------------> receive-queue + |-------------------------------> transmit-queue + + 1000 0 54165785 4 cd1e6040 25 4 27 3 -1 + | | | | | | | | | |--> slow start size threshold, + | | | | | | | | | or -1 if the treshold + | | | | | | | | | is >= 0xFFFF + | | | | | | | | |----> sending congestion window + | | | | | | | |-------> (ack.quick<<1)|ack.pingpong + | | | | | | |---------> Predicted tick of soft clock + | | | | | | (delayed ACK control data) + | | | | | |------------> retransmit timeout + | | | | |------------------> location of socket in memory + | | | |-----------------------> socket reference count + | | |-----------------------------> inode + | |----------------------------------> unanswered 0-window probes + |---------------------------------------------> uid + +timer_active: + 0 no timer is pending + 1 retransmit-timer is pending + 2 another timer (e.g. delayed ack or keepalive) is pending + 3 this is a socket in TIME_WAIT state. Not all fields will contain + data (or even exist) + 4 zero window probe timer is pending diff -urN linux-2.4.27/MAINTAINERS linux-2.4.28/MAINTAINERS --- linux-2.4.27/MAINTAINERS 2004-08-07 16:26:04.000000000 -0700 +++ linux-2.4.28/MAINTAINERS 2004-11-17 03:54:21.002373389 -0800 @@ -177,10 +177,8 @@ AD1816 SOUND DRIVER P: Thorsten Knabe -M: Thorsten Knabe -M: Thorsten Knabe -W: http://www.student.informatik.tu-darmstadt.de/~tek/projects/linux.html -W: http://www.tu-darmstadt.de/~tek01/projects/linux.html +M: Thorsten Knabe +W: http://linux.thorsten-knabe.de S: Maintained ADVANSYS SCSI DRIVER @@ -456,14 +454,20 @@ S: Maintained +HP 66Mhz FIBRE CHANNEL DRIVER +P: Chirag Kantharia +M: chirag.kantharia@hp.com +L: iss_storagedev@hp.com +S: Maintained + HP SMART2 RAID DRIVER -P: Francis Wiran -M: francis.wiran@hp.com +P: Chirag Kantharia +M: chirag.kantharia@hp.com L: iss_storagedev@hp.com -S: Odd Fixes +S: Maintained -HP SMART CISS RAID DRIVER -P: Mike Miller, Michael Ni +HP SMART ARRAY CISS RAID DRIVER (cciss) +P: Mike Miller M: mike.miller@hp.com L: iss_storagedev@hp.com S: Supported @@ -530,15 +534,7 @@ P: James Morris M: jmorris@intercode.com.au P: David S. Miller -M: davem@redhat.com -MARVELL YUKON / SYSKONNECT DRIVER -P: Mirko Lindner -M: mlindner@syskonnect.de -P: Ralph Roesler -M: rroesler@syskonnect.de -W: http://www.syskonnect.com -S: Supported -M: davem@redhat.com +M: davem@davemloft.net W http://samba.org/~jamesm/crypto/ L: linux-kernel@vger.kernel.org S: Maintained @@ -577,9 +573,9 @@ S: Maintained DECnet NETWORK LAYER -P: Steven Whitehouse -M: SteveW@ACM.org -W: http://www.sucs.swan.ac.uk/~rohan/DECnet/index.html +P: Patrick Caulfield +M: patrick@tykepenguin.com +W: http://linux-decnet.sourceforge.net L: linux-decnet-user@lists.sourceforge.net S: Maintained @@ -770,7 +766,7 @@ GDT SCSI DISK ARRAY CONTROLLER DRIVER P: Achim Leubner -M: achim@vortex.de +M: achim_leubner@adaptec.com L: linux-scsi@vger.kernel.org W: http://www.icp-vortex.com/ S: Supported @@ -997,6 +993,14 @@ M: jjciarla@raiz.uncu.edu.ar S: Maintained +IPVS +P: Wensong Zhang +M: wensong@linux-vs.org +P: Julian Anastasov +M: ja@ssi.bg +L: lvs-users@linuxvirtualserver.org +S: Maintained + IPX NETWORK LAYER P: Arnaldo Carvalho de Melo M: acme@conectiva.com.br @@ -1188,6 +1192,14 @@ L: netdev@oss.sgi.com S: Supported +MARVELL YUKON / SYSKONNECT DRIVER +P: Mirko Lindner +M: mlindner@syskonnect.de +P: Ralph Roesler +M: rroesler@syskonnect.de +W: http://www.syskonnect.com +S: Supported + MATROX FRAMEBUFFER DRIVER P: Petr Vandrovec M: vandrove@vc.cvut.cz @@ -1312,7 +1324,7 @@ NETWORKING [IPv4/IPv6] P: David S. Miller -M: davem@redhat.com +M: davem@davemloft.net P: Alexey Kuznetsov M: kuznet@ms2.inr.ac.ru P: Pekka Savola (ipv6) @@ -1448,9 +1460,8 @@ S: Odd Fixes PCI HOTPLUG CORE -P: Greg Kroah-Hartman -M: greg@kroah.com -M: gregkh@us.ibm.com +P: Gary Hade +M: garyhade@us.ibm.com S: Supported PCI HOTPLUG COMPAQ DRIVER @@ -1525,6 +1536,13 @@ M: mostrows@styx.uwaterloo.ca S: Maintained +PRISM54 WIRELESS DRIVER +P: Prism54 Development Team +M: prism54-private@prism54.org +L: netdev@oss.sgi.com +W: http://prism54.org +S: Maintained + PROMISE DC4030 CACHING DISK CONTROLLER DRIVER P: Peter Denison M: promise@pnd-pc.demon.co.uk @@ -1689,10 +1707,8 @@ SMB FILESYSTEM P: Urban Widmark -M: urban@teststation.com -W: http://samba.org/ -L: samba@samba.org -S: Maintained +M: Urban.Widmark@enlight.net +S: Odd Fixes SNA NETWORK LAYER P: Jay Schulist @@ -1723,7 +1739,7 @@ UltraSPARC (sparc64): P: David S. Miller -M: davem@redhat.com +M: davem@davemloft.net P: Eddie C. Dost M: ecd@skynet.be P: Jakub Jelinek @@ -1735,8 +1751,8 @@ S: Maintained SPARC (sparc32): -P: Keith M. Wesolowski -M: wesolows@foobazco.org +P: William L. Irwin +M: wli@holomorphy.com L: sparclinux@vger.kernel.org S: Maintained diff -urN linux-2.4.27/Makefile linux-2.4.28/Makefile --- linux-2.4.27/Makefile 2004-08-07 16:26:07.000000000 -0700 +++ linux-2.4.28/Makefile 2004-11-17 03:54:22.250424702 -0800 @@ -1,6 +1,6 @@ VERSION = 2 PATCHLEVEL = 4 -SUBLEVEL = 27 +SUBLEVEL = 28 EXTRAVERSION = KERNELRELEASE=$(VERSION).$(PATCHLEVEL).$(SUBLEVEL)$(EXTRAVERSION) diff -urN linux-2.4.27/arch/alpha/boot/bootloader.lds linux-2.4.28/arch/alpha/boot/bootloader.lds --- linux-2.4.27/arch/alpha/boot/bootloader.lds 2001-07-02 14:40:14.000000000 -0700 +++ linux-2.4.28/arch/alpha/boot/bootloader.lds 2004-11-17 03:54:21.003373430 -0800 @@ -1,5 +1,6 @@ OUTPUT_FORMAT("elf64-alpha") ENTRY(__start) +printk = srm_printk; SECTIONS { . = 0x20000000; diff -urN linux-2.4.27/arch/alpha/boot/bootp.c linux-2.4.28/arch/alpha/boot/bootp.c --- linux-2.4.27/arch/alpha/boot/bootp.c 2001-10-04 18:47:08.000000000 -0700 +++ linux-2.4.28/arch/alpha/boot/bootp.c 2004-11-17 03:54:21.003373430 -0800 @@ -26,6 +26,8 @@ struct pcb_struct * pcb_va, struct pcb_struct * pcb_pa, unsigned long *vptb); +extern void move_stack(unsigned long new_stack); + struct hwrpb_struct *hwrpb = INIT_HWRPB; static struct pcb_struct pcb_va[1]; @@ -118,12 +120,10 @@ runkernel(void) { __asm__ __volatile__( - "bis %1,%1,$30\n\t" "bis %0,%0,$27\n\t" "jmp ($27)" : /* no outputs: it doesn't even return */ - : "r" (START_ADDR), - "r" (PAGE_SIZE + INIT_STACK)); + : "r" (START_ADDR)); } extern char _end; @@ -147,9 +147,7 @@ */ static long nbytes; static char envval[256] __attribute__((aligned(8))); -#ifdef INITRD_IMAGE_SIZE static unsigned long initrd_start; -#endif srm_printk("Linux/AXP bootp loader for Linux " UTS_RELEASE "\n"); if (INIT_HWRPB->pagesize != 8192) { @@ -164,13 +162,20 @@ } pal_init(); -#ifdef INITRD_IMAGE_SIZE /* The initrd must be page-aligned. See below for the cause of the magic number 5. */ - initrd_start = ((START_ADDR + 5*KERNEL_SIZE) | (PAGE_SIZE-1)) + 1; + initrd_start = ((START_ADDR + 5*KERNEL_SIZE + PAGE_SIZE) | + (PAGE_SIZE-1)) + 1; +#ifdef INITRD_IMAGE_SIZE srm_printk("Initrd positioned at %#lx\n", initrd_start); #endif + /* + * Move the stack to a safe place to ensure it won't be + * overwritten by kernel image. + */ + move_stack(initrd_start - PAGE_SIZE); + nbytes = callback_getenv(ENV_BOOTED_OSFLAGS, envval, sizeof(envval)); if (nbytes < 0 || nbytes >= sizeof(envval)) { nbytes = 0; diff -urN linux-2.4.27/arch/alpha/boot/bootpz.c linux-2.4.28/arch/alpha/boot/bootpz.c --- linux-2.4.27/arch/alpha/boot/bootpz.c 2003-11-28 10:26:19.000000000 -0800 +++ linux-2.4.28/arch/alpha/boot/bootpz.c 2004-11-17 03:54:21.004373471 -0800 @@ -32,9 +32,6 @@ #undef DEBUG_CHECK_RANGE #define DEBUG_ADDRESSES -#define DEBUG_SP(x) \ - {register long sp asm("30"); srm_printk("%s (sp=%lx)\n", x, sp);} - extern unsigned long switch_to_osf_pal(unsigned long nr, struct pcb_struct * pcb_va, struct pcb_struct * pcb_pa, unsigned long *vptb); @@ -42,6 +39,8 @@ extern int decompress_kernel(void* destination, void *source, size_t ksize, size_t kzsize); +extern void move_stack(unsigned long new_stack); + struct hwrpb_struct *hwrpb = INIT_HWRPB; static struct pcb_struct pcb_va[1]; @@ -154,12 +153,10 @@ runkernel(void) { __asm__ __volatile__( - "bis %1,%1,$30\n\t" "bis %0,%0,$27\n\t" "jmp ($27)" : /* no outputs: it doesn't even return */ - : "r" (START_ADDR), - "r" (PAGE_SIZE + INIT_STACK)); + : "r" (START_ADDR)); } /* Must record the SP (it is virtual) on entry, so we can make sure @@ -244,7 +241,9 @@ for "bootmem" anyway. */ #define K_COPY_IMAGE_START NEXT_PAGE(K_KERNEL_IMAGE_END) -#define K_INITRD_START NEXT_PAGE(K_COPY_IMAGE_START + KERNEL_SIZE) +/* Reserve one page below INITRD for the new stack. */ +#define K_INITRD_START \ + NEXT_PAGE(K_COPY_IMAGE_START + KERNEL_SIZE + PAGE_SIZE) #define K_COPY_IMAGE_END \ (K_INITRD_START + REAL_INITRD_SIZE + MALLOC_AREA_SIZE) #define K_COPY_IMAGE_SIZE \ @@ -410,8 +409,7 @@ initrd_image_start, INITRD_IMAGE_SIZE); #endif - memcpy((void *)initrd_image_start, - (void *)V_INITRD_START, + memcpy((void *)initrd_image_start, (void *)V_INITRD_START, INITRD_IMAGE_SIZE); #endif /* INITRD_IMAGE_SIZE */ @@ -427,9 +425,14 @@ K_KERNEL_IMAGE_START, (unsigned)KERNEL_SIZE); #endif + /* + * Move the stack to a safe place to ensure it won't be + * overwritten by kernel image. + */ + move_stack(initrd_image_start - PAGE_SIZE); + memcpy((void *)K_KERNEL_IMAGE_START, - (void *)uncompressed_image_start, - KERNEL_SIZE); + (void *)uncompressed_image_start, KERNEL_SIZE); } /* Clear the zero page, then move the argument list in. */ diff -urN linux-2.4.27/arch/alpha/boot/head.S linux-2.4.28/arch/alpha/boot/head.S --- linux-2.4.27/arch/alpha/boot/head.S 1998-10-12 11:40:12.000000000 -0700 +++ linux-2.4.28/arch/alpha/boot/head.S 2004-11-17 03:54:21.005373512 -0800 @@ -100,3 +100,24 @@ .prologue 0 call_pal PAL_halt .end halt + +/* $16 - new stack page */ + .align 3 + .globl move_stack + .ent move_stack +move_stack: + .prologue 0 + lda $0, 0x1fff($31) + and $0, $30, $1 /* Stack offset */ + or $1, $16, $16 /* New stack pointer */ + mov $30, $1 + mov $16, $2 +1: ldq $3, 0($1) /* Move the stack */ + addq $1, 8, $1 + stq $3, 0($2) + and $0, $1, $4 + addq $2, 8, $2 + bne $4, 1b + mov $16, $30 + ret ($26) + .end move_stack diff -urN linux-2.4.27/arch/alpha/config.in linux-2.4.28/arch/alpha/config.in --- linux-2.4.27/arch/alpha/config.in 2004-02-18 05:36:30.000000000 -0800 +++ linux-2.4.28/arch/alpha/config.in 2004-11-17 03:54:21.005373512 -0800 @@ -314,7 +314,7 @@ bool ' OSF/1 v4 readv/writev compatibility' CONFIG_OSF4_COMPAT fi -tristate 'Kernel support for ELF binaries' CONFIG_BINFMT_ELF +bool 'Kernel support for ELF binaries' CONFIG_BINFMT_ELF tristate 'Kernel support for MISC binaries' CONFIG_BINFMT_MISC tristate 'Kernel support for Linux/Intel ELF binaries' CONFIG_BINFMT_EM86 source drivers/parport/Config.in diff -urN linux-2.4.27/arch/arm/config.in linux-2.4.28/arch/arm/config.in --- linux-2.4.27/arch/arm/config.in 2004-02-18 05:36:30.000000000 -0800 +++ linux-2.4.28/arch/arm/config.in 2004-11-17 03:54:21.006373553 -0800 @@ -498,7 +498,7 @@ "ELF CONFIG_KCORE_ELF \ A.OUT CONFIG_KCORE_AOUT" ELF tristate 'Kernel support for a.out binaries' CONFIG_BINFMT_AOUT -tristate 'Kernel support for ELF binaries' CONFIG_BINFMT_ELF +bool 'Kernel support for ELF binaries' CONFIG_BINFMT_ELF tristate 'Kernel support for MISC binaries' CONFIG_BINFMT_MISC dep_bool 'Power Management support (experimental)' CONFIG_PM $CONFIG_EXPERIMENTAL dep_tristate 'RISC OS personality' CONFIG_ARTHUR $CONFIG_CPU_32 diff -urN linux-2.4.27/arch/cris/config.in linux-2.4.28/arch/cris/config.in --- linux-2.4.27/arch/cris/config.in 2004-02-18 05:36:30.000000000 -0800 +++ linux-2.4.28/arch/cris/config.in 2004-11-17 03:54:21.006373553 -0800 @@ -30,7 +30,7 @@ bool 'BSD Process Accounting' CONFIG_BSD_PROCESS_ACCT bool 'Sysctl support' CONFIG_SYSCTL -tristate 'Kernel support for ELF binaries' CONFIG_BINFMT_ELF +bool 'Kernel support for ELF binaries' CONFIG_BINFMT_ELF string 'Kernel command line' CONFIG_ETRAX_CMDLINE "root=/dev/mtdblock3" diff -urN linux-2.4.27/arch/i386/Makefile linux-2.4.28/arch/i386/Makefile --- linux-2.4.27/arch/i386/Makefile 2003-06-13 07:51:29.000000000 -0700 +++ linux-2.4.28/arch/i386/Makefile 2004-11-17 03:54:21.007373594 -0800 @@ -94,6 +94,10 @@ CFLAGS += $(call check_gcc,-march=c3-2,-march=i686) endif +# Disable unit-at-a-time mode, it makes gcc use a lot more stack +# due to the lack of sharing of stacklots. +CFLAGS += $(call check_gcc,-fno-unit-at-a-time,) + HEAD := arch/i386/kernel/head.o arch/i386/kernel/init_task.o SUBDIRS += arch/i386/kernel arch/i386/mm arch/i386/lib diff -urN linux-2.4.27/arch/i386/boot/compressed/misc.c linux-2.4.28/arch/i386/boot/compressed/misc.c --- linux-2.4.27/arch/i386/boot/compressed/misc.c 2003-08-25 04:44:39.000000000 -0700 +++ linux-2.4.28/arch/i386/boot/compressed/misc.c 2004-11-17 03:54:21.008373636 -0800 @@ -104,7 +104,7 @@ static void *malloc(int size); static void free(void *where); -static void puts(const char *); +static void putstr(const char *); extern int end; static long free_mem_ptr = (long)&end; @@ -165,7 +165,7 @@ vidmem[i] = ' '; } -static void puts(const char *s) +static void putstr(const char *s) { int x,y,pos; char c; @@ -283,9 +283,9 @@ static void error(char *x) { - puts("\n\n"); - puts(x); - puts("\n\n -- System halted"); + putstr("\n\n"); + putstr(x); + putstr("\n\n -- System halted"); while(1); /* Halt */ } @@ -369,9 +369,9 @@ else setup_output_buffer_if_we_run_high(mv); makecrc(); - puts("Uncompressing Linux... "); + putstr("Uncompressing Linux... "); gunzip(); - puts("Ok, booting the kernel.\n"); + putstr("Ok, booting the kernel.\n"); if (high_loaded) close_output_buffer_if_we_run_high(mv); return high_loaded; } diff -urN linux-2.4.27/arch/i386/config.in linux-2.4.28/arch/i386/config.in --- linux-2.4.27/arch/i386/config.in 2004-02-18 05:36:30.000000000 -0800 +++ linux-2.4.28/arch/i386/config.in 2004-11-17 03:54:21.008373636 -0800 @@ -326,7 +326,7 @@ A.OUT CONFIG_KCORE_AOUT" ELF fi tristate 'Kernel support for a.out binaries' CONFIG_BINFMT_AOUT -tristate 'Kernel support for ELF binaries' CONFIG_BINFMT_ELF +bool 'Kernel support for ELF binaries' CONFIG_BINFMT_ELF tristate 'Kernel support for MISC binaries' CONFIG_BINFMT_MISC bool 'Select task to kill on out of memory condition' CONFIG_OOM_KILLER diff -urN linux-2.4.27/arch/i386/defconfig linux-2.4.28/arch/i386/defconfig --- linux-2.4.27/arch/i386/defconfig 2004-08-07 16:26:04.000000000 -0700 +++ linux-2.4.28/arch/i386/defconfig 2004-11-17 03:54:21.009373677 -0800 @@ -758,7 +758,7 @@ # CONFIG_ROOT_NFS is not set CONFIG_NFSD=y # CONFIG_NFSD_V3 is not set -# CONFIG_NFSD_TCP is not set +CONFIG_NFSD_TCP=y CONFIG_SUNRPC=y CONFIG_LOCKD=y # CONFIG_SMB_FS is not set diff -urN linux-2.4.27/arch/i386/kernel/acpi.c linux-2.4.28/arch/i386/kernel/acpi.c --- linux-2.4.27/arch/i386/kernel/acpi.c 2004-08-07 16:26:04.000000000 -0700 +++ linux-2.4.28/arch/i386/kernel/acpi.c 2004-11-17 03:54:21.010373718 -0800 @@ -389,6 +389,8 @@ } +extern int mp_irqs_alloc(void); + /* * acpi_boot_init() * called from setup_arch(), always. diff -urN linux-2.4.27/arch/i386/kernel/apic.c linux-2.4.28/arch/i386/kernel/apic.c --- linux-2.4.27/arch/i386/kernel/apic.c 2003-11-28 10:26:19.000000000 -0800 +++ linux-2.4.28/arch/i386/kernel/apic.c 2004-11-17 03:54:21.011373759 -0800 @@ -658,6 +658,12 @@ if (!cpu_has_apic) { /* + * Over-ride BIOS and try to enable LAPIC + * only if "lapic" specified + */ + if (enable_local_apic != 1) + goto no_apic; + /* * Some BIOSes disable the local APIC in the * APIC_BASE MSR. This can only be done in * software for Intel P6 and AMD K7 (Model > 1). diff -urN linux-2.4.27/arch/i386/kernel/dmi_scan.c linux-2.4.28/arch/i386/kernel/dmi_scan.c --- linux-2.4.27/arch/i386/kernel/dmi_scan.c 2004-04-14 06:05:25.000000000 -0700 +++ linux-2.4.28/arch/i386/kernel/dmi_scan.c 2004-11-17 03:54:21.012373800 -0800 @@ -328,26 +328,6 @@ } /* - * Some machines, usually laptops, can't handle an enabled local APIC. - * The symptoms include hangs or reboots when suspending or resuming, - * attaching or detaching the power cord, or entering BIOS setup screens - * through magic key sequences. - */ -static int __init local_apic_kills_bios(struct dmi_blacklist *d) -{ -#ifdef CONFIG_X86_LOCAL_APIC - extern int enable_local_apic; - if (enable_local_apic == 0) { - enable_local_apic = -1; - printk(KERN_WARNING "%s with broken BIOS detected. " - "Refusing to enable the local APIC.\n", - d->ident); - } -#endif - return 0; -} - -/* * Check for clue free BIOS implementations who use * the following QA technique * @@ -459,6 +439,22 @@ } /* + * Work around broken Acer TravelMate 360 Notebooks which assign Cardbus to + * IRQ 11 even though it is actually wired to IRQ 10 + */ +static __init int fix_acer_tm360_irqrouting(struct dmi_blacklist *d) +{ +#ifdef CONFIG_PCI + extern int acer_tm360_irqrouting; + if (acer_tm360_irqrouting == 0) { + acer_tm360_irqrouting = 1; + printk(KERN_INFO "%s detected - fixing broken IRQ routing\n", d->ident); + } +#endif + return 0; +} + +/* * Exploding PnPBIOS. Don't yet know if its the BIOS or us for * some entries */ @@ -776,26 +772,6 @@ MATCH(DMI_PRODUCT_NAME, "P14H") } }, - /* Machines which have problems handling enabled local APICs */ - - { local_apic_kills_bios, "Dell Inspiron", { - MATCH(DMI_SYS_VENDOR, "Dell Computer Corporation"), - MATCH(DMI_PRODUCT_NAME, "Inspiron"), - NO_MATCH, NO_MATCH - } }, - - { local_apic_kills_bios, "Dell Latitude", { - MATCH(DMI_SYS_VENDOR, "Dell Computer Corporation"), - MATCH(DMI_PRODUCT_NAME, "Latitude"), - NO_MATCH, NO_MATCH - } }, - - { local_apic_kills_bios, "IBM Thinkpad T20", { - MATCH(DMI_BOARD_VENDOR, "IBM"), - MATCH(DMI_BOARD_NAME, "264741U"), - NO_MATCH, NO_MATCH - } }, - { init_ints_after_s1, "Toshiba Satellite 4030cdt", { /* Reinitialization of 8259 is needed after S1 resume */ MATCH(DMI_PRODUCT_NAME, "S4030CDT/4.3"), NO_MATCH, NO_MATCH, NO_MATCH @@ -820,6 +796,12 @@ MATCH(DMI_BOARD_VERSION, "OmniBook N32N-736") } }, + { fix_acer_tm360_irqrouting, "Acer TravelMate 36x Laptop", { + MATCH(DMI_SYS_VENDOR, "Acer"), + MATCH(DMI_PRODUCT_NAME, "TravelMate 360"), + NO_MATCH, NO_MATCH + } }, + /* * Generic per vendor APM settings */ @@ -935,6 +917,13 @@ /* newer BIOS, Revision 1011, does work */ MATCH(DMI_BIOS_VERSION, "ASUS A7V ACPI BIOS Revision 1007"), NO_MATCH }}, + + { disable_acpi_pci, "Acer TravelMate 36x Laptop", { + MATCH(DMI_SYS_VENDOR, "Acer"), + MATCH(DMI_PRODUCT_NAME, "TravelMate 360"), + NO_MATCH, NO_MATCH + } }, + #endif /* CONFIG_ACPI_PCI */ { NULL, } diff -urN linux-2.4.27/arch/i386/kernel/io_apic.c linux-2.4.28/arch/i386/kernel/io_apic.c --- linux-2.4.27/arch/i386/kernel/io_apic.c 2004-08-07 16:26:04.000000000 -0700 +++ linux-2.4.28/arch/i386/kernel/io_apic.c 2004-11-17 03:54:21.013373841 -0800 @@ -1349,7 +1349,7 @@ #ifndef CONFIG_SMP -void send_IPI_self(int vector) +void fastcall send_IPI_self(int vector) { unsigned int cfg; diff -urN linux-2.4.27/arch/i386/kernel/mpparse.c linux-2.4.28/arch/i386/kernel/mpparse.c --- linux-2.4.27/arch/i386/kernel/mpparse.c 2004-08-07 16:26:04.000000000 -0700 +++ linux-2.4.28/arch/i386/kernel/mpparse.c 2004-11-17 03:54:21.014373882 -0800 @@ -1105,7 +1105,7 @@ /* allocate mp_irqs[] for ACPI parsing table parsing */ -int __init mp_irqs_alloc() +int __init mp_irqs_alloc(void) { int size = (MAX_IRQ_SOURCES * sizeof(int)) * 4; diff -urN linux-2.4.27/arch/i386/kernel/pci-irq.c linux-2.4.28/arch/i386/kernel/pci-irq.c --- linux-2.4.27/arch/i386/kernel/pci-irq.c 2004-08-07 16:26:04.000000000 -0700 +++ linux-2.4.28/arch/i386/kernel/pci-irq.c 2004-11-17 03:54:21.015373923 -0800 @@ -23,6 +23,7 @@ #define PIRQ_VERSION 0x0100 int broken_hp_bios_irq9; +int acer_tm360_irqrouting; static struct irq_routing_table *pirq_table; @@ -894,6 +895,14 @@ r->set(pirq_router_dev, dev, pirq, 11); } + /* same for Acer Travelmate 360, but with CB and irq 11 -> 10 */ + if (acer_tm360_irqrouting && dev->irq == 11 && dev->vendor == PCI_VENDOR_ID_O2) { + pirq = 0x68; + mask = 0x400; + dev->irq = r->get(pirq_router_dev, dev, pirq); + pci_write_config_byte(dev, PCI_INTERRUPT_LINE, dev->irq); + } + /* * Find the best IRQ to assign: use the one * reported by the device if possible. @@ -1103,10 +1112,6 @@ } dev = temp_dev; if (irq >= 0) { -#ifdef CONFIG_PCI_USE_VECTOR - if (!platform_legacy_irq(irq)) - irq = IO_APIC_VECTOR(irq); -#endif printk(KERN_INFO "PCI->APIC IRQ transform: (B%d,I%d,P%d) -> %d\n", dev->bus->number, PCI_SLOT(dev->devfn), pin, irq); dev->irq = irq; @@ -1124,6 +1129,6 @@ /* VIA bridges use interrupt line for apic/pci steering across the V-Link */ else if (interrupt_line_quirk) - pci_write_config_byte(dev, PCI_INTERRUPT_LINE, dev->irq); + pci_write_config_byte(dev, PCI_INTERRUPT_LINE, dev->irq & 15); } diff -urN linux-2.4.27/arch/i386/kernel/pci-pc.c linux-2.4.28/arch/i386/kernel/pci-pc.c --- linux-2.4.27/arch/i386/kernel/pci-pc.c 2004-08-07 16:26:04.000000000 -0700 +++ linux-2.4.28/arch/i386/kernel/pci-pc.c 2004-11-17 03:54:21.016373964 -0800 @@ -177,6 +177,7 @@ }; #endif /* !CONFIG_MULTIQUAD */ +#undef PCI_CONF1_ADDRESS #define PCI_CONF1_ADDRESS(bus, dev, fn, reg) \ (0x80000000 | (bus << 16) | (dev << 11) | (fn << 8) | (reg & ~3)) @@ -1017,11 +1018,13 @@ "1:" : "=a" (ret), "=b" (map), - "+m" (opt) + "=m" (opt) : "0" (PCIBIOS_GET_ROUTING_OPTIONS), "1" (0), "D" ((long) &opt), - "S" (&pci_indirect)); + "S" (&pci_indirect), + "m" (opt) + : "memory"); DBG("OK ret=%d, size=%d, map=%x\n", ret, opt.size, map); if (ret & 0xff00) printk(KERN_ERR "PCI: Error %02x when fetching IRQ routing table.\n", (ret >> 8) & 0xff); diff -urN linux-2.4.27/arch/i386/kernel/process.c linux-2.4.28/arch/i386/kernel/process.c --- linux-2.4.27/arch/i386/kernel/process.c 2004-02-18 05:36:30.000000000 -0800 +++ linux-2.4.28/arch/i386/kernel/process.c 2004-11-17 03:54:21.017374006 -0800 @@ -644,7 +644,7 @@ * More important, however, is the fact that this allows us much * more flexibility. */ -void __switch_to(struct task_struct *prev_p, struct task_struct *next_p) +void fastcall __switch_to(struct task_struct *prev_p, struct task_struct *next_p) { struct thread_struct *prev = &prev_p->thread, *next = &next_p->thread; diff -urN linux-2.4.27/arch/i386/kernel/signal.c linux-2.4.28/arch/i386/kernel/signal.c --- linux-2.4.27/arch/i386/kernel/signal.c 2002-08-02 17:39:42.000000000 -0700 +++ linux-2.4.28/arch/i386/kernel/signal.c 2004-11-17 03:54:21.048375280 -0800 @@ -581,7 +581,7 @@ * want to handle. Thus you cannot kill init even with a SIGKILL even by * mistake. */ -int do_signal(struct pt_regs *regs, sigset_t *oldset) +int fastcall do_signal(struct pt_regs *regs, sigset_t *oldset) { siginfo_t info; struct k_sigaction *ka; diff -urN linux-2.4.27/arch/i386/kernel/smp.c linux-2.4.28/arch/i386/kernel/smp.c --- linux-2.4.27/arch/i386/kernel/smp.c 2003-06-13 07:51:29.000000000 -0700 +++ linux-2.4.28/arch/i386/kernel/smp.c 2004-11-17 03:54:21.049375321 -0800 @@ -150,7 +150,7 @@ apic_write_around(APIC_ICR, cfg); } -void send_IPI_self(int vector) +void fastcall send_IPI_self(int vector) { __send_IPI_shortcut(APIC_DEST_SELF, vector); } @@ -497,7 +497,7 @@ * anything. Worst case is that we lose a reschedule ... */ -void smp_send_reschedule(int cpu) +void fastcall smp_send_reschedule(int cpu) { send_IPI_mask(1 << cpu, RESCHEDULE_VECTOR); } diff -urN linux-2.4.27/arch/i386/kernel/vm86.c linux-2.4.28/arch/i386/kernel/vm86.c --- linux-2.4.27/arch/i386/kernel/vm86.c 2003-08-25 04:44:39.000000000 -0700 +++ linux-2.4.28/arch/i386/kernel/vm86.c 2004-11-17 03:54:21.050375362 -0800 @@ -91,7 +91,7 @@ #define VM86_REGS_SIZE2 (sizeof(struct kernel_vm86_regs) - VM86_REGS_SIZE1) struct pt_regs * FASTCALL(save_v86_state(struct kernel_vm86_regs * regs)); -struct pt_regs * save_v86_state(struct kernel_vm86_regs * regs) +struct pt_regs * fastcall save_v86_state(struct kernel_vm86_regs * regs) { struct tss_struct *tss; struct pt_regs *ret; diff -urN linux-2.4.27/arch/ia64/config.in linux-2.4.28/arch/ia64/config.in --- linux-2.4.27/arch/ia64/config.in 2004-02-18 05:36:30.000000000 -0800 +++ linux-2.4.28/arch/ia64/config.in 2004-11-17 03:54:21.050375362 -0800 @@ -123,7 +123,7 @@ bool 'System V IPC' CONFIG_SYSVIPC bool 'BSD Process Accounting' CONFIG_BSD_PROCESS_ACCT bool 'Sysctl support' CONFIG_SYSCTL -tristate 'Kernel support for ELF binaries' CONFIG_BINFMT_ELF +bool 'Kernel support for ELF binaries' CONFIG_BINFMT_ELF tristate 'Kernel support for MISC binaries' CONFIG_BINFMT_MISC if [ "$CONFIG_IA64_HP_SIM" = "n" ]; then diff -urN linux-2.4.27/arch/ia64/defconfig linux-2.4.28/arch/ia64/defconfig --- linux-2.4.27/arch/ia64/defconfig 2004-08-07 16:26:04.000000000 -0700 +++ linux-2.4.28/arch/ia64/defconfig 2004-11-17 03:54:21.051375403 -0800 @@ -755,7 +755,7 @@ # CONFIG_ROOT_NFS is not set CONFIG_NFSD=y CONFIG_NFSD_V3=y -# CONFIG_NFSD_TCP is not set +CONFIG_NFSD_TCP=y CONFIG_SUNRPC=y CONFIG_LOCKD=y CONFIG_LOCKD_V4=y diff -urN linux-2.4.27/arch/m68k/atari/stram.c linux-2.4.28/arch/m68k/atari/stram.c --- linux-2.4.27/arch/m68k/atari/stram.c 2003-06-13 07:51:31.000000000 -0700 +++ linux-2.4.28/arch/m68k/atari/stram.c 2004-11-17 03:54:21.071376226 -0800 @@ -190,7 +190,7 @@ * -1 = do swapping (to whole ST-RAM) if it's less than MAX_STRAM_FRACTION of * total memory */ -static int max_swap_size = -1; +static int max_swap_size = 0; /* start and end of swapping area */ static void *swap_start, *swap_end; @@ -306,7 +306,7 @@ reserve_bootmem (0, PAGE_SIZE); #ifdef CONFIG_STRAM_SWAP - { + if (max_swap_size) { void *swap_data; start_mem = (void *) PAGE_ALIGN ((unsigned long) start_mem); @@ -961,14 +961,33 @@ return( max_start ); } +#ifdef CONFIG_STRAM_SWAP /* setup parameters from command line */ -void __init stram_swap_setup(char *str, int *ints) +void __init stram_swap_setup(char *str) { - if (ints[0] >= 1) - max_swap_size = ((ints[1] < 0 ? 0 : ints[1]) * 1024) & PAGE_MASK; + int ints[3]; + get_options(str, ARRAY_SIZE(ints), ints); + if (ints[0] >= 1) { + if (ints[1] == 1) { + /* always use ST-RAM as swap */ + max_swap_size = -1; + if (ints[0] == 2) { + max_swap_size = ((ints[2] < 0 ? 0 : ints[2]) * 1024) & PAGE_MASK; + } + } + else if (ints[1] == 0) { + /* never use ST-RAM as swap */ + max_swap_size = 0; + } + else { + printk( KERN_WARNING "stram_swap=%d - invalid value\n", ints[1] ); + } + } } +#endif /* CONFIG_STRAM_SWAP */ + /* ------------------------------------------------------------------------ */ /* ST-RAM device */ diff -urN linux-2.4.27/arch/m68k/config.in linux-2.4.28/arch/m68k/config.in --- linux-2.4.27/arch/m68k/config.in 2004-02-18 05:36:30.000000000 -0800 +++ linux-2.4.28/arch/m68k/config.in 2004-11-17 03:54:21.073376308 -0800 @@ -98,7 +98,7 @@ A.OUT CONFIG_KCORE_AOUT" ELF fi tristate 'Kernel support for a.out binaries' CONFIG_BINFMT_AOUT -tristate 'Kernel support for ELF binaries' CONFIG_BINFMT_ELF +bool 'Kernel support for ELF binaries' CONFIG_BINFMT_ELF tristate 'Kernel support for MISC binaries' CONFIG_BINFMT_MISC if [ "$CONFIG_AMIGA" = "y" ]; then diff -urN linux-2.4.27/arch/m68k/kernel/setup.c linux-2.4.28/arch/m68k/kernel/setup.c --- linux-2.4.27/arch/m68k/kernel/setup.c 2004-08-07 16:26:04.000000000 -0700 +++ linux-2.4.28/arch/m68k/kernel/setup.c 2004-11-17 03:54:21.073376308 -0800 @@ -290,13 +290,20 @@ i = 1; } #ifdef CONFIG_ATARI - /* This option must be parsed very early */ + /* These options must be parsed very early */ if (!strncmp( p, "switches=", 9 )) { extern void atari_switches_setup( const char *, int ); atari_switches_setup( p+9, (q = strchr( p+9, ' ' )) ? (q - (p+9)) : strlen(p+9) ); i = 1; } +#ifdef CONFIG_STRAM_SWAP + if (!strncmp( p, "stram_swap=", 11 )) { + extern void stram_swap_setup( char * ); + stram_swap_setup( p+11 ); + i = 1; + } +#endif /* CONFIG_STRAM_SWAP */ #endif if (i) { diff -urN linux-2.4.27/arch/m68k/mm/motorola.c linux-2.4.28/arch/m68k/mm/motorola.c --- linux-2.4.27/arch/m68k/mm/motorola.c 2003-08-25 04:44:39.000000000 -0700 +++ linux-2.4.28/arch/m68k/mm/motorola.c 2004-11-17 03:54:21.074376349 -0800 @@ -264,7 +264,7 @@ printk ("before free_area_init\n"); #endif zones_size[0] = (mach_max_dma_address < (unsigned long)high_memory ? - mach_max_dma_address : (unsigned long)high_memory); + (mach_max_dma_address+1) : (unsigned long)high_memory); zones_size[1] = (unsigned long)high_memory - zones_size[0]; zones_size[0] = (zones_size[0] - PAGE_OFFSET) >> PAGE_SHIFT; diff -urN linux-2.4.27/arch/mips/config-shared.in linux-2.4.28/arch/mips/config-shared.in --- linux-2.4.27/arch/mips/config-shared.in 2004-02-18 05:36:30.000000000 -0800 +++ linux-2.4.28/arch/mips/config-shared.in 2004-11-17 03:54:21.075376390 -0800 @@ -943,7 +943,7 @@ define_bool CONFIG_KCORE_ELF y define_bool CONFIG_KCORE_AOUT n define_bool CONFIG_BINFMT_AOUT n -tristate 'Kernel support for ELF binaries' CONFIG_BINFMT_ELF +bool 'Kernel support for ELF binaries' CONFIG_BINFMT_ELF dep_bool 'Kernel support for Linux/MIPS 32-bit binary compatibility' CONFIG_MIPS32_COMPAT $CONFIG_MIPS64 dep_bool 'Kernel support for o32 binaries' CONFIG_MIPS32_O32 $CONFIG_MIPS32_COMPAT dep_bool 'Kernel support for n32 binaries' CONFIG_MIPS32_N32 $CONFIG_MIPS32_COMPAT diff -urN linux-2.4.27/arch/mips/defconfig linux-2.4.28/arch/mips/defconfig --- linux-2.4.27/arch/mips/defconfig 2004-02-18 05:36:30.000000000 -0800 +++ linux-2.4.28/arch/mips/defconfig 2004-11-17 03:54:21.076376431 -0800 @@ -607,7 +607,7 @@ CONFIG_ROOT_NFS=y CONFIG_NFSD=y # CONFIG_NFSD_V3 is not set -# CONFIG_NFSD_TCP is not set +CONFIG_NFSD_TCP=y CONFIG_SUNRPC=y CONFIG_LOCKD=y # CONFIG_SMB_FS is not set diff -urN linux-2.4.27/arch/mips64/defconfig linux-2.4.28/arch/mips64/defconfig --- linux-2.4.27/arch/mips64/defconfig 2004-02-18 05:36:30.000000000 -0800 +++ linux-2.4.28/arch/mips64/defconfig 2004-11-17 03:54:21.076376431 -0800 @@ -563,7 +563,7 @@ CONFIG_ROOT_NFS=y # CONFIG_NFSD is not set # CONFIG_NFSD_V3 is not set -# CONFIG_NFSD_TCP is not set +CONFIG_NFSD_TCP=y CONFIG_SUNRPC=y CONFIG_LOCKD=y CONFIG_LOCKD_V4=y diff -urN linux-2.4.27/arch/mips64/kernel/linux32.c linux-2.4.28/arch/mips64/kernel/linux32.c --- linux-2.4.27/arch/mips64/kernel/linux32.c 2004-02-18 05:36:30.000000000 -0800 +++ linux-2.4.28/arch/mips64/kernel/linux32.c 2004-11-17 03:54:21.078376514 -0800 @@ -1568,7 +1568,7 @@ asmlinkage int sys32_setsockopt(int fd, int level, int optname, char *optval, int optlen) { - if (optname == SO_ATTACH_FILTER) + if (level == SOL_SOCKET && optname == SO_ATTACH_FILTER) return do_set_attach_filter(fd, level, optname, optval, optlen); if (level == SOL_ICMPV6 && optname == ICMPV6_FILTER) diff -urN linux-2.4.27/arch/parisc/config.in linux-2.4.28/arch/parisc/config.in --- linux-2.4.27/arch/parisc/config.in 2004-02-18 05:36:30.000000000 -0800 +++ linux-2.4.28/arch/parisc/config.in 2004-11-17 03:54:21.079376555 -0800 @@ -88,7 +88,7 @@ bool 'BSD Process Accounting' CONFIG_BSD_PROCESS_ACCT bool 'Sysctl support' CONFIG_SYSCTL define_bool CONFIG_KCORE_ELF y -tristate 'Kernel support for ELF binaries' CONFIG_BINFMT_ELF +bool 'Kernel support for ELF binaries' CONFIG_BINFMT_ELF tristate 'Kernel support for SOM binaries' CONFIG_BINFMT_SOM tristate 'Kernel support for MISC binaries' CONFIG_BINFMT_MISC diff -urN linux-2.4.27/arch/ppc/defconfig linux-2.4.28/arch/ppc/defconfig --- linux-2.4.27/arch/ppc/defconfig 2004-02-18 05:36:30.000000000 -0800 +++ linux-2.4.28/arch/ppc/defconfig 2004-11-17 03:54:21.080376596 -0800 @@ -812,7 +812,7 @@ # CONFIG_ROOT_NFS is not set CONFIG_NFSD=y # CONFIG_NFSD_V3 is not set -# CONFIG_NFSD_TCP is not set +CONFIG_NFSD_TCP=y CONFIG_SUNRPC=y CONFIG_LOCKD=y # CONFIG_SMB_FS is not set diff -urN linux-2.4.27/arch/ppc/kernel/cputable.c linux-2.4.28/arch/ppc/kernel/cputable.c --- linux-2.4.27/arch/ppc/kernel/cputable.c 2004-04-14 06:05:27.000000000 -0700 +++ linux-2.4.28/arch/ppc/kernel/cputable.c 2004-11-17 03:54:21.081376637 -0800 @@ -51,10 +51,20 @@ #define CPU_FTR_ALTIVEC_COMP 0 #endif +/* We need to mark all pages as being coherent if we're SMP or we + * have a 754x and an MPC107 host bridge. + */ +#if defined(CONFIG_SMP) || defined(CONFIG_MPC10X_BRIDGE) +#define CPU_FTR_COMMON CPU_FTR_NEED_COHERENT +#else +#define CPU_FTR_COMMON 0 +#endif + struct cpu_spec cpu_specs[] = { #if CLASSIC_PPC { /* 601 */ 0xffff0000, 0x00010000, "601", + CPU_FTR_COMMON | CPU_FTR_601 | CPU_FTR_HPTE_TABLE, COMMON_PPC | PPC_FEATURE_601_INSTR | PPC_FEATURE_UNIFIED_CACHE, 32, 32, @@ -62,6 +72,7 @@ }, { /* 603 */ 0xffff0000, 0x00030000, "603", + CPU_FTR_COMMON | CPU_FTR_SPLIT_ID_CACHE | CPU_FTR_CAN_DOZE | CPU_FTR_USE_TB | CPU_FTR_CAN_NAP, COMMON_PPC, @@ -70,6 +81,7 @@ }, { /* 603e */ 0xffff0000, 0x00060000, "603e", + CPU_FTR_COMMON | CPU_FTR_SPLIT_ID_CACHE | CPU_FTR_CAN_DOZE | CPU_FTR_USE_TB | CPU_FTR_CAN_NAP, COMMON_PPC, @@ -78,6 +90,7 @@ }, { /* 603ev */ 0xffff0000, 0x00070000, "603ev", + CPU_FTR_COMMON | CPU_FTR_SPLIT_ID_CACHE | CPU_FTR_CAN_DOZE | CPU_FTR_USE_TB | CPU_FTR_CAN_NAP, COMMON_PPC, @@ -86,6 +99,7 @@ }, { /* 604 */ 0xffff0000, 0x00040000, "604", + CPU_FTR_COMMON | CPU_FTR_SPLIT_ID_CACHE | CPU_FTR_USE_TB | CPU_FTR_604_PERF_MON | CPU_FTR_HPTE_TABLE, COMMON_PPC, @@ -94,6 +108,7 @@ }, { /* 604e */ 0xfffff000, 0x00090000, "604e", + CPU_FTR_COMMON | CPU_FTR_SPLIT_ID_CACHE | CPU_FTR_USE_TB | CPU_FTR_604_PERF_MON | CPU_FTR_HPTE_TABLE, COMMON_PPC, @@ -102,6 +117,7 @@ }, { /* 604r */ 0xffff0000, 0x00090000, "604r", + CPU_FTR_COMMON | CPU_FTR_SPLIT_ID_CACHE | CPU_FTR_USE_TB | CPU_FTR_604_PERF_MON | CPU_FTR_HPTE_TABLE, COMMON_PPC, @@ -110,6 +126,7 @@ }, { /* 604ev */ 0xffff0000, 0x000a0000, "604ev", + CPU_FTR_COMMON | CPU_FTR_SPLIT_ID_CACHE | CPU_FTR_USE_TB | CPU_FTR_604_PERF_MON | CPU_FTR_HPTE_TABLE, COMMON_PPC, @@ -118,6 +135,7 @@ }, { /* 740/750 (0x4202, don't support TAU ?) */ 0xffffffff, 0x00084202, "740/750", + CPU_FTR_COMMON | CPU_FTR_SPLIT_ID_CACHE | CPU_FTR_CAN_DOZE | CPU_FTR_USE_TB | CPU_FTR_L2CR | CPU_FTR_HPTE_TABLE | CPU_FTR_CAN_NAP, COMMON_PPC, @@ -126,6 +144,7 @@ }, { /* 745/755 */ 0xfffff000, 0x00083000, "745/755", + CPU_FTR_COMMON | CPU_FTR_SPLIT_ID_CACHE | CPU_FTR_CAN_DOZE | CPU_FTR_USE_TB | CPU_FTR_L2CR | CPU_FTR_TAU | CPU_FTR_HPTE_TABLE | CPU_FTR_CAN_NAP, COMMON_PPC, @@ -134,6 +153,7 @@ }, { /* 750CX */ 0xffffff00, 0x00082200, "750CX", + CPU_FTR_COMMON | CPU_FTR_SPLIT_ID_CACHE | CPU_FTR_CAN_DOZE | CPU_FTR_USE_TB | CPU_FTR_L2CR | CPU_FTR_TAU | CPU_FTR_HPTE_TABLE | CPU_FTR_CAN_NAP, COMMON_PPC, @@ -142,6 +162,7 @@ }, { /* 750FX rev 1.x */ 0xffffff00, 0x70000100, "750FX", + CPU_FTR_COMMON | CPU_FTR_SPLIT_ID_CACHE | CPU_FTR_CAN_DOZE | CPU_FTR_USE_TB | CPU_FTR_L2CR | CPU_FTR_TAU | CPU_FTR_HPTE_TABLE | CPU_FTR_CAN_NAP | CPU_FTR_750FX | CPU_FTR_NO_DPM, @@ -151,6 +172,7 @@ }, { /* 750FX rev 2.0 must disable HID0[DPM] */ 0xffffffff, 0x70000200, "750FX", + CPU_FTR_COMMON | CPU_FTR_SPLIT_ID_CACHE | CPU_FTR_CAN_DOZE | CPU_FTR_USE_TB | CPU_FTR_L2CR | CPU_FTR_TAU | CPU_FTR_HPTE_TABLE | CPU_FTR_CAN_NAP | CPU_FTR_750FX | CPU_FTR_HAS_HIGH_BATS | CPU_FTR_NO_DPM, @@ -160,6 +182,7 @@ }, { /* 750FX (All revs > 2.0) */ 0xffff0000, 0x70000000, "750FX", + CPU_FTR_COMMON | CPU_FTR_SPLIT_ID_CACHE | CPU_FTR_CAN_DOZE | CPU_FTR_USE_TB | CPU_FTR_L2CR | CPU_FTR_TAU | CPU_FTR_HPTE_TABLE | CPU_FTR_CAN_NAP | CPU_FTR_750FX | CPU_FTR_HAS_HIGH_BATS, @@ -169,6 +192,7 @@ }, { /* 740/750 (L2CR bit need fixup for 740) */ 0xffff0000, 0x00080000, "740/750", + CPU_FTR_COMMON | CPU_FTR_SPLIT_ID_CACHE | CPU_FTR_CAN_DOZE | CPU_FTR_USE_TB | CPU_FTR_L2CR | CPU_FTR_TAU | CPU_FTR_HPTE_TABLE | CPU_FTR_CAN_NAP, COMMON_PPC, @@ -177,6 +201,7 @@ }, { /* 7400 rev 1.1 ? (no TAU) */ 0xffffffff, 0x000c1101, "7400 (1.1)", + CPU_FTR_COMMON | CPU_FTR_SPLIT_ID_CACHE | CPU_FTR_CAN_DOZE | CPU_FTR_USE_TB | CPU_FTR_L2CR | CPU_FTR_ALTIVEC_COMP | CPU_FTR_HPTE_TABLE | CPU_FTR_CAN_NAP, @@ -186,6 +211,7 @@ }, { /* 7400 */ 0xffff0000, 0x000c0000, "7400", + CPU_FTR_COMMON | CPU_FTR_SPLIT_ID_CACHE | CPU_FTR_CAN_DOZE | CPU_FTR_USE_TB | CPU_FTR_L2CR | CPU_FTR_TAU | CPU_FTR_ALTIVEC_COMP | CPU_FTR_HPTE_TABLE | CPU_FTR_CAN_NAP, @@ -195,6 +221,7 @@ }, { /* 7410 */ 0xffff0000, 0x800c0000, "7410", + CPU_FTR_COMMON | CPU_FTR_SPLIT_ID_CACHE | CPU_FTR_CAN_DOZE | CPU_FTR_USE_TB | CPU_FTR_L2CR | CPU_FTR_TAU | CPU_FTR_ALTIVEC_COMP | CPU_FTR_HPTE_TABLE | CPU_FTR_CAN_NAP, @@ -204,82 +231,104 @@ }, { /* 7450 1.x - no doze/nap */ 0xffffff00, 0x80000100, "7450", + CPU_FTR_COMMON | CPU_FTR_SPLIT_ID_CACHE | CPU_FTR_USE_TB | CPU_FTR_L2CR | CPU_FTR_ALTIVEC_COMP | CPU_FTR_L3CR | - CPU_FTR_HPTE_TABLE | CPU_FTR_SPEC7450, + CPU_FTR_HPTE_TABLE | CPU_FTR_SPEC7450 | CPU_FTR_NEED_COHERENT, COMMON_PPC | PPC_FEATURE_HAS_ALTIVEC, 32, 32, __setup_cpu_745x }, { /* 7450 2.0 - no doze/nap */ 0xffffffff, 0x80000200, "7450", + CPU_FTR_COMMON | CPU_FTR_SPLIT_ID_CACHE | CPU_FTR_USE_TB | CPU_FTR_L2CR | CPU_FTR_ALTIVEC_COMP | CPU_FTR_L3CR | - CPU_FTR_HPTE_TABLE | CPU_FTR_SPEC7450, + CPU_FTR_HPTE_TABLE | CPU_FTR_SPEC7450 | CPU_FTR_NEED_COHERENT, COMMON_PPC | PPC_FEATURE_HAS_ALTIVEC, 32, 32, __setup_cpu_745x }, { /* 7450 2.1 */ 0xffffffff, 0x80000201, "7450", + CPU_FTR_COMMON | CPU_FTR_SPLIT_ID_CACHE | CPU_FTR_USE_TB | CPU_FTR_CAN_NAP | CPU_FTR_L2CR | CPU_FTR_ALTIVEC_COMP | CPU_FTR_L3CR | CPU_FTR_HPTE_TABLE | CPU_FTR_SPEC7450 | CPU_FTR_NAP_DISABLE_L2_PR | - CPU_FTR_L3_DISABLE_NAP, + CPU_FTR_L3_DISABLE_NAP | CPU_FTR_NEED_COHERENT, COMMON_PPC | PPC_FEATURE_HAS_ALTIVEC, 32, 32, __setup_cpu_745x }, { /* 7450 2.3 and newer */ 0xffff0000, 0x80000000, "7450", + CPU_FTR_COMMON | CPU_FTR_SPLIT_ID_CACHE | CPU_FTR_USE_TB | CPU_FTR_CAN_NAP | CPU_FTR_L2CR | CPU_FTR_ALTIVEC_COMP | CPU_FTR_L3CR | - CPU_FTR_HPTE_TABLE | CPU_FTR_SPEC7450 | CPU_FTR_NAP_DISABLE_L2_PR, + CPU_FTR_HPTE_TABLE | CPU_FTR_SPEC7450 | CPU_FTR_NAP_DISABLE_L2_PR | + CPU_FTR_NEED_COHERENT, COMMON_PPC | PPC_FEATURE_HAS_ALTIVEC, 32, 32, __setup_cpu_745x }, { /* 7455 rev 1.x */ 0xffffff00, 0x80010100, "7455", + CPU_FTR_COMMON | CPU_FTR_SPLIT_ID_CACHE | CPU_FTR_USE_TB | CPU_FTR_L2CR | CPU_FTR_ALTIVEC_COMP | CPU_FTR_L3CR | - CPU_FTR_HPTE_TABLE | CPU_FTR_SPEC7450 | CPU_FTR_HAS_HIGH_BATS, + CPU_FTR_HPTE_TABLE | CPU_FTR_SPEC7450 | CPU_FTR_HAS_HIGH_BATS | + CPU_FTR_NEED_COHERENT, COMMON_PPC | PPC_FEATURE_HAS_ALTIVEC, 32, 32, __setup_cpu_745x }, { /* 7455 rev 2.0 */ 0xffffffff, 0x80010200, "7455", + CPU_FTR_COMMON | CPU_FTR_SPLIT_ID_CACHE | CPU_FTR_USE_TB | CPU_FTR_CAN_NAP | CPU_FTR_L2CR | CPU_FTR_ALTIVEC_COMP | CPU_FTR_L3CR | CPU_FTR_HPTE_TABLE | CPU_FTR_SPEC7450 | CPU_FTR_NAP_DISABLE_L2_PR | - CPU_FTR_L3_DISABLE_NAP | CPU_FTR_HAS_HIGH_BATS, + CPU_FTR_L3_DISABLE_NAP | CPU_FTR_NEED_COHERENT | CPU_FTR_HAS_HIGH_BATS, COMMON_PPC | PPC_FEATURE_HAS_ALTIVEC, 32, 32, __setup_cpu_745x }, { /* 7455 others */ 0xffff0000, 0x80010000, "7455", + CPU_FTR_COMMON | CPU_FTR_SPLIT_ID_CACHE | CPU_FTR_USE_TB | CPU_FTR_CAN_NAP | CPU_FTR_L2CR | CPU_FTR_ALTIVEC_COMP | CPU_FTR_L3CR | CPU_FTR_HPTE_TABLE | CPU_FTR_SPEC7450 | CPU_FTR_NAP_DISABLE_L2_PR | - CPU_FTR_HAS_HIGH_BATS, + CPU_FTR_HAS_HIGH_BATS | CPU_FTR_NEED_COHERENT, COMMON_PPC | PPC_FEATURE_HAS_ALTIVEC, 32, 32, __setup_cpu_745x }, { /* 7457 */ 0xffff0000, 0x80020000, "7457", + CPU_FTR_COMMON | CPU_FTR_SPLIT_ID_CACHE | CPU_FTR_USE_TB | CPU_FTR_CAN_NAP | CPU_FTR_L2CR | CPU_FTR_ALTIVEC_COMP | CPU_FTR_L3CR | CPU_FTR_HPTE_TABLE | CPU_FTR_SPEC7450 | CPU_FTR_NAP_DISABLE_L2_PR | - CPU_FTR_HAS_HIGH_BATS, + CPU_FTR_HAS_HIGH_BATS | CPU_FTR_NEED_COHERENT, + COMMON_PPC | PPC_FEATURE_HAS_ALTIVEC, + 32, 32, + __setup_cpu_745x + }, + { /* 7447A */ + 0xffff0000, 0x80030000, "7447A", + CPU_FTR_COMMON | + CPU_FTR_SPLIT_ID_CACHE | CPU_FTR_USE_TB | CPU_FTR_CAN_NAP | + CPU_FTR_L2CR | CPU_FTR_ALTIVEC_COMP | + CPU_FTR_HPTE_TABLE | CPU_FTR_SPEC7450 | CPU_FTR_NAP_DISABLE_L2_PR | + CPU_FTR_HAS_HIGH_BATS | CPU_FTR_NEED_COHERENT, COMMON_PPC | PPC_FEATURE_HAS_ALTIVEC, 32, 32, __setup_cpu_745x }, { /* 82xx (8240, 8245, 8260 are all 603e cores) */ 0x7fff0000, 0x00810000, "82xx", + CPU_FTR_COMMON | CPU_FTR_SPLIT_ID_CACHE | CPU_FTR_CAN_DOZE | CPU_FTR_USE_TB, COMMON_PPC, 32, 32, @@ -295,6 +344,7 @@ }, { /* default match, we assume split I/D cache & TB (non-601)... */ 0x00000000, 0x00000000, "(generic PPC)", + CPU_FTR_COMMON | CPU_FTR_SPLIT_ID_CACHE | CPU_FTR_USE_TB | CPU_FTR_HPTE_TABLE, COMMON_PPC, 32, 32, @@ -304,6 +354,7 @@ #ifdef CONFIG_PPC64BRIDGE { /* Power3 */ 0xffff0000, 0x00400000, "Power3 (630)", + CPU_FTR_COMMON | CPU_FTR_SPLIT_ID_CACHE | CPU_FTR_USE_TB | CPU_FTR_HPTE_TABLE, COMMON_PPC | PPC_FEATURE_64, 128, 128, @@ -311,6 +362,7 @@ }, { /* Power3+ */ 0xffff0000, 0x00410000, "Power3 (630+)", + CPU_FTR_COMMON | CPU_FTR_SPLIT_ID_CACHE | CPU_FTR_USE_TB | CPU_FTR_HPTE_TABLE, COMMON_PPC | PPC_FEATURE_64, 128, 128, @@ -318,6 +370,7 @@ }, { /* I-star */ 0xffff0000, 0x00360000, "I-star", + CPU_FTR_COMMON | CPU_FTR_SPLIT_ID_CACHE | CPU_FTR_USE_TB | CPU_FTR_HPTE_TABLE, COMMON_PPC | PPC_FEATURE_64, 128, 128, @@ -325,6 +378,7 @@ }, { /* S-star */ 0xffff0000, 0x00370000, "S-star", + CPU_FTR_COMMON | CPU_FTR_SPLIT_ID_CACHE | CPU_FTR_USE_TB | CPU_FTR_HPTE_TABLE, COMMON_PPC | PPC_FEATURE_64, 128, 128, @@ -334,6 +388,7 @@ #ifdef CONFIG_POWER4 { /* Power4 */ 0xffff0000, 0x00350000, "Power4", + CPU_FTR_COMMON | CPU_FTR_SPLIT_ID_CACHE | CPU_FTR_USE_TB | CPU_FTR_HPTE_TABLE, COMMON_PPC | PPC_FEATURE_64, 128, 128, @@ -341,6 +396,7 @@ }, { /* PPC970 */ 0xffff0000, 0x00390000, "PPC970", + CPU_FTR_COMMON | CPU_FTR_SPLIT_ID_CACHE | CPU_FTR_USE_TB | CPU_FTR_HPTE_TABLE | CPU_FTR_ALTIVEC_COMP | CPU_FTR_CAN_NAP, COMMON_PPC | PPC_FEATURE_64 | PPC_FEATURE_HAS_ALTIVEC, @@ -435,7 +491,7 @@ #if !CLASSIC_PPC { /* default match */ 0x00000000, 0x00000000, "(generic PPC)", - 0, + CPU_FTR_COMMON, PPC_FEATURE_32, 32, 32, 0, diff -urN linux-2.4.27/arch/ppc/kernel/ppc_htab.c linux-2.4.28/arch/ppc/kernel/ppc_htab.c --- linux-2.4.27/arch/ppc/kernel/ppc_htab.c 2004-08-07 16:26:04.000000000 -0700 +++ linux-2.4.28/arch/ppc/kernel/ppc_htab.c 2004-11-17 03:54:21.082376678 -0800 @@ -489,7 +489,7 @@ if (!isspace(c)) break; left--; - ((char *) buffer)++; + buffer++; } if (!left) break; diff -urN linux-2.4.27/arch/ppc/mm/hashtable.S linux-2.4.28/arch/ppc/mm/hashtable.S --- linux-2.4.27/arch/ppc/mm/hashtable.S 2003-08-25 04:44:40.000000000 -0700 +++ linux-2.4.28/arch/ppc/mm/hashtable.S 2004-11-17 03:54:21.083376719 -0800 @@ -327,9 +327,9 @@ rlwimi r5,r5,32-2,31,31 /* _PAGE_USER -> PP lsb */ ori r8,r8,0xe14 /* clear out reserved bits and M */ andc r8,r5,r8 /* PP = user? (rw&dirty? 2: 3): 0 */ -#ifdef CONFIG_SMP +BEGIN_FTR_SECTION ori r8,r8,_PAGE_COHERENT /* set M (coherence required) */ -#endif +END_FTR_SECTION_IFSET(CPU_FTR_NEED_COHERENT) /* Construct the high word of the PPC-style PTE (r5) */ #ifndef CONFIG_PPC64BRIDGE diff -urN linux-2.4.27/arch/ppc/mm/ppc_mmu.c linux-2.4.28/arch/ppc/mm/ppc_mmu.c --- linux-2.4.27/arch/ppc/mm/ppc_mmu.c 2003-11-28 10:26:19.000000000 -0800 +++ linux-2.4.28/arch/ppc/mm/ppc_mmu.c 2004-11-17 03:54:21.083376719 -0800 @@ -106,10 +106,10 @@ int wimgxpp; union ubat *bat = BATS[index]; -#ifdef CONFIG_SMP - if ((flags & _PAGE_NO_CACHE) == 0) + if (((flags & _PAGE_NO_CACHE) == 0) && + (cur_cpu_spec[0]->cpu_features & CPU_FTR_NEED_COHERENT)) flags |= _PAGE_COHERENT; -#endif + bl = (size >> 17) - 1; if (PVR_VER(mfspr(PVR)) != 1) { /* 603, 604, etc. */ diff -urN linux-2.4.27/arch/ppc/platforms/residual.c linux-2.4.28/arch/ppc/platforms/residual.c --- linux-2.4.27/arch/ppc/platforms/residual.c 2003-08-25 04:44:40.000000000 -0700 +++ linux-2.4.28/arch/ppc/platforms/residual.c 2004-11-17 03:54:21.084376760 -0800 @@ -493,7 +493,7 @@ size=tag_small_count(pkt->S1_Pack.Tag)+1; printsmallpacket(pkt, size); } - (unsigned char *) pkt+=size; + pkt = (PnP_TAG_PACKET *)((unsigned char *) pkt + size); } while (pkt->S1_Pack.Tag != END_TAG); } diff -urN linux-2.4.27/arch/ppc64/defconfig linux-2.4.28/arch/ppc64/defconfig --- linux-2.4.27/arch/ppc64/defconfig 2004-02-18 05:36:30.000000000 -0800 +++ linux-2.4.28/arch/ppc64/defconfig 2004-11-17 03:54:21.085376801 -0800 @@ -668,7 +668,7 @@ # CONFIG_ROOT_NFS is not set CONFIG_NFSD=y CONFIG_NFSD_V3=y -# CONFIG_NFSD_TCP is not set +CONFIG_NFSD_TCP=y CONFIG_SUNRPC=y CONFIG_LOCKD=y CONFIG_LOCKD_V4=y diff -urN linux-2.4.27/arch/ppc64/kernel/rtas-proc.c linux-2.4.28/arch/ppc64/kernel/rtas-proc.c --- linux-2.4.27/arch/ppc64/kernel/rtas-proc.c 2004-08-07 16:26:04.000000000 -0700 +++ linux-2.4.28/arch/ppc64/kernel/rtas-proc.c 2004-11-17 03:54:21.086376843 -0800 @@ -406,7 +406,7 @@ n = sn - pos; if (n > count) n = count; - if (copy_to_user(buf, tmpbuf + pos), n) { + if (copy_to_user(buf, tmpbuf + pos, n)) { kfree(tmpbuf); return -EFAULT; } diff -urN linux-2.4.27/arch/ppc64/kernel/sys_ppc32.c linux-2.4.28/arch/ppc64/kernel/sys_ppc32.c --- linux-2.4.27/arch/ppc64/kernel/sys_ppc32.c 2004-02-18 05:36:30.000000000 -0800 +++ linux-2.4.28/arch/ppc64/kernel/sys_ppc32.c 2004-11-17 03:54:21.088376925 -0800 @@ -3221,7 +3221,7 @@ PPCDBG(PPCDBG_SYS32,"sys32_setsockopt - running - pid=%ld, comm=%s\n", current->pid, current->comm); - if (optname == SO_ATTACH_FILTER) { + if (level == SOL_SOCKET && optname == SO_ATTACH_FILTER) { struct sock_fprog32 { __u16 len; __u32 filter; diff -urN linux-2.4.27/arch/s390/config.in linux-2.4.28/arch/s390/config.in --- linux-2.4.27/arch/s390/config.in 2003-11-28 10:26:19.000000000 -0800 +++ linux-2.4.28/arch/s390/config.in 2004-11-17 03:54:21.089376966 -0800 @@ -57,7 +57,7 @@ bool 'BSD Process Accounting' CONFIG_BSD_PROCESS_ACCT bool 'Sysctl support' CONFIG_SYSCTL define_bool CONFIG_KCORE_ELF y -tristate 'Kernel support for ELF binaries' CONFIG_BINFMT_ELF +bool 'Kernel support for ELF binaries' CONFIG_BINFMT_ELF tristate 'Kernel support for MISC binaries' CONFIG_BINFMT_MISC bool 'Show crashed user process info' CONFIG_PROCESS_DEBUG bool 'Pseudo page fault support' CONFIG_PFAULT diff -urN linux-2.4.27/arch/s390/defconfig linux-2.4.28/arch/s390/defconfig --- linux-2.4.27/arch/s390/defconfig 2004-02-18 05:36:30.000000000 -0800 +++ linux-2.4.28/arch/s390/defconfig 2004-11-17 03:54:21.090377007 -0800 @@ -129,6 +129,14 @@ # CONFIG_CHANDEV=y CONFIG_HOTPLUG=y +CONFIG_QETH=m + +# +# Gigabit Ethernet default settings +# +CONFIG_QETH_IPV6=y +CONFIG_QETH_VLAN=y +# CONFIG_QETH_PERF_STATS is not set CONFIG_CTC=m CONFIG_IUCV=m @@ -242,7 +250,6 @@ # # SCTP Configuration (EXPERIMENTAL) # -CONFIG_IPV6_SCTP__=m # CONFIG_IP_SCTP is not set # CONFIG_ATM is not set CONFIG_VLAN_8021Q=m @@ -351,7 +358,7 @@ # CONFIG_ROOT_NFS is not set CONFIG_NFSD=y CONFIG_NFSD_V3=y -# CONFIG_NFSD_TCP is not set +CONFIG_NFSD_TCP=y CONFIG_SUNRPC=y CONFIG_LOCKD=y CONFIG_LOCKD_V4=y diff -urN linux-2.4.27/arch/s390/kernel/entry.S linux-2.4.28/arch/s390/kernel/entry.S --- linux-2.4.27/arch/s390/kernel/entry.S 2003-06-13 07:51:32.000000000 -0700 +++ linux-2.4.28/arch/s390/kernel/entry.S 2004-11-17 03:54:21.091377048 -0800 @@ -690,7 +690,12 @@ io_int_handler: SAVE_ALL_BASE SAVE_ALL __LC_IO_OLD_PSW,0 + mc 0,0 GET_CURRENT # load pointer to task_struct to R9 + stck __LC_INT_CLOCK + clc __LC_INT_CLOCK(8),__LC_JIFFY_TIMER + bhe BASED(io_handle_tick) +io_call_handler: l %r1,BASED(.Ldo_IRQ) # load address of do_IRQ la %r2,SP_PTREGS(%r15) # address of register-save area sr %r3,%r3 @@ -725,6 +730,15 @@ RESTORE_ALL 0 # +# account tick +# +io_handle_tick: + l %r1,BASED(.Laccount_ticks) + la %r2,SP_PTREGS(%r15) # address of register-save area + la %r14,BASED(io_call_handler) + br %r1 + +# # call do_softirq # io_handle_bottom_half: @@ -758,8 +772,13 @@ ext_int_handler: SAVE_ALL_BASE SAVE_ALL __LC_EXT_OLD_PSW,0 - GET_CURRENT # load pointer to task_struct to R9 + mc 0, 0 + GET_CURRENT # load pointer to task_struct to R9 lh %r6,__LC_EXT_INT_CODE # get interruption code + stck __LC_INT_CLOCK + clc __LC_INT_CLOCK(8),__LC_JIFFY_TIMER + bhe BASED(ext_handle_tick) +ext_call_handler: lr %r1,%r6 # calculate index = code & 0xff n %r1,BASED(.Lc0xff) sll %r1,2 @@ -770,7 +789,8 @@ ext_int_loop: ch %r6,8(%r7) # compare external interrupt code bne BASED(ext_int_next) - l %r1,4(%r7) # get handler address + icm %r1,15,4(%r7) # get handler address + bz BASED(ext_int_next) la %r2,SP_PTREGS(%r15) # address of register-save area lr %r3,%r6 # interruption code basr %r14,%r1 # call handler @@ -779,6 +799,15 @@ bnz BASED(ext_int_loop) b BASED(io_return) +# +# account tick +# +ext_handle_tick: + l %r1,BASED(.Laccount_ticks) + la %r2,SP_PTREGS(%r15) # address of register-save area + la %r14,BASED(ext_call_handler) + br %r1 + /* * Machine check handler routines */ @@ -787,6 +816,7 @@ mcck_int_handler: SAVE_ALL_BASE SAVE_ALL __LC_MCK_OLD_PSW,0 + mc 0, 0 l %r1,BASED(.Ls390_mcck) basr %r14,%r1 # call machine check handler mcck_return: @@ -869,4 +899,4 @@ .Lvfork: .long sys_vfork .Lschedtail: .long schedule_tail - +.Laccount_ticks:.long account_ticks diff -urN linux-2.4.27/arch/s390/kernel/s390_ksyms.c linux-2.4.28/arch/s390/kernel/s390_ksyms.c --- linux-2.4.27/arch/s390/kernel/s390_ksyms.c 2004-02-18 05:36:30.000000000 -0800 +++ linux-2.4.28/arch/s390/kernel/s390_ksyms.c 2004-11-17 03:54:21.091377048 -0800 @@ -7,6 +7,7 @@ #include #include #include +#include #include #include #include @@ -66,3 +67,7 @@ EXPORT_SYMBOL(get_storage_key); EXPORT_SYMBOL_NOVERS(do_call_softirq); EXPORT_SYMBOL(sys_wait4); +EXPORT_SYMBOL(smp_call_function_on); +EXPORT_SYMBOL(show_trace); +EXPORT_SYMBOL(cpcmd); + diff -urN linux-2.4.27/arch/s390/kernel/smp.c linux-2.4.28/arch/s390/kernel/smp.c --- linux-2.4.27/arch/s390/kernel/smp.c 2002-11-28 15:53:11.000000000 -0800 +++ linux-2.4.28/arch/s390/kernel/smp.c 2004-11-17 03:54:21.092377089 -0800 @@ -92,7 +92,7 @@ extern void reipl(unsigned long devno); -static sigp_ccode smp_ext_bitcall(int, ec_bit_sig); +static void smp_ext_bitcall(int, ec_bit_sig); static void smp_ext_bitcall_others(ec_bit_sig); /* @@ -131,7 +131,7 @@ * in the system. */ -int smp_call_function (void (*func) (void *info), void *info, int nonatomic, +int smp_call_function(void (*func) (void *info), void *info, int nonatomic, int wait) /* * [SUMMARY] Run a function on all other CPUs. @@ -162,7 +162,7 @@ spin_lock_bh(&call_lock); call_data = &data; /* Send a message to all other CPUs and wait for them to respond */ - smp_ext_bitcall_others(ec_call_function); + smp_ext_bitcall_others(ec_call_function); /* Wait for response */ while (atomic_read(&data.started) != cpus) @@ -176,9 +176,54 @@ return 0; } +/* + * Call a function on one CPU + * cpu : the CPU the function should be executed on + * + * You must not call this function with disabled interrupts or from a + * hardware interrupt handler, you may call it from a bottom half handler. + */ +int smp_call_function_on(void (*func) (void *info), void *info, + int nonatomic, int wait, int cpu) +{ + struct call_data_struct data; + + if (!atomic_read(&smp_commenced)) + return 0; + + if (smp_processor_id() == cpu) { + /* direct call to function */ + func(info); + return 0; + } + + data.func = func; + data.info = info; + + atomic_set(&data.started, 0); + data.wait = wait; + if (wait) + atomic_set(&data.finished, 0); + + spin_lock_bh(&call_lock); + call_data = &data; + smp_ext_bitcall(cpu, ec_call_function); + + /* Wait for response */ + while (atomic_read(&data.started) != 1) + barrier(); + + if (wait) + while (atomic_read(&data.finished) != 1) + barrier(); + + spin_unlock_bh(&call_lock); + return 0; +} + static inline void do_send_stop(void) { - u32 dummy; + unsigned long dummy; int i; /* stop all processors */ @@ -199,7 +244,7 @@ static inline void do_store_status(void) { unsigned long low_core_addr; - u32 dummy; + unsigned long dummy; int i; /* store status of all processors in their lowcores (real 0) */ @@ -328,42 +373,41 @@ } /* - * Send an external call sigp to another cpu and return without waiting + * Send an external call sigp to another cpu and wait * for its completion. */ -static sigp_ccode smp_ext_bitcall(int cpu, ec_bit_sig sig) +static void smp_ext_bitcall(int cpu, ec_bit_sig sig) { - struct _lowcore *lowcore = get_cpu_lowcore(cpu); - sigp_ccode ccode; + struct _lowcore *lowcore = get_cpu_lowcore(cpu); - /* - * Set signaling bit in lowcore of target cpu and kick it - */ - atomic_set_mask(1<ext_call_fast); - ccode = signal_processor(cpu, sigp_external_call); - return ccode; + /* + * Set signaling bit in lowcore of target cpu and kick it + */ + atomic_set_mask(1<ext_call_fast); + while(signal_processor(cpu, sigp_external_call) == sigp_busy) + udelay(10); } /* * Send an external call sigp to every other cpu in the system and - * return without waiting for its completion. + * wait for its completion. */ static void smp_ext_bitcall_others(ec_bit_sig sig) { - struct _lowcore *lowcore; - int i; + struct _lowcore *lowcore; + int i; - for (i = 0; i < smp_num_cpus; i++) { - if (smp_processor_id() == i) - continue; - lowcore = get_cpu_lowcore(i); - /* - * Set signaling bit in lowcore of target cpu and kick it - */ - atomic_set_mask(1<ext_call_fast); - while (signal_processor(i, sigp_external_call) == sigp_busy) + for (i = 0; i < smp_num_cpus; i++) { + if (smp_processor_id() == i) + continue; + lowcore = get_cpu_lowcore(i); + /* + * Set signaling bit in lowcore of target cpu and kick it + */ + atomic_set_mask(1<ext_call_fast); + while (signal_processor(i, sigp_external_call) == sigp_busy) udelay(10); - } + } } /* diff -urN linux-2.4.27/arch/s390/kernel/time.c linux-2.4.28/arch/s390/kernel/time.c --- linux-2.4.27/arch/s390/kernel/time.c 2003-06-13 07:51:32.000000000 -0700 +++ linux-2.4.28/arch/s390/kernel/time.c 2004-11-17 03:54:21.093377130 -0800 @@ -34,13 +34,20 @@ #include /* change this if you have some constant time drift */ -#define USECS_PER_JIFFY ((unsigned long) 1000000/HZ) #define CLK_TICKS_PER_JIFFY ((unsigned long) USECS_PER_JIFFY << 12) +/* + * Create a small time difference between the timer interrupts + * on the different cpus to avoid lock contention. + */ +#define USECS_PER_JIFFY ((unsigned long) 1000000/HZ) +#define CPU_DEVIATION (smp_processor_id() << 12) + #define TICK_SIZE tick static ext_int_info_t ext_int_info_timer; -static uint64_t init_timer_cc; +static u64 init_timer_cc; +static u64 xtime_cc; extern rwlock_t xtime_lock; extern unsigned long wall_jiffies; @@ -83,7 +90,7 @@ { __u64 now; - asm ("STCK 0(%0)" : : "a" (&now) : "memory", "cc"); + asm volatile ("STCK 0(%0)" : : "a" (&now) : "memory", "cc"); now = (now - init_timer_cc) >> 12; /* We require the offset from the latest update of xtime */ now -= (__u64) wall_jiffies*USECS_PER_JIFFY; @@ -137,41 +144,72 @@ write_unlock_irq(&xtime_lock); } +static inline __u32 div64_32(__u64 dividend, __u32 divisor) +{ + register_pair rp; + + rp.pair = dividend; + asm ("dr %0,%1" : "+d" (rp) : "d" (divisor)); + return rp.subreg.odd; +} + /* * timer_interrupt() needs to keep up the real-time clock, * as well as call the "do_timer()" routine every clocktick */ - -#ifdef CONFIG_SMP -extern __u16 boot_cpu_addr; -#endif - -static void do_comparator_interrupt(struct pt_regs *regs, __u16 error_code) +void account_ticks(struct pt_regs *regs) { int cpu = smp_processor_id(); + __u64 tmp; + __u32 ticks; + + /* Calculate how many ticks have passed. */ + tmp = S390_lowcore.int_clock - S390_lowcore.jiffy_timer; + if (tmp >= 2*CLK_TICKS_PER_JIFFY) { + ticks = div64_32(tmp >> 1, CLK_TICKS_PER_JIFFY >> 1) + 1; + S390_lowcore.jiffy_timer += + CLK_TICKS_PER_JIFFY * (__u64) ticks; + } else if (tmp > CLK_TICKS_PER_JIFFY) { + ticks = 2; + S390_lowcore.jiffy_timer += 2*CLK_TICKS_PER_JIFFY; + } else { + ticks = 1; + S390_lowcore.jiffy_timer += CLK_TICKS_PER_JIFFY; + } + + /* set clock comparator for next tick */ + tmp = S390_lowcore.jiffy_timer + CPU_DEVIATION; + asm volatile ("SCKC %0" : : "m" (tmp)); irq_enter(cpu, 0); +#ifdef CONFIG_SMP /* - * set clock comparator for next tick + * Do not rely on the boot cpu to do the calls to do_timer. + * Spread it over all cpus instead. */ - S390_lowcore.jiffy_timer += CLK_TICKS_PER_JIFFY; - asm volatile ("SCKC %0" : : "m" (S390_lowcore.jiffy_timer)); - -#ifdef CONFIG_SMP - if (S390_lowcore.cpu_data.cpu_addr == boot_cpu_addr) - write_lock(&xtime_lock); - - update_process_times(user_mode(regs)); - - if (S390_lowcore.cpu_data.cpu_addr == boot_cpu_addr) { - do_timer(regs); - write_unlock(&xtime_lock); + write_lock(&xtime_lock); + if (S390_lowcore.jiffy_timer > xtime_cc) { + __u32 xticks; + + tmp = S390_lowcore.jiffy_timer - xtime_cc; + if (tmp >= 2*CLK_TICKS_PER_JIFFY) { + xticks = div64_32(tmp >> 1, CLK_TICKS_PER_JIFFY >> 1); + xtime_cc += (__u64) xticks * CLK_TICKS_PER_JIFFY; + } else { + xticks = 1; + xtime_cc += CLK_TICKS_PER_JIFFY; + } + while (xticks--) + do_timer(regs); } + write_unlock(&xtime_lock); + while (ticks--) + update_process_times(user_mode(regs)); #else - do_timer(regs); + while (ticks--) + do_timer(regs); #endif - irq_exit(cpu, 0); } @@ -180,11 +218,13 @@ */ void init_cpu_timer(void) { - unsigned long cr0; + unsigned long cr0; + __u64 timer; - S390_lowcore.jiffy_timer = (__u64) jiffies * CLK_TICKS_PER_JIFFY; - S390_lowcore.jiffy_timer += init_timer_cc + CLK_TICKS_PER_JIFFY; - asm volatile ("SCKC %0" : : "m" (S390_lowcore.jiffy_timer)); + timer = init_timer_cc + (__u64) jiffies * CLK_TICKS_PER_JIFFY; + S390_lowcore.jiffy_timer = timer + CLK_TICKS_PER_JIFFY; + timer += CLK_TICKS_PER_JIFFY + CPU_DEVIATION; + asm volatile ("SCKC %0" : : "m" (timer)); /* allow clock comparator timer interrupt */ asm volatile ("STCTL 0,0,%0" : "=m" (cr0) : : "memory"); cr0 |= 0x800; @@ -220,12 +260,13 @@ } /* set xtime */ + xtime_cc = init_timer_cc + CLK_TICKS_PER_JIFFY; set_time_cc = init_timer_cc - 0x8126d60e46000000LL + (0x3c26700LL*1000000*4096); tod_to_timeval(set_time_cc, &xtime); /* request the 0x1004 external interrupt */ - if (register_early_external_interrupt(0x1004, do_comparator_interrupt, + if (register_early_external_interrupt(0x1004, NULL, &ext_int_info_timer) != 0) panic("Couldn't request external interrupt 0x1004"); diff -urN linux-2.4.27/arch/s390x/config.in linux-2.4.28/arch/s390x/config.in --- linux-2.4.27/arch/s390x/config.in 2003-11-28 10:26:19.000000000 -0800 +++ linux-2.4.28/arch/s390x/config.in 2004-11-17 03:54:21.093377130 -0800 @@ -60,7 +60,7 @@ bool 'BSD Process Accounting' CONFIG_BSD_PROCESS_ACCT bool 'Sysctl support' CONFIG_SYSCTL define_bool CONFIG_KCORE_ELF y -tristate 'Kernel support for ELF binaries' CONFIG_BINFMT_ELF +bool 'Kernel support for ELF binaries' CONFIG_BINFMT_ELF tristate 'Kernel support for MISC binaries' CONFIG_BINFMT_MISC bool 'Show crashed user process info' CONFIG_PROCESS_DEBUG bool 'Pseudo page fault support' CONFIG_PFAULT diff -urN linux-2.4.27/arch/s390x/defconfig linux-2.4.28/arch/s390x/defconfig --- linux-2.4.27/arch/s390x/defconfig 2004-02-18 05:36:30.000000000 -0800 +++ linux-2.4.28/arch/s390x/defconfig 2004-11-17 03:54:21.094377171 -0800 @@ -129,6 +129,14 @@ # CONFIG_CHANDEV=y CONFIG_HOTPLUG=y +CONFIG_QETH=m + +# +# Gigabit Ethernet default settings +# +CONFIG_QETH_IPV6=y +CONFIG_QETH_VLAN=y +# CONFIG_QETH_PERF_STATS is not set CONFIG_CTC=m CONFIG_IUCV=m @@ -291,7 +299,7 @@ # CONFIG_ROOT_NFS is not set CONFIG_NFSD=y CONFIG_NFSD_V3=y -# CONFIG_NFSD_TCP is not set +CONFIG_NFSD_TCP=y CONFIG_SUNRPC=y CONFIG_LOCKD=y CONFIG_LOCKD_V4=y diff -urN linux-2.4.27/arch/s390x/kernel/entry.S linux-2.4.28/arch/s390x/kernel/entry.S --- linux-2.4.27/arch/s390x/kernel/entry.S 2003-08-25 04:44:40.000000000 -0700 +++ linux-2.4.28/arch/s390x/kernel/entry.S 2004-11-17 03:54:21.095377213 -0800 @@ -722,7 +722,12 @@ .globl io_int_handler io_int_handler: SAVE_ALL __LC_IO_OLD_PSW,0 + mc 0, 0 GET_CURRENT # load pointer to task_struct to R9 + stck __LC_INT_CLOCK + clc __LC_INT_CLOCK(8),__LC_JIFFY_TIMER + jhe io_handle_tick +io_call_handler: la %r2,SP_PTREGS(%r15) # address of register-save area llgh %r3,__LC_SUBCHANNEL_NR # load subchannel number llgf %r4,__LC_IO_INT_PARM # load interuption parm @@ -780,14 +785,27 @@ larl %r14,io_leave jg do_signal # return point is io_leave +# +# account tick +# +io_handle_tick: + la %r2,SP_PTREGS(%r15) # address of register-save area + larl %r14,io_call_handler + jg account_ticks + /* * External interrupt handler routine */ .globl ext_int_handler ext_int_handler: SAVE_ALL __LC_EXT_OLD_PSW,0 + mc 0, 0 GET_CURRENT # load pointer to task_struct to R9 llgh %r6,__LC_EXT_INT_CODE # get interruption code + stck __LC_INT_CLOCK + clc __LC_INT_CLOCK(8),__LC_JIFFY_TIMER + jhe ext_handle_tick +ext_call_handler: lgr %r1,%r6 # calculate index = code & 0xff nill %r1,0xff sll %r1,3 @@ -799,6 +817,8 @@ ch %r6,16(%r7) # compare external interrupt code jne ext_int_next lg %r1,8(%r7) # get handler address + ltgr %r1,%r1 + jz ext_int_next la %r2,SP_PTREGS(%r15) # address of register-save area lgr %r3,%r6 # interruption code basr %r14,%r1 # call handler @@ -808,12 +828,21 @@ jnz ext_int_loop j io_return +# +# account tick +# +ext_handle_tick: + la %r2,SP_PTREGS(%r15) # address of register-save area + larl %r14,ext_call_handler + jg account_ticks + /* * Machine check handler routines */ .globl mcck_int_handler mcck_int_handler: SAVE_ALL __LC_MCK_OLD_PSW,0 + mc 0, 0 brasl %r14,s390_do_machine_check mcck_return: RESTORE_ALL 0 diff -urN linux-2.4.27/arch/s390x/kernel/ioctl32.c linux-2.4.28/arch/s390x/kernel/ioctl32.c --- linux-2.4.27/arch/s390x/kernel/ioctl32.c 2003-08-25 04:44:40.000000000 -0700 +++ linux-2.4.28/arch/s390x/kernel/ioctl32.c 2004-11-17 03:54:21.096377254 -0800 @@ -34,6 +34,7 @@ #include #include #include +#include "../../../drivers/s390/char/tubio.h" #include "linux32.h" @@ -535,6 +536,12 @@ IOCTL32_HANDLER(HDIO_GETGEO, hd_geometry_ioctl), + IOCTL32_DEFAULT(TUBICMD), + IOCTL32_DEFAULT(TUBOCMD), + IOCTL32_DEFAULT(TUBGETI), + IOCTL32_DEFAULT(TUBGETO), + IOCTL32_DEFAULT(TUBSETMOD), + IOCTL32_DEFAULT(TUBGETMOD), IOCTL32_DEFAULT(TCGETA), IOCTL32_DEFAULT(TCSETA), IOCTL32_DEFAULT(TCSETAW), diff -urN linux-2.4.27/arch/s390x/kernel/linux32.c linux-2.4.28/arch/s390x/kernel/linux32.c --- linux-2.4.27/arch/s390x/kernel/linux32.c 2004-02-18 05:36:30.000000000 -0800 +++ linux-2.4.28/arch/s390x/kernel/linux32.c 2004-11-17 03:54:21.099377377 -0800 @@ -4427,7 +4427,7 @@ ret = sys_newstat(tmp, &s); set_fs (old_fs); putname(tmp); - if (putstat64 (statbuf, &s)) + if (!ret && putstat64 (statbuf, &s)) return -EFAULT; return ret; } @@ -4451,7 +4451,7 @@ ret = sys_newlstat(tmp, &s); set_fs (old_fs); putname(tmp); - if (putstat64 (statbuf, &s)) + if (!ret && putstat64 (statbuf, &s)) return -EFAULT; return ret; } @@ -4467,7 +4467,7 @@ set_fs (KERNEL_DS); ret = sys_newfstat(fd, &s); set_fs (old_fs); - if (putstat64 (statbuf, &s)) + if (!ret && putstat64 (statbuf, &s)) return -EFAULT; return ret; } @@ -4507,7 +4507,7 @@ error = do_mmap_pgoff(file, addr, len, prot, flags, pgoff); if (!IS_ERR((void *) error) && error + len >= 0x80000000ULL) { /* Result is out of bounds. */ - do_munmap(current->mm, addr, len); + do_munmap(current->mm, error, len); error = -ENOMEM; } up_write(¤t->mm->mmap_sem); diff -urN linux-2.4.27/arch/s390x/kernel/s390_ksyms.c linux-2.4.28/arch/s390x/kernel/s390_ksyms.c --- linux-2.4.27/arch/s390x/kernel/s390_ksyms.c 2004-02-18 05:36:30.000000000 -0800 +++ linux-2.4.28/arch/s390x/kernel/s390_ksyms.c 2004-11-17 03:54:21.099377377 -0800 @@ -9,6 +9,7 @@ #include #include #include +#include #include #include #include @@ -83,3 +84,7 @@ EXPORT_SYMBOL(console_device); EXPORT_SYMBOL_NOVERS(do_call_softirq); EXPORT_SYMBOL(sys_wait4); +EXPORT_SYMBOL(smp_call_function_on); +EXPORT_SYMBOL(show_trace); +EXPORT_SYMBOL(cpcmd); + diff -urN linux-2.4.27/arch/s390x/kernel/smp.c linux-2.4.28/arch/s390x/kernel/smp.c --- linux-2.4.27/arch/s390x/kernel/smp.c 2003-06-13 07:51:32.000000000 -0700 +++ linux-2.4.28/arch/s390x/kernel/smp.c 2004-11-17 03:54:21.100377418 -0800 @@ -92,7 +92,7 @@ extern void reipl(unsigned long devno); -static sigp_ccode smp_ext_bitcall(int, ec_bit_sig); +static void smp_ext_bitcall(int, ec_bit_sig); static void smp_ext_bitcall_others(ec_bit_sig); /* @@ -131,7 +131,7 @@ * in the system. */ -int smp_call_function (void (*func) (void *info), void *info, int nonatomic, +int smp_call_function(void (*func) (void *info), void *info, int nonatomic, int wait) /* * [SUMMARY] Run a function on all other CPUs. @@ -176,46 +176,92 @@ return 0; } +/* + * Call a function only on one CPU + * cpu : the CPU the function should be executed on + * + * You must not call this function with disabled interrupts or from a + * hardware interrupt handler, you may call it from a bottom half handler. + */ +int smp_call_function_on(void (*func) (void *info), void *info, + int nonatomic, int wait, int cpu) +{ + struct call_data_struct data; + + if (!atomic_read(&smp_commenced)) + return 0; + + if (smp_processor_id() == cpu) { + /* direct call to function */ + func(info); + return 0; + } + + data.func = func; + data.info = info; + + atomic_set(&data.started, 0); + data.wait = wait; + if (wait) + atomic_set(&data.finished, 0); + + spin_lock_bh(&call_lock); + call_data = &data; + smp_ext_bitcall(cpu, ec_call_function); + + /* Wait for response */ + while (atomic_read(&data.started) != 1) + barrier(); + + if (wait) + while (atomic_read(&data.finished) != 1) + barrier(); + + spin_unlock_bh(&call_lock); + return 0; +} + + static inline void do_send_stop(void) { - u32 dummy; - int i; + unsigned long dummy; + int i; - /* stop all processors */ - for (i = 0; i < smp_num_cpus; i++) { - if (smp_processor_id() != i) { - int ccode; - do { - ccode = signal_processor_ps( - &dummy, - 0, - i, - sigp_stop); - } while(ccode == sigp_busy); - } - } + /* stop all processors */ + for (i = 0; i < smp_num_cpus; i++) { + if (smp_processor_id() != i) { + int ccode; + do { + ccode = signal_processor_ps( + &dummy, + 0, + i, + sigp_stop); + } while(ccode == sigp_busy); + } + } } static inline void do_store_status(void) { - unsigned long low_core_addr; - u32 dummy; - int i; + unsigned long low_core_addr; + unsigned long dummy; + int i; - /* store status of all processors in their lowcores (real 0) */ - for (i = 0; i < smp_num_cpus; i++) { - if (smp_processor_id() != i) { - int ccode; - low_core_addr = (unsigned long)get_cpu_lowcore(i); - do { - ccode = signal_processor_ps( - &dummy, - low_core_addr, - i, - sigp_store_status_at_address); - } while(ccode == sigp_busy); - } - } + /* store status of all processors in their lowcores (real 0) */ + for (i = 0; i < smp_num_cpus; i++) { + if (smp_processor_id() != i) { + int ccode; + low_core_addr = (unsigned long)get_cpu_lowcore(i); + do { + ccode = signal_processor_ps( + &dummy, + low_core_addr, + i, + sigp_store_status_at_address); + } while(ccode == sigp_busy); + } + } } /* @@ -224,8 +270,8 @@ */ void smp_send_stop(void) { - /* write magic number to zero page (absolute 0) */ - get_cpu_lowcore(smp_processor_id())->panic_magic = __PANIC_MAGIC; + /* write magic number to zero page (absolute 0) */ + get_cpu_lowcore(smp_processor_id())->panic_magic = __PANIC_MAGIC; /* stop other processors. */ do_send_stop(); @@ -263,7 +309,7 @@ void machine_restart_smp(char * __unused) { cpu_restart_map = cpu_online_map; - smp_call_function(do_machine_restart, NULL, 0, 0); + smp_call_function(do_machine_restart, NULL, 0, 0); do_machine_restart(NULL); } @@ -282,7 +328,7 @@ void machine_halt_smp(void) { - smp_call_function(do_machine_halt, NULL, 0, 0); + smp_call_function(do_machine_halt, NULL, 0, 0); do_machine_halt(NULL); } @@ -301,7 +347,7 @@ void machine_power_off_smp(void) { - smp_call_function(do_machine_power_off, NULL, 0, 0); + smp_call_function(do_machine_power_off, NULL, 0, 0); do_machine_power_off(NULL); } @@ -312,55 +358,52 @@ void do_ext_call_interrupt(struct pt_regs *regs, __u16 code) { - unsigned long bits; + unsigned long bits; - /* - * handle bit signal external calls - * - * For the ec_schedule signal we have to do nothing. All the work - * is done automatically when we return from the interrupt. - */ + /* + * handle bit signal external calls + * + * For the ec_schedule signal we have to do nothing. All the work + * is done automatically when we return from the interrupt. + */ bits = xchg(&S390_lowcore.ext_call_fast, 0); - if (test_bit(ec_call_function, &bits)) + if (test_bit(ec_call_function, &bits)) do_call_function(); } /* - * Send an external call sigp to another cpu and return without waiting + * Send an external call sigp to another cpu and wait * for its completion. */ -static sigp_ccode smp_ext_bitcall(int cpu, ec_bit_sig sig) +static void smp_ext_bitcall(int cpu, ec_bit_sig sig) { - sigp_ccode ccode; - - /* - * Set signaling bit in lowcore of target cpu and kick it - */ + /* + * Set signaling bit in lowcore of target cpu and kick it + */ set_bit(sig, &(get_cpu_lowcore(cpu)->ext_call_fast)); - ccode = signal_processor(cpu, sigp_external_call); - return ccode; + while (signal_processor(cpu, sigp_external_call) == sigp_busy) + udelay(10); } /* * Send an external call sigp to every other cpu in the system and - * return without waiting for its completion. + * wait for its completion. */ static void smp_ext_bitcall_others(ec_bit_sig sig) { - sigp_ccode ccode; - int i; + int i; - for (i = 0; i < smp_num_cpus; i++) { - if (smp_processor_id() == i) - continue; - /* - * Set signaling bit in lowcore of target cpu and kick it - */ + for (i = 0; i < smp_num_cpus; i++) { + if (smp_processor_id() == i) + continue; + /* + * Set signaling bit in lowcore of target cpu and kick it + */ set_bit(sig, &(get_cpu_lowcore(i)->ext_call_fast)); - while (signal_processor(i, sigp_external_call) == sigp_busy) + while (signal_processor(i, sigp_external_call) == sigp_busy) udelay(10); - } + } } /* @@ -650,3 +693,4 @@ EXPORT_SYMBOL(smp_ctl_clear_bit); EXPORT_SYMBOL(smp_num_cpus); EXPORT_SYMBOL(smp_call_function); + diff -urN linux-2.4.27/arch/s390x/kernel/time.c linux-2.4.28/arch/s390x/kernel/time.c --- linux-2.4.27/arch/s390x/kernel/time.c 2003-06-13 07:51:32.000000000 -0700 +++ linux-2.4.28/arch/s390x/kernel/time.c 2004-11-17 03:54:21.101377459 -0800 @@ -37,10 +37,17 @@ #define USECS_PER_JIFFY ((unsigned long) 1000000/HZ) #define CLK_TICKS_PER_JIFFY ((unsigned long) USECS_PER_JIFFY << 12) +/* + * Create a small time difference between the timer interrupts + * on the different cpus to avoid lock contention. + */ +#define CPU_DEVIATION (smp_processor_id() << 12) + #define TICK_SIZE tick static ext_int_info_t ext_int_info_timer; -static uint64_t init_timer_cc; +static u64 init_timer_cc; +static u64 xtime_cc; extern rwlock_t xtime_lock; extern unsigned long wall_jiffies; @@ -56,7 +63,7 @@ { __u64 now; - asm ("STCK 0(%0)" : : "a" (&now) : "memory", "cc"); + asm volatile ("STCK 0(%0)" : : "a" (&now) : "memory", "cc"); now = (now - init_timer_cc) >> 12; /* We require the offset from the latest update of xtime */ now -= (__u64) wall_jiffies*USECS_PER_JIFFY; @@ -114,35 +121,58 @@ * timer_interrupt() needs to keep up the real-time clock, * as well as call the "do_timer()" routine every clocktick */ - -#ifdef CONFIG_SMP -extern __u16 boot_cpu_addr; -#endif - -static void do_comparator_interrupt(struct pt_regs *regs, __u16 error_code) +void account_ticks(struct pt_regs *regs) { int cpu = smp_processor_id(); + __u64 tmp; + __u32 ticks; + + /* Calculate how many ticks have passed. */ + tmp = S390_lowcore.int_clock - S390_lowcore.jiffy_timer; + if (tmp >= 2*CLK_TICKS_PER_JIFFY) { /* more than one tick ? */ + ticks = tmp / CLK_TICKS_PER_JIFFY + 1; + S390_lowcore.jiffy_timer += + CLK_TICKS_PER_JIFFY * (__u64) ticks; + } else if (tmp > CLK_TICKS_PER_JIFFY) { + ticks = 2; + S390_lowcore.jiffy_timer += 2*CLK_TICKS_PER_JIFFY; + } else { + ticks = 1; + S390_lowcore.jiffy_timer += CLK_TICKS_PER_JIFFY; + } + + /* set clock comparator for next tick */ + tmp = S390_lowcore.jiffy_timer + CPU_DEVIATION; + asm volatile ("SCKC %0" : : "m" (tmp)); irq_enter(cpu, 0); +#ifdef CONFIG_SMP /* - * set clock comparator for next tick + * Do not rely on the boot cpu to do the calls to do_timer. + * Spread it over all cpus instead. */ - S390_lowcore.jiffy_timer += CLK_TICKS_PER_JIFFY; - asm volatile ("SCKC %0" : : "m" (S390_lowcore.jiffy_timer)); - -#ifdef CONFIG_SMP - if (S390_lowcore.cpu_data.cpu_addr == boot_cpu_addr) - write_lock(&xtime_lock); - - update_process_times(user_mode(regs)); - - if (S390_lowcore.cpu_data.cpu_addr == boot_cpu_addr) { - do_timer(regs); - write_unlock(&xtime_lock); + write_lock(&xtime_lock); + if (S390_lowcore.jiffy_timer > xtime_cc) { + __u32 xticks; + + tmp = S390_lowcore.jiffy_timer - xtime_cc; + if (tmp >= 2*CLK_TICKS_PER_JIFFY) { + xticks = tmp / CLK_TICKS_PER_JIFFY; + xtime_cc += (__u64) xticks * CLK_TICKS_PER_JIFFY; + } else { + xticks = 1; + xtime_cc += CLK_TICKS_PER_JIFFY; + } + while (xticks--) + do_timer(regs); } + write_unlock(&xtime_lock); + while (ticks--) + update_process_times(user_mode(regs)); #else - do_timer(regs); + while (ticks--) + do_timer(regs); #endif irq_exit(cpu, 0); @@ -153,11 +183,13 @@ */ void init_cpu_timer(void) { - unsigned long cr0; + unsigned long cr0; + __u64 timer; - S390_lowcore.jiffy_timer = (__u64) jiffies * CLK_TICKS_PER_JIFFY; - S390_lowcore.jiffy_timer += init_timer_cc + CLK_TICKS_PER_JIFFY; - asm volatile ("SCKC %0" : : "m" (S390_lowcore.jiffy_timer)); + timer = init_timer_cc + (__u64) jiffies * CLK_TICKS_PER_JIFFY; + S390_lowcore.jiffy_timer = timer + CLK_TICKS_PER_JIFFY; + timer += CLK_TICKS_PER_JIFFY + CPU_DEVIATION; + asm volatile ("SCKC %0" : : "m" (timer)); /* allow clock comparator timer interrupt */ asm volatile ("STCTG 0,0,%0" : "=m" (cr0) : : "memory"); cr0 |= 0x800; @@ -193,12 +225,13 @@ } /* set xtime */ + xtime_cc = init_timer_cc + CLK_TICKS_PER_JIFFY; set_time_cc = init_timer_cc - 0x8126d60e46000000LL + (0x3c26700LL*1000000*4096); tod_to_timeval(set_time_cc, &xtime); /* request the 0x1004 external interrupt */ - if (register_early_external_interrupt(0x1004, do_comparator_interrupt, + if (register_early_external_interrupt(0x1004, NULL, &ext_int_info_timer) != 0) panic("Couldn't request external interrupt 0x1004"); diff -urN linux-2.4.27/arch/sh/config.in linux-2.4.28/arch/sh/config.in --- linux-2.4.27/arch/sh/config.in 2004-02-18 05:36:30.000000000 -0800 +++ linux-2.4.28/arch/sh/config.in 2004-11-17 03:54:21.102377500 -0800 @@ -282,7 +282,7 @@ "ELF CONFIG_KCORE_ELF \ A.OUT CONFIG_KCORE_AOUT" ELF fi -tristate 'Kernel support for ELF binaries' CONFIG_BINFMT_ELF +bool 'Kernel support for ELF binaries' CONFIG_BINFMT_ELF tristate 'Kernel support for MISC binaries' CONFIG_BINFMT_MISC bool 'Select task to kill on out of memory condition' CONFIG_OOM_KILLER diff -urN linux-2.4.27/arch/sh64/config.in linux-2.4.28/arch/sh64/config.in --- linux-2.4.27/arch/sh64/config.in 2004-08-07 16:26:04.000000000 -0700 +++ linux-2.4.28/arch/sh64/config.in 2004-11-17 03:54:21.102377500 -0800 @@ -150,7 +150,7 @@ "ELF CONFIG_KCORE_ELF \ A.OUT CONFIG_KCORE_AOUT" ELF fi -tristate 'Kernel support for ELF binaries' CONFIG_BINFMT_ELF +bool 'Kernel support for ELF binaries' CONFIG_BINFMT_ELF tristate 'Kernel support for MISC binaries' CONFIG_BINFMT_MISC bool 'Select task to kill on out of memory condition' CONFIG_OOM_KILLER diff -urN linux-2.4.27/arch/sh64/defconfig linux-2.4.28/arch/sh64/defconfig --- linux-2.4.27/arch/sh64/defconfig 2004-08-07 16:26:04.000000000 -0700 +++ linux-2.4.28/arch/sh64/defconfig 2004-11-17 03:54:21.103377542 -0800 @@ -386,7 +386,7 @@ CONFIG_ROOT_NFS=y # CONFIG_NFSD is not set # CONFIG_NFSD_V3 is not set -# CONFIG_NFSD_TCP is not set +CONFIG_NFSD_TCP=y CONFIG_SUNRPC=y CONFIG_LOCKD=y CONFIG_LOCKD_V4=y diff -urN linux-2.4.27/arch/sparc/config.in linux-2.4.28/arch/sparc/config.in --- linux-2.4.27/arch/sparc/config.in 2004-02-18 05:36:30.000000000 -0800 +++ linux-2.4.28/arch/sparc/config.in 2004-11-17 03:54:21.103377542 -0800 @@ -73,7 +73,7 @@ define_bool CONFIG_KCORE_ELF y fi tristate 'Kernel support for a.out binaries' CONFIG_BINFMT_AOUT -tristate 'Kernel support for ELF binaries' CONFIG_BINFMT_ELF +bool 'Kernel support for ELF binaries' CONFIG_BINFMT_ELF tristate 'Kernel support for MISC binaries' CONFIG_BINFMT_MISC bool 'SunOS binary emulation' CONFIG_SUNOS_EMUL bool 'Select task to kill on out of memory condition' CONFIG_OOM_KILLER diff -urN linux-2.4.27/arch/sparc/lib/copy_user.S linux-2.4.28/arch/sparc/lib/copy_user.S --- linux-2.4.27/arch/sparc/lib/copy_user.S 2003-11-28 10:26:19.000000000 -0800 +++ linux-2.4.28/arch/sparc/lib/copy_user.S 2004-11-17 03:54:21.104377583 -0800 @@ -65,52 +65,52 @@ /* Both these macros have to start with exactly the same insn */ #define MOVE_BIGCHUNK(src, dst, offset, t0, t1, t2, t3, t4, t5, t6, t7) \ - ldd [%src + offset + 0x00], %t0; \ - ldd [%src + offset + 0x08], %t2; \ - ldd [%src + offset + 0x10], %t4; \ - ldd [%src + offset + 0x18], %t6; \ - st %t0, [%dst + offset + 0x00]; \ - st %t1, [%dst + offset + 0x04]; \ - st %t2, [%dst + offset + 0x08]; \ - st %t3, [%dst + offset + 0x0c]; \ - st %t4, [%dst + offset + 0x10]; \ - st %t5, [%dst + offset + 0x14]; \ - st %t6, [%dst + offset + 0x18]; \ - st %t7, [%dst + offset + 0x1c]; + ldd [%src + (offset) + 0x00], %t0; \ + ldd [%src + (offset) + 0x08], %t2; \ + ldd [%src + (offset) + 0x10], %t4; \ + ldd [%src + (offset) + 0x18], %t6; \ + st %t0, [%dst + (offset) + 0x00]; \ + st %t1, [%dst + (offset) + 0x04]; \ + st %t2, [%dst + (offset) + 0x08]; \ + st %t3, [%dst + (offset) + 0x0c]; \ + st %t4, [%dst + (offset) + 0x10]; \ + st %t5, [%dst + (offset) + 0x14]; \ + st %t6, [%dst + (offset) + 0x18]; \ + st %t7, [%dst + (offset) + 0x1c]; #define MOVE_BIGALIGNCHUNK(src, dst, offset, t0, t1, t2, t3, t4, t5, t6, t7) \ - ldd [%src + offset + 0x00], %t0; \ - ldd [%src + offset + 0x08], %t2; \ - ldd [%src + offset + 0x10], %t4; \ - ldd [%src + offset + 0x18], %t6; \ - std %t0, [%dst + offset + 0x00]; \ - std %t2, [%dst + offset + 0x08]; \ - std %t4, [%dst + offset + 0x10]; \ - std %t6, [%dst + offset + 0x18]; + ldd [%src + (offset) + 0x00], %t0; \ + ldd [%src + (offset) + 0x08], %t2; \ + ldd [%src + (offset) + 0x10], %t4; \ + ldd [%src + (offset) + 0x18], %t6; \ + std %t0, [%dst + (offset) + 0x00]; \ + std %t2, [%dst + (offset) + 0x08]; \ + std %t4, [%dst + (offset) + 0x10]; \ + std %t6, [%dst + (offset) + 0x18]; #define MOVE_LASTCHUNK(src, dst, offset, t0, t1, t2, t3) \ - ldd [%src - offset - 0x10], %t0; \ - ldd [%src - offset - 0x08], %t2; \ - st %t0, [%dst - offset - 0x10]; \ - st %t1, [%dst - offset - 0x0c]; \ - st %t2, [%dst - offset - 0x08]; \ - st %t3, [%dst - offset - 0x04]; + ldd [%src - (offset) - 0x10], %t0; \ + ldd [%src - (offset) - 0x08], %t2; \ + st %t0, [%dst - (offset) - 0x10]; \ + st %t1, [%dst - (offset) - 0x0c]; \ + st %t2, [%dst - (offset) - 0x08]; \ + st %t3, [%dst - (offset) - 0x04]; #define MOVE_HALFCHUNK(src, dst, offset, t0, t1, t2, t3) \ - lduh [%src + offset + 0x00], %t0; \ - lduh [%src + offset + 0x02], %t1; \ - lduh [%src + offset + 0x04], %t2; \ - lduh [%src + offset + 0x06], %t3; \ - sth %t0, [%dst + offset + 0x00]; \ - sth %t1, [%dst + offset + 0x02]; \ - sth %t2, [%dst + offset + 0x04]; \ - sth %t3, [%dst + offset + 0x06]; + lduh [%src + (offset) + 0x00], %t0; \ + lduh [%src + (offset) + 0x02], %t1; \ + lduh [%src + (offset) + 0x04], %t2; \ + lduh [%src + (offset) + 0x06], %t3; \ + sth %t0, [%dst + (offset) + 0x00]; \ + sth %t1, [%dst + (offset) + 0x02]; \ + sth %t2, [%dst + (offset) + 0x04]; \ + sth %t3, [%dst + (offset) + 0x06]; #define MOVE_SHORTCHUNK(src, dst, offset, t0, t1) \ - ldub [%src - offset - 0x02], %t0; \ - ldub [%src - offset - 0x01], %t1; \ - stb %t0, [%dst - offset - 0x02]; \ - stb %t1, [%dst - offset - 0x01]; + ldub [%src - (offset) - 0x02], %t0; \ + ldub [%src - (offset) - 0x01], %t1; \ + stb %t0, [%dst - (offset) - 0x02]; \ + stb %t1, [%dst - (offset) - 0x01]; .text .align 4 diff -urN linux-2.4.27/arch/sparc64/config.in linux-2.4.28/arch/sparc64/config.in --- linux-2.4.27/arch/sparc64/config.in 2004-08-07 16:26:04.000000000 -0700 +++ linux-2.4.28/arch/sparc64/config.in 2004-11-17 03:54:21.105377624 -0800 @@ -78,7 +78,7 @@ tristate ' Kernel support for 32-bit ELF binaries' CONFIG_BINFMT_ELF32 bool ' Kernel support for 32-bit (ie. SunOS) a.out binaries' CONFIG_BINFMT_AOUT32 fi -tristate 'Kernel support for 64-bit ELF binaries' CONFIG_BINFMT_ELF +bool 'Kernel support for 64-bit ELF binaries' CONFIG_BINFMT_ELF tristate 'Kernel support for MISC binaries' CONFIG_BINFMT_MISC bool 'SunOS binary emulation' CONFIG_SUNOS_EMUL if [ "$CONFIG_EXPERIMENTAL" = "y" ]; then diff -urN linux-2.4.27/arch/sparc64/defconfig linux-2.4.28/arch/sparc64/defconfig --- linux-2.4.27/arch/sparc64/defconfig 2004-08-07 16:26:04.000000000 -0700 +++ linux-2.4.28/arch/sparc64/defconfig 2004-11-17 03:54:21.106377665 -0800 @@ -375,7 +375,7 @@ CONFIG_NET_SCH_TEQL=m CONFIG_NET_SCH_TBF=m CONFIG_NET_SCH_GRED=m -CONFIG_NET_SCH_DELAY=m +CONFIG_NET_SCH_NETEM=m CONFIG_NET_SCH_DSMARK=m CONFIG_NET_SCH_INGRESS=m CONFIG_NET_QOS=y @@ -410,6 +410,7 @@ # # CONFIG_BLK_DEV_HD_IDE is not set # CONFIG_BLK_DEV_HD is not set +# CONFIG_BLK_DEV_IDE_SATA is not set CONFIG_BLK_DEV_IDEDISK=y # CONFIG_IDEDISK_MULTI_MODE is not set # CONFIG_IDEDISK_STROKE is not set @@ -1134,6 +1135,8 @@ CONFIG_CRYPTO_AES=m CONFIG_CRYPTO_CAST5=m CONFIG_CRYPTO_CAST6=m +CONFIG_CRYPTO_TEA=m +CONFIG_CRYPTO_KHAZAD=m CONFIG_CRYPTO_ARC4=m CONFIG_CRYPTO_DEFLATE=y CONFIG_CRYPTO_MICHAEL_MIC=m diff -urN linux-2.4.27/arch/sparc64/kernel/head.S linux-2.4.28/arch/sparc64/kernel/head.S --- linux-2.4.27/arch/sparc64/kernel/head.S 2004-02-18 05:36:31.000000000 -0800 +++ linux-2.4.28/arch/sparc64/kernel/head.S 2004-11-17 03:54:21.106377665 -0800 @@ -50,7 +50,7 @@ */ .global root_flags, ram_flags, root_dev .global sparc_ramdisk_image, sparc_ramdisk_size - .globl silo_args + .global sparc_ramdisk_image64 .ascii "HdrS" .word LINUX_VERSION_CODE @@ -61,7 +61,7 @@ * 0x0202 : Supports kernel params string * 0x0201 : Supports reboot_command */ - .half 0x0300 /* HdrS version */ + .half 0x0301 /* HdrS version */ root_flags: .half 1 @@ -75,6 +75,8 @@ .word 0 .xword reboot_command .xword bootstr_info +sparc_ramdisk_image64: + .xword 0 .word _end /* We must be careful, 32-bit OpenBOOT will get confused if it diff -urN linux-2.4.27/arch/sparc64/kernel/ioctl32.c linux-2.4.28/arch/sparc64/kernel/ioctl32.c --- linux-2.4.27/arch/sparc64/kernel/ioctl32.c 2004-08-07 16:26:04.000000000 -0700 +++ linux-2.4.28/arch/sparc64/kernel/ioctl32.c 2004-11-17 03:54:21.110377829 -0800 @@ -2948,7 +2948,7 @@ case LV_REMOVE: case LV_RENAME: case LV_STATUS_BYNAME: - err = copy_from_user(&u.pv_status, arg, sizeof(u.pv_status.pv_name)); + err = copy_from_user(&u.lv_req, arg, sizeof(u.lv_req.lv_name)); if (err) return -EFAULT; if (cmd != LV_REMOVE) { @@ -2991,7 +2991,7 @@ case PV_CHANGE: case PV_STATUS: - err = copy_from_user(&u.pv_status, arg, sizeof(u.lv_req.lv_name)); + err = copy_from_user(&u.pv_status, arg, sizeof(u.pv_status.pv_name)); if (err) return -EFAULT; err = __get_user(ptr, &((pv_status_req32_t *)arg)->pv); @@ -3063,7 +3063,7 @@ if (u.lv_bydev.lv) { if (!err) err = copy_lv_t(ptr, u.lv_bydev.lv); - put_lv_t(u.lv_byindex.lv); + put_lv_t(u.lv_bydev.lv); } break; diff -urN linux-2.4.27/arch/sparc64/kernel/itlb_base.S linux-2.4.28/arch/sparc64/kernel/itlb_base.S --- linux-2.4.27/arch/sparc64/kernel/itlb_base.S 2003-06-13 07:51:32.000000000 -0700 +++ linux-2.4.28/arch/sparc64/kernel/itlb_base.S 2004-11-17 03:54:21.111377870 -0800 @@ -41,6 +41,9 @@ CREATE_VPTE_OFFSET2(%g4, %g6) ! Create VPTE offset ldxa [%g3 + %g6] ASI_P, %g5 ! Load VPTE 1: brgez,pn %g5, 3f ! Not valid, branch out + sethi %hi(_PAGE_EXEC), %g4 ! Delay-slot + andcc %g5, %g4, %g0 ! Executable? + be,pn %xcc, 3f ! Nope, branch. nop ! Delay-slot 2: stxa %g5, [%g0] ASI_ITLB_DATA_IN ! Load PTE into TLB retry ! Trap return @@ -73,9 +76,6 @@ nop nop nop - nop - nop - nop CREATE_VPTE_NOP #undef CREATE_VPTE_OFFSET1 diff -urN linux-2.4.27/arch/sparc64/kernel/pci_iommu.c linux-2.4.28/arch/sparc64/kernel/pci_iommu.c --- linux-2.4.27/arch/sparc64/kernel/pci_iommu.c 2003-11-28 10:26:19.000000000 -0800 +++ linux-2.4.28/arch/sparc64/kernel/pci_iommu.c 2004-11-17 03:54:21.124378405 -0800 @@ -56,6 +56,39 @@ } } +#define IOPTE_CONSISTENT(CTX) \ + (IOPTE_VALID | IOPTE_CACHE | \ + (((CTX) << 47) & IOPTE_CONTEXT)) + +#define IOPTE_STREAMING(CTX) \ + (IOPTE_CONSISTENT(CTX) | IOPTE_STBUF) + +/* Existing mappings are never marked invalid, instead they + * are pointed to a dummy page. + */ +#define IOPTE_IS_DUMMY(iommu, iopte) \ + ((iopte_val(*iopte) & IOPTE_PAGE) == (iommu)->dummy_page_pa) + +static void inline iopte_make_dummy(struct pci_iommu *iommu, iopte_t *iopte) +{ + unsigned long val = iopte_val(*iopte); + + val &= ~IOPTE_PAGE; + val |= iommu->dummy_page_pa; + + iopte_val(*iopte) = val; +} + +void pci_iommu_table_init(struct pci_iommu *iommu, int tsbsize) +{ + int i; + + tsbsize /= sizeof(iopte_t); + + for (i = 0; i < tsbsize; i++) + iopte_make_dummy(iommu, &iommu->page_table[i]); +} + static iopte_t *alloc_streaming_cluster(struct pci_iommu *iommu, unsigned long npages) { iopte_t *iopte, *limit, *first; @@ -79,7 +112,7 @@ first = iopte; for (;;) { - if (iopte_val(*iopte) == 0UL) { + if (IOPTE_IS_DUMMY(iommu, iopte)) { if ((iopte + (1 << cnum)) >= limit) ent = 0; else @@ -142,12 +175,12 @@ iopte = iommu->page_table + (1 << (iommu->page_table_sz_bits - PBM_LOGCLUSTERS)); while (iopte > iommu->page_table) { iopte--; - if (!(iopte_val(*iopte) & IOPTE_VALID)) { + if (IOPTE_IS_DUMMY(iommu, iopte)) { unsigned long tmp = npages; while (--tmp) { iopte--; - if (iopte_val(*iopte) & IOPTE_VALID) + if (!IOPTE_IS_DUMMY(iommu, iopte)) break; } if (tmp == 0) { @@ -162,15 +195,6 @@ return NULL; } -#define IOPTE_CONSISTENT(CTX) \ - (IOPTE_VALID | IOPTE_CACHE | \ - (((CTX) << 47) & IOPTE_CONTEXT)) - -#define IOPTE_STREAMING(CTX) \ - (IOPTE_CONSISTENT(CTX) | IOPTE_STBUF) - -#define IOPTE_INVALID 0UL - /* Allocate and map kernel buffer of size SIZE using consistent mode * DMA for PCI device PDEV. Return non-NULL cpu-side address if * successful and set *DMA_ADDRP to the PCI side dma address. @@ -261,7 +285,7 @@ limit = (iommu->page_table + (1 << (iommu->page_table_sz_bits - PBM_LOGCLUSTERS))); while (walk < limit) { - if (iopte_val(*walk) != IOPTE_INVALID) + if (!IOPTE_IS_DUMMY(iommu, walk)) break; walk++; } @@ -280,7 +304,7 @@ ctx = (iopte_val(*iopte) & IOPTE_CONTEXT) >> 47UL; for (i = 0; i < npages; i++, iopte++) - iopte_val(*iopte) = IOPTE_INVALID; + iopte_make_dummy(iommu, iopte); if (iommu->iommu_ctxflush) { pci_iommu_write(iommu->iommu_ctxflush, ctx); @@ -377,7 +401,7 @@ base = iommu->page_table + ((bus_addr - iommu->page_table_map_base) >> IO_PAGE_SHIFT); #ifdef DEBUG_PCI_IOMMU - if (iopte_val(*base) == IOPTE_INVALID) + if (IOPTE_IS_DUMMY(iommu, base)) printk("pci_unmap_single called on non-mapped region %08x,%08x from %016lx\n", bus_addr, sz, __builtin_return_address(0)); #endif @@ -416,7 +440,7 @@ } /* Step 2: Clear out first TSB entry. */ - iopte_val(*base) = IOPTE_INVALID; + iopte_make_dummy(iommu, base); free_streaming_cluster(iommu, bus_addr - iommu->page_table_map_base, npages, ctx); @@ -617,7 +641,7 @@ ((bus_addr - iommu->page_table_map_base) >> IO_PAGE_SHIFT); #ifdef DEBUG_PCI_IOMMU - if (iopte_val(*base) == IOPTE_INVALID) + if (IOPTE_IS_DUMMY(iommu, base)) printk("pci_unmap_sg called on non-mapped region %016lx,%d from %016lx\n", sglist->dma_address, nelems, __builtin_return_address(0)); #endif @@ -654,7 +678,7 @@ } /* Step 2: Clear out first TSB entry. */ - iopte_val(*base) = IOPTE_INVALID; + iopte_make_dummy(iommu, base); free_streaming_cluster(iommu, bus_addr - iommu->page_table_map_base, npages, ctx); diff -urN linux-2.4.27/arch/sparc64/kernel/pci_psycho.c linux-2.4.28/arch/sparc64/kernel/pci_psycho.c --- linux-2.4.27/arch/sparc64/kernel/pci_psycho.c 2003-11-28 10:26:19.000000000 -0800 +++ linux-2.4.28/arch/sparc64/kernel/pci_psycho.c 2004-11-17 03:54:21.126378487 -0800 @@ -1106,17 +1106,17 @@ * bits for each PBM. */ tmp = psycho_read(base + PSYCHO_PCIA_CTRL); - tmp |= (PSYCHO_PCICTRL_SBH_ERR | - PSYCHO_PCICTRL_SERR | - PSYCHO_PCICTRL_SBH_INT | + tmp |= (PSYCHO_PCICTRL_SERR | + PSYCHO_PCICTRL_SBH_ERR | PSYCHO_PCICTRL_EEN); + tmp &= ~(PSYCHO_PCICTRL_SBH_INT); psycho_write(base + PSYCHO_PCIA_CTRL, tmp); tmp = psycho_read(base + PSYCHO_PCIB_CTRL); - tmp |= (PSYCHO_PCICTRL_SBH_ERR | - PSYCHO_PCICTRL_SERR | - PSYCHO_PCICTRL_SBH_INT | + tmp |= (PSYCHO_PCICTRL_SERR | + PSYCHO_PCICTRL_SBH_ERR | PSYCHO_PCICTRL_EEN); + tmp &= ~(PSYCHO_PCICTRL_SBH_INT); psycho_write(base + PSYCHO_PCIB_CTRL, tmp); } @@ -1373,6 +1373,14 @@ * in pci_iommu.c */ + iommu->dummy_page = __get_free_pages(GFP_KERNEL, 0); + if (!iommu->dummy_page) { + prom_printf("PSYCHO_IOMMU: Error, gfp(dummy_page) failed.\n"); + prom_halt(); + } + memset((void *)iommu->dummy_page, 0, PAGE_SIZE); + iommu->dummy_page_pa = (unsigned long) __pa(iommu->dummy_page); + /* Using assumed page size 8K with 128K entries we need 1MB iommu page * table (128K ioptes * 8 bytes per iopte). This is * page order 7 on UltraSparc. @@ -1386,7 +1394,7 @@ iommu->page_table_sz_bits = 17; iommu->page_table_map_base = 0xc0000000; iommu->dma_addr_mask = 0xffffffff; - memset((char *)tsbbase, 0, IO_TSB_SIZE); + pci_iommu_table_init(iommu, IO_TSB_SIZE); /* We start with no consistent mappings. */ iommu->lowest_consistent_map = diff -urN linux-2.4.27/arch/sparc64/kernel/pci_sabre.c linux-2.4.28/arch/sparc64/kernel/pci_sabre.c --- linux-2.4.27/arch/sparc64/kernel/pci_sabre.c 2004-02-18 05:36:31.000000000 -0800 +++ linux-2.4.28/arch/sparc64/kernel/pci_sabre.c 2004-11-17 03:54:21.128378569 -0800 @@ -1315,6 +1315,14 @@ * in pci_iommu.c */ + iommu->dummy_page = __get_free_pages(GFP_KERNEL, 0); + if (!iommu->dummy_page) { + prom_printf("PSYCHO_IOMMU: Error, gfp(dummy_page) failed.\n"); + prom_halt(); + } + memset((void *)iommu->dummy_page, 0, PAGE_SIZE); + iommu->dummy_page_pa = (unsigned long) __pa(iommu->dummy_page); + tsbbase = __get_free_pages(GFP_KERNEL, order = get_order(tsbsize * 1024 * 8)); if (!tsbbase) { prom_printf("SABRE_IOMMU: Error, gfp(tsb) failed.\n"); @@ -1323,7 +1331,7 @@ iommu->page_table = (iopte_t *)tsbbase; iommu->page_table_map_base = dvma_offset; iommu->dma_addr_mask = dma_mask; - memset((char *)tsbbase, 0, PAGE_SIZE << order); + pci_iommu_table_init(iommu, PAGE_SIZE << order); sabre_write(p->pbm_A.controller_regs + SABRE_IOMMU_TSBBASE, __pa(tsbbase)); diff -urN linux-2.4.27/arch/sparc64/kernel/pci_schizo.c linux-2.4.28/arch/sparc64/kernel/pci_schizo.c --- linux-2.4.27/arch/sparc64/kernel/pci_schizo.c 2003-11-28 10:26:19.000000000 -0800 +++ linux-2.4.28/arch/sparc64/kernel/pci_schizo.c 2004-11-17 03:54:21.130378652 -0800 @@ -1423,10 +1423,10 @@ SCHIZO_PCICTRL_RTRY_ERR | SCHIZO_PCICTRL_SBH_ERR | SCHIZO_PCICTRL_SERR | - SCHIZO_PCICTRL_SBH_INT | SCHIZO_PCICTRL_EEN); - err_no_mask = SCHIZO_PCICTRL_DTO_ERR; + err_no_mask = (SCHIZO_PCICTRL_DTO_ERR | + SCHIZO_PCICTRL_SBH_INT); /* Enable PCI Error interrupts and clear error * bits for each PBM. @@ -1843,6 +1843,14 @@ * in pci_iommu.c */ + iommu->dummy_page = __get_free_pages(GFP_KERNEL, 0); + if (!iommu->dummy_page) { + prom_printf("PSYCHO_IOMMU: Error, gfp(dummy_page) failed.\n"); + prom_halt(); + } + memset((void *)iommu->dummy_page, 0, PAGE_SIZE); + iommu->dummy_page_pa = (unsigned long) __pa(iommu->dummy_page); + /* Using assumed page size 8K with 128K entries we need 1MB iommu page * table (128K ioptes * 8 bytes per iopte). This is * page order 7 on UltraSparc. @@ -1857,7 +1865,7 @@ iommu->page_table = (iopte_t *)tsbbase; iommu->page_table_map_base = vdma[0]; iommu->dma_addr_mask = dma_mask; - memset((char *)tsbbase, 0, PAGE_SIZE << order); + pci_iommu_table_init(iommu, PAGE_SIZE << order); switch (tsbsize) { case 64: diff -urN linux-2.4.27/arch/sparc64/kernel/process.c linux-2.4.28/arch/sparc64/kernel/process.c --- linux-2.4.27/arch/sparc64/kernel/process.c 2004-08-07 16:26:04.000000000 -0700 +++ linux-2.4.28/arch/sparc64/kernel/process.c 2004-11-17 03:54:21.131378693 -0800 @@ -503,7 +503,7 @@ distance = fp - psp; rval = (csp - distance); - if (copy_in_user(rval, psp, distance)) + if (copy_in_user((void *)rval, (void *)psp, distance)) rval = 0; else if (current->thread.flags & SPARC_FLAG_32BIT) { if (put_user(((u32)csp), &(((struct reg_window32 *)rval)->ins[6]))) diff -urN linux-2.4.27/arch/sparc64/kernel/sbus.c linux-2.4.28/arch/sparc64/kernel/sbus.c --- linux-2.4.27/arch/sparc64/kernel/sbus.c 2002-08-02 17:39:43.000000000 -0700 +++ linux-2.4.28/arch/sparc64/kernel/sbus.c 2004-11-17 03:54:21.132378734 -0800 @@ -27,10 +27,10 @@ * * On SYSIO, using an 8K page size we have 1GB of SBUS * DMA space mapped. We divide this space into equally - * sized clusters. Currently we allow clusters up to a - * size of 1MB. If anything begins to generate DMA - * mapping requests larger than this we will need to - * increase things a bit. + * sized clusters. We allocate a DMA mapping from the + * cluster that matches the order of the allocation, or + * if the order is greater than the number of clusters, + * we try to allocate from the last cluster. */ #define NCLUSTERS 8UL @@ -133,12 +133,17 @@ static iopte_t *alloc_streaming_cluster(struct sbus_iommu *iommu, unsigned long npages) { - iopte_t *iopte, *limit, *first; - unsigned long cnum, ent, flush_point; + iopte_t *iopte, *limit, *first, *cluster; + unsigned long cnum, ent, nent, flush_point, found; cnum = 0; + nent = 1; while ((1UL << cnum) < npages) cnum++; + if(cnum >= NCLUSTERS) { + nent = 1UL << (cnum - NCLUSTERS); + cnum = NCLUSTERS - 1; + } iopte = iommu->page_table + (cnum * CLUSTER_NPAGES); if (cnum == 0) @@ -151,22 +156,31 @@ flush_point = iommu->alloc_info[cnum].flush; first = iopte; + cluster = NULL; + found = 0; for (;;) { if (iopte_val(*iopte) == 0UL) { - if ((iopte + (1 << cnum)) >= limit) - ent = 0; - else - ent = ent + 1; - iommu->alloc_info[cnum].next = ent; - if (ent == flush_point) - __iommu_flushall(iommu); - break; + found++; + if (!cluster) + cluster = iopte; + } else { + /* Used cluster in the way */ + cluster = NULL; + found = 0; } + + if (found == nent) + break; + iopte += (1 << cnum); ent++; if (iopte >= limit) { iopte = (iommu->page_table + (cnum * CLUSTER_NPAGES)); ent = 0; + + /* Multiple cluster allocations must not wrap */ + cluster = NULL; + found = 0; } if (ent == flush_point) __iommu_flushall(iommu); @@ -174,8 +188,19 @@ goto bad; } + /* ent/iopte points to the last cluster entry we're going to use, + * so save our place for the next allocation. + */ + if ((iopte + (1 << cnum)) >= limit) + ent = 0; + else + ent = ent + 1; + iommu->alloc_info[cnum].next = ent; + if (ent == flush_point) + __iommu_flushall(iommu); + /* I've got your streaming cluster right here buddy boy... */ - return iopte; + return cluster; bad: printk(KERN_EMERG "sbus: alloc_streaming_cluster of npages(%ld) failed!\n", @@ -185,15 +210,23 @@ static void free_streaming_cluster(struct sbus_iommu *iommu, u32 base, unsigned long npages) { - unsigned long cnum, ent; + unsigned long cnum, ent, nent; iopte_t *iopte; cnum = 0; + nent = 1; while ((1UL << cnum) < npages) cnum++; + if(cnum >= NCLUSTERS) { + nent = 1UL << (cnum - NCLUSTERS); + cnum = NCLUSTERS - 1; + } ent = (base & CLUSTER_MASK) >> (IO_PAGE_SHIFT + cnum); iopte = iommu->page_table + ((base - MAP_BASE) >> IO_PAGE_SHIFT); - iopte_val(*iopte) = 0UL; + do { + iopte_val(*iopte) = 0UL; + iopte += 1 << cnum; + } while(--nent); /* If the global flush might not have caught this entry, * adjust the flush point such that we will flush before diff -urN linux-2.4.27/arch/sparc64/kernel/signal.c linux-2.4.28/arch/sparc64/kernel/signal.c --- linux-2.4.27/arch/sparc64/kernel/signal.c 2003-08-25 04:44:40.000000000 -0700 +++ linux-2.4.28/arch/sparc64/kernel/signal.c 2004-11-17 03:54:21.133378775 -0800 @@ -116,8 +116,8 @@ regs->tnpc = npc; err |= __get_user(regs->y, &((*grp)[MC_Y])); err |= __get_user(tstate, &((*grp)[MC_TSTATE])); - regs->tstate &= ~(TSTATE_ICC | TSTATE_XCC); - regs->tstate |= (tstate & (TSTATE_ICC | TSTATE_XCC)); + regs->tstate &= ~(TSTATE_ASI | TSTATE_ICC | TSTATE_XCC); + regs->tstate |= (tstate & (TSTATE_ASI | TSTATE_ICC | TSTATE_XCC)); err |= __get_user(regs->u_regs[UREG_G1], (&(*grp)[MC_G1])); err |= __get_user(regs->u_regs[UREG_G2], (&(*grp)[MC_G2])); err |= __get_user(regs->u_regs[UREG_G3], (&(*grp)[MC_G3])); @@ -437,9 +437,9 @@ err |= __get_user(tstate, &sf->regs.tstate); err |= copy_from_user(regs->u_regs, sf->regs.u_regs, sizeof(regs->u_regs)); - /* User can only change condition codes in %tstate. */ - regs->tstate &= ~(TSTATE_ICC | TSTATE_XCC); - regs->tstate |= (tstate & (TSTATE_ICC | TSTATE_XCC)); + /* User can only change condition codes and %asi in %tstate. */ + regs->tstate &= ~(TSTATE_ASI | TSTATE_ICC | TSTATE_XCC); + regs->tstate |= (tstate & (TSTATE_ASI | TSTATE_ICC | TSTATE_XCC)); err |= __get_user(fpu_save, &sf->fpu_save); if (fpu_save) diff -urN linux-2.4.27/arch/sparc64/kernel/signal32.c linux-2.4.28/arch/sparc64/kernel/signal32.c --- linux-2.4.27/arch/sparc64/kernel/signal32.c 2004-08-07 16:26:04.000000000 -0700 +++ linux-2.4.28/arch/sparc64/kernel/signal32.c 2004-11-17 03:54:21.134378816 -0800 @@ -62,6 +62,16 @@ unsigned extramask[_NSIG_WORDS32 - 1]; }; +/* This magic should be in g_upper[0] for all upper parts + * to be valid. + */ +#define SIGINFO_EXTRA_V8PLUS_MAGIC 0x130e269 +typedef struct { + unsigned int g_upper[8]; + unsigned int o_upper[8]; + unsigned int asi; +} siginfo_extra_v8plus_t; + /* * And the new one, intended to be used for Linux applications only * (we have enough in there to work with clone). @@ -292,8 +302,13 @@ if ((psr & (PSR_VERS|PSR_IMPL)) == PSR_V8PLUS) { err |= __get_user(i, &sf->v8plus.g_upper[0]); if (i == SIGINFO_EXTRA_V8PLUS_MAGIC) { + unsigned long asi; + for (i = UREG_G1; i <= UREG_I7; i++) err |= __get_user(((u32 *)regs->u_regs)[2*i], &sf->v8plus.g_upper[i]); + err |= __get_user(asi, &sf->v8plus.asi); + regs->tstate &= ~TSTATE_ASI; + regs->tstate |= ((asi & 0xffUL) << 24UL); } } @@ -431,8 +446,13 @@ if ((psr & (PSR_VERS|PSR_IMPL)) == PSR_V8PLUS) { err |= __get_user(i, &sf->v8plus.g_upper[0]); if (i == SIGINFO_EXTRA_V8PLUS_MAGIC) { + unsigned long asi; + for (i = UREG_G1; i <= UREG_I7; i++) err |= __get_user(((u32 *)regs->u_regs)[2*i], &sf->v8plus.g_upper[i]); + err |= __get_user(asi, &sf->v8plus.asi); + regs->tstate &= ~TSTATE_ASI; + regs->tstate |= ((asi & 0xffUL) << 24UL); } } @@ -728,7 +748,10 @@ err |= __put_user(sizeof(siginfo_extra_v8plus_t), &sf->extra_size); err |= __put_user(SIGINFO_EXTRA_V8PLUS_MAGIC, &sf->v8plus.g_upper[0]); for (i = 1; i < 16; i++) - err |= __put_user(((u32 *)regs->u_regs)[2*i], &sf->v8plus.g_upper[i]); + err |= __put_user(((u32 *)regs->u_regs)[2*i], + &sf->v8plus.g_upper[i]); + err |= __put_user((regs->tstate & TSTATE_ASI) >> 24UL, + &sf->v8plus.asi); if (psr & PSR_EF) { err |= save_fpu_state32(regs, &sf->fpu_state); @@ -1175,7 +1198,10 @@ err |= __put_user(sizeof(siginfo_extra_v8plus_t), &sf->extra_size); err |= __put_user(SIGINFO_EXTRA_V8PLUS_MAGIC, &sf->v8plus.g_upper[0]); for (i = 1; i < 16; i++) - err |= __put_user(((u32 *)regs->u_regs)[2*i], &sf->v8plus.g_upper[i]); + err |= __put_user(((u32 *)regs->u_regs)[2*i], + &sf->v8plus.g_upper[i]); + err |= __put_user((regs->tstate & TSTATE_ASI) >> 24UL, + &sf->v8plus.asi); if (psr & PSR_EF) { err |= save_fpu_state32(regs, &sf->fpu_state); diff -urN linux-2.4.27/arch/sparc64/kernel/sparc64_ksyms.c linux-2.4.28/arch/sparc64/kernel/sparc64_ksyms.c --- linux-2.4.27/arch/sparc64/kernel/sparc64_ksyms.c 2004-08-07 16:26:04.000000000 -0700 +++ linux-2.4.28/arch/sparc64/kernel/sparc64_ksyms.c 2004-11-17 03:54:21.135378857 -0800 @@ -324,8 +324,6 @@ #endif /* Special internal versions of library functions. */ -EXPORT_SYMBOL(__memcpy); -EXPORT_SYMBOL(__memset); EXPORT_SYMBOL(_clear_page); EXPORT_SYMBOL(clear_user_page); EXPORT_SYMBOL(copy_user_page); @@ -334,14 +332,17 @@ EXPORT_SYMBOL(__memscan_generic); EXPORT_SYMBOL(__memcmp); EXPORT_SYMBOL(__strncmp); -EXPORT_SYMBOL(__memmove); +EXPORT_SYMBOL(__memset); EXPORT_SYMBOL(csum_partial_copy_sparc64); /* Moving data to/from userspace. */ -EXPORT_SYMBOL(__copy_to_user); -EXPORT_SYMBOL(__copy_from_user); -EXPORT_SYMBOL(__strncpy_from_user); +EXPORT_SYMBOL(___copy_to_user); +EXPORT_SYMBOL(___copy_from_user); +EXPORT_SYMBOL(___copy_in_user); +EXPORT_SYMBOL(copy_to_user_fixup); +EXPORT_SYMBOL(copy_from_user_fixup); +EXPORT_SYMBOL(copy_in_user_fixup); EXPORT_SYMBOL(__bzero_noasi); /* Various address conversion macros use this. */ diff -urN linux-2.4.27/arch/sparc64/kernel/sys_sparc32.c linux-2.4.28/arch/sparc64/kernel/sys_sparc32.c --- linux-2.4.27/arch/sparc64/kernel/sys_sparc32.c 2004-08-07 16:26:04.000000000 -0700 +++ linux-2.4.28/arch/sparc64/kernel/sys_sparc32.c 2004-11-17 03:54:21.138378981 -0800 @@ -2996,10 +2996,11 @@ if (optname == IPT_SO_SET_REPLACE) return do_netfilter_replace(fd, level, optname, optval, optlen); - if (optname == SO_ATTACH_FILTER) + if (level == SOL_SOCKET && optname == SO_ATTACH_FILTER) return do_set_attach_filter(fd, level, optname, optval, optlen); - if (optname == SO_RCVTIMEO || optname == SO_SNDTIMEO) + if (level == SOL_SOCKET && + (optname == SO_RCVTIMEO || optname == SO_SNDTIMEO)) return do_set_sock_timeout(fd, level, optname, optval, optlen); return sys_setsockopt(fd, level, optname, optval, optlen); diff -urN linux-2.4.27/arch/sparc64/kernel/time.c linux-2.4.28/arch/sparc64/kernel/time.c --- linux-2.4.27/arch/sparc64/kernel/time.c 2003-11-28 10:26:19.000000000 -0800 +++ linux-2.4.28/arch/sparc64/kernel/time.c 2004-11-17 03:54:21.139379022 -0800 @@ -432,18 +432,12 @@ extern int _stext; extern int rwlock_impl_begin, rwlock_impl_end; extern int atomic_impl_begin, atomic_impl_end; - extern int __memcpy_begin, __memcpy_end; - extern int __bzero_begin, __bzero_end; extern int __bitops_begin, __bitops_end; if ((pc >= (unsigned long) &atomic_impl_begin && pc < (unsigned long) &atomic_impl_end) || (pc >= (unsigned long) &rwlock_impl_begin && pc < (unsigned long) &rwlock_impl_end) || - (pc >= (unsigned long) &__memcpy_begin && - pc < (unsigned long) &__memcpy_end) || - (pc >= (unsigned long) &__bzero_begin && - pc < (unsigned long) &__bzero_end) || (pc >= (unsigned long) &__bitops_begin && pc < (unsigned long) &__bitops_end)) pc = o7; diff -urN linux-2.4.27/arch/sparc64/lib/Makefile linux-2.4.28/arch/sparc64/lib/Makefile --- linux-2.4.27/arch/sparc64/lib/Makefile 2002-08-02 17:39:43.000000000 -0700 +++ linux-2.4.28/arch/sparc64/lib/Makefile 2004-11-17 03:54:21.139379022 -0800 @@ -8,9 +8,11 @@ L_TARGET = lib.a obj-y := PeeCeeI.o blockops.o debuglocks.o strlen.o strncmp.o \ memscan.o strncpy_from_user.o strlen_user.o memcmp.o checksum.o \ - VIScopy.o VISbzero.o VISmemset.o VIScsum.o VIScsumcopy.o \ + VISbzero.o VISmemset.o VIScsum.o VIScsumcopy.o \ VIScsumcopyusr.o VISsave.o atomic.o rwlock.o bitops.o \ - dec_and_lock.o U3memcpy.o U3copy_from_user.o U3copy_to_user.o \ - U3copy_in_user.o mcount.o + U1memcpy.o U1copy_from_user.o U1copy_to_user.o \ + U3memcpy.o U3copy_from_user.o U3copy_to_user.o U3patch.o \ + copy_in_user.o user_fixup.o memmove.o \ + dec_and_lock.o mcount.o include $(TOPDIR)/Rules.make diff -urN linux-2.4.27/arch/sparc64/lib/U1copy_from_user.S linux-2.4.28/arch/sparc64/lib/U1copy_from_user.S --- linux-2.4.27/arch/sparc64/lib/U1copy_from_user.S 1969-12-31 16:00:00.000000000 -0800 +++ linux-2.4.28/arch/sparc64/lib/U1copy_from_user.S 2004-11-17 03:54:21.140379063 -0800 @@ -0,0 +1,33 @@ +/* U1copy_from_user.S: UltraSparc-I/II/IIi/IIe optimized copy from userspace. + * + * Copyright (C) 1999, 2000, 2004 David S. Miller (davem@redhat.com) + */ + +#define EX_LD(x) \ +98: x; \ + .section .fixup; \ + .align 4; \ +99: retl; \ + mov 1, %o0; \ + .section __ex_table; \ + .align 4; \ + .word 98b, 99b; \ + .text; \ + .align 4; + +#define FUNC_NAME ___copy_from_user +#define LOAD(type,addr,dest) type##a [addr] %asi, dest +#define LOAD_BLK(addr,dest) ldda [addr] ASI_BLK_AIUS, dest +#define EX_RETVAL(x) 0 + + /* Writing to %asi is _expensive_ so we hardcode it. + * Reading %asi to check for KERNEL_DS is comparatively + * cheap. + */ +#define PREAMBLE \ + rd %asi, %g1; \ + cmp %g1, ASI_AIUS; \ + bne,pn %icc, memcpy_user_stub; \ + nop; \ + +#include "U1memcpy.S" diff -urN linux-2.4.27/arch/sparc64/lib/U1copy_to_user.S linux-2.4.28/arch/sparc64/lib/U1copy_to_user.S --- linux-2.4.27/arch/sparc64/lib/U1copy_to_user.S 1969-12-31 16:00:00.000000000 -0800 +++ linux-2.4.28/arch/sparc64/lib/U1copy_to_user.S 2004-11-17 03:54:21.141379104 -0800 @@ -0,0 +1,33 @@ +/* U1copy_to_user.S: UltraSparc-I/II/IIi/IIe optimized copy to userspace. + * + * Copyright (C) 1999, 2000, 2004 David S. Miller (davem@redhat.com) + */ + +#define EX_ST(x) \ +98: x; \ + .section .fixup; \ + .align 4; \ +99: retl; \ + mov 1, %o0; \ + .section __ex_table; \ + .align 4; \ + .word 98b, 99b; \ + .text; \ + .align 4; + +#define FUNC_NAME ___copy_to_user +#define STORE(type,src,addr) type##a src, [addr] ASI_AIUS +#define STORE_BLK(src,addr) stda src, [addr] ASI_BLK_AIUS +#define EX_RETVAL(x) 0 + + /* Writing to %asi is _expensive_ so we hardcode it. + * Reading %asi to check for KERNEL_DS is comparatively + * cheap. + */ +#define PREAMBLE \ + rd %asi, %g1; \ + cmp %g1, ASI_AIUS; \ + bne,pn %icc, memcpy_user_stub; \ + nop; \ + +#include "U1memcpy.S" diff -urN linux-2.4.27/arch/sparc64/lib/U1memcpy.S linux-2.4.28/arch/sparc64/lib/U1memcpy.S --- linux-2.4.27/arch/sparc64/lib/U1memcpy.S 1969-12-31 16:00:00.000000000 -0800 +++ linux-2.4.28/arch/sparc64/lib/U1memcpy.S 2004-11-17 03:54:21.142379145 -0800 @@ -0,0 +1,552 @@ +/* U1memcpy.S: UltraSPARC-I/II/IIi/IIe optimized memcpy. + * + * Copyright (C) 1997, 2004 David S. Miller (davem@redhat.com) + * Copyright (C) 1996, 1997, 1998, 1999 Jakub Jelinek (jj@ultra.linux.cz) + */ + +#ifdef __KERNEL__ +#include +#include +#else +#define ASI_BLK_P 0xf0 +#define FPRS_FEF 0x04 +#ifdef MEMCPY_DEBUG +#define VISEntry rd %fprs, %o5; wr %g0, FPRS_FEF, %fprs; \ + clr %g1; clr %g2; clr %g3; subcc %g0, %g0, %g0; +#define VISExit and %o5, FPRS_FEF, %o5; wr %o5, 0x0, %fprs +#else +#define VISEntry rd %fprs, %o5; wr %g0, FPRS_FEF, %fprs +#define VISExit and %o5, FPRS_FEF, %o5; wr %o5, 0x0, %fprs +#endif +#endif + +#ifndef EX_LD +#define EX_LD(x) x +#endif + +#ifndef EX_ST +#define EX_ST(x) x +#endif + +#ifndef EX_RETVAL +#define EX_RETVAL(x) x +#endif + +#ifndef LOAD +#define LOAD(type,addr,dest) type [addr], dest +#endif + +#ifndef LOAD_BLK +#define LOAD_BLK(addr,dest) ldda [addr] ASI_BLK_P, dest +#endif + +#ifndef STORE +#define STORE(type,src,addr) type src, [addr] +#endif + +#ifndef STORE_BLK +#define STORE_BLK(src,addr) stda src, [addr] ASI_BLK_P +#endif + +#ifndef FUNC_NAME +#define FUNC_NAME memcpy +#endif + +#ifndef PREAMBLE +#define PREAMBLE +#endif + +#ifndef XCC +#define XCC xcc +#endif + +#define FREG_FROB(f1, f2, f3, f4, f5, f6, f7, f8, f9) \ + faligndata %f1, %f2, %f48; \ + faligndata %f2, %f3, %f50; \ + faligndata %f3, %f4, %f52; \ + faligndata %f4, %f5, %f54; \ + faligndata %f5, %f6, %f56; \ + faligndata %f6, %f7, %f58; \ + faligndata %f7, %f8, %f60; \ + faligndata %f8, %f9, %f62; + +#define MAIN_LOOP_CHUNK(src, dest, fdest, fsrc, len, jmptgt) \ + EX_LD(LOAD_BLK(%src, %fdest)); \ + EX_ST(STORE_BLK(%fsrc, %dest)); \ + add %src, 0x40, %src; \ + subcc %len, 0x40, %len; \ + be,pn %xcc, jmptgt; \ + add %dest, 0x40, %dest; \ + +#define LOOP_CHUNK1(src, dest, len, branch_dest) \ + MAIN_LOOP_CHUNK(src, dest, f0, f48, len, branch_dest) +#define LOOP_CHUNK2(src, dest, len, branch_dest) \ + MAIN_LOOP_CHUNK(src, dest, f16, f48, len, branch_dest) +#define LOOP_CHUNK3(src, dest, len, branch_dest) \ + MAIN_LOOP_CHUNK(src, dest, f32, f48, len, branch_dest) + +#define STORE_SYNC(dest, fsrc) \ + EX_ST(STORE_BLK(%fsrc, %dest)); \ + add %dest, 0x40, %dest; + +#define STORE_JUMP(dest, fsrc, target) \ + EX_ST(STORE_BLK(%fsrc, %dest)); \ + add %dest, 0x40, %dest; \ + ba,pt %xcc, target; + +#define FINISH_VISCHUNK(dest, f0, f1, left) \ + subcc %left, 8, %left;\ + bl,pn %xcc, 95f; \ + faligndata %f0, %f1, %f48; \ + EX_ST(STORE(std, %f48, %dest)); \ + add %dest, 8, %dest; + +#define UNEVEN_VISCHUNK_LAST(dest, f0, f1, left) \ + subcc %left, 8, %left; \ + bl,pn %xcc, 95f; \ + fsrc1 %f0, %f1; + +#define UNEVEN_VISCHUNK(dest, f0, f1, left) \ + UNEVEN_VISCHUNK_LAST(dest, f0, f1, left) \ + ba,a,pt %xcc, 93f; + + .register %g2,#scratch + .register %g3,#scratch + + .text + .align 64 + + .globl FUNC_NAME +FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */ + PREAMBLE + mov %o0, %g5 + cmp %o2, 0 + be,pn %XCC, 85f + or %o0, %o1, %o3 + cmp %o2, 16 + blu,a,pn %XCC, 80f + or %o3, %o2, %o3 + + cmp %o2, (5 * 64) + blu,pt %XCC, 70f + andcc %o3, 0x7, %g0 + + /* Clobbers o5/g1/g2/g3/g7/icc/xcc. */ + VISEntry + + /* Is 'dst' already aligned on an 64-byte boundary? */ + andcc %o0, 0x3f, %g2 + be,pt %XCC, 2f + + /* Compute abs((dst & 0x3f) - 0x40) into %g2. This is the number + * of bytes to copy to make 'dst' 64-byte aligned. We pre- + * subtract this from 'len'. + */ + sub %o0, %o1, %o4 + sub %g2, 0x40, %g2 + sub %g0, %g2, %g2 + sub %o2, %g2, %o2 + andcc %g2, 0x7, %g1 + be,pt %icc, 2f + and %g2, 0x38, %g2 + +1: subcc %g1, 0x1, %g1 + EX_LD(LOAD(ldub, %o1 + 0x00, %o3)) + EX_ST(STORE(stb, %o3, %o1 + %o4)) + bgu,pt %XCC, 1b + add %o1, 0x1, %o1 + + add %o1, %o4, %o0 + +2: cmp %g2, 0x0 + and %o1, 0x7, %g1 + be,pt %icc, 3f + alignaddr %o1, %g0, %o1 + + EX_LD(LOAD(ldd, %o1, %f4)) +1: EX_LD(LOAD(ldd, %o1 + 0x8, %f6)) + add %o1, 0x8, %o1 + subcc %g2, 0x8, %g2 + faligndata %f4, %f6, %f0 + EX_ST(STORE(std, %f0, %o0)) + be,pn %icc, 3f + add %o0, 0x8, %o0 + + EX_LD(LOAD(ldd, %o1 + 0x8, %f4)) + add %o1, 0x8, %o1 + subcc %g2, 0x8, %g2 + faligndata %f6, %f4, %f0 + EX_ST(STORE(std, %f0, %o0)) + bne,pt %icc, 1b + add %o0, 0x8, %o0 + + /* Destination is 64-byte aligned. */ +3: + membar #LoadStore | #StoreStore | #StoreLoad + + subcc %o2, 0x40, %o4 + add %o1, %g1, %g1 + andncc %o4, (0x40 - 1), %o4 + srl %g1, 3, %g2 + sub %o2, %o4, %g3 + andn %o1, (0x40 - 1), %o1 + and %g2, 7, %g2 + andncc %g3, 0x7, %g3 + fmovd %f0, %f2 + sub %g3, 0x8, %g3 + sub %o2, %o4, %o2 + + add %g1, %o4, %g1 + subcc %o2, %g3, %o2 + + EX_LD(LOAD_BLK(%o1, %f0)) + add %o1, 0x40, %o1 + add %g1, %g3, %g1 + EX_LD(LOAD_BLK(%o1, %f16)) + add %o1, 0x40, %o1 + sub %o4, 0x80, %o4 + EX_LD(LOAD_BLK(%o1, %f32)) + add %o1, 0x40, %o1 + + /* There are 8 instances of the unrolled loop, + * one for each possible alignment of the + * source buffer. Each loop instance is 452 + * bytes. + */ + sll %g2, 3, %o3 + sub %o3, %g2, %o3 + sllx %o3, 4, %o3 + add %o3, %g2, %o3 + sllx %o3, 2, %g2 +1: rd %pc, %o3 + add %o3, %lo(1f - 1b), %o3 + jmpl %o3 + %g2, %g0 + nop + + .align 64 +1: FREG_FROB(f0, f2, f4, f6, f8, f10,f12,f14,f16) + LOOP_CHUNK1(o1, o0, o4, 1f) + FREG_FROB(f16,f18,f20,f22,f24,f26,f28,f30,f32) + LOOP_CHUNK2(o1, o0, o4, 2f) + FREG_FROB(f32,f34,f36,f38,f40,f42,f44,f46,f0) + LOOP_CHUNK3(o1, o0, o4, 3f) + ba,pt %xcc, 1b+4 + faligndata %f0, %f2, %f48 +1: FREG_FROB(f16,f18,f20,f22,f24,f26,f28,f30,f32) + STORE_SYNC(o0, f48) membar #Sync + FREG_FROB(f32,f34,f36,f38,f40,f42,f44,f46,f0) + STORE_JUMP(o0, f48, 40f) membar #Sync +2: FREG_FROB(f32,f34,f36,f38,f40,f42,f44,f46,f0) + STORE_SYNC(o0, f48) membar #Sync + FREG_FROB(f0, f2, f4, f6, f8, f10,f12,f14,f16) + STORE_JUMP(o0, f48, 48f) membar #Sync +3: FREG_FROB(f0, f2, f4, f6, f8, f10,f12,f14,f16) + STORE_SYNC(o0, f48) membar #Sync + FREG_FROB(f16,f18,f20,f22,f24,f26,f28,f30,f32) + STORE_JUMP(o0, f48, 56f) membar #Sync + +1: FREG_FROB(f2, f4, f6, f8, f10,f12,f14,f16,f18) + LOOP_CHUNK1(o1, o0, o4, 1f) + FREG_FROB(f18,f20,f22,f24,f26,f28,f30,f32,f34) + LOOP_CHUNK2(o1, o0, o4, 2f) + FREG_FROB(f34,f36,f38,f40,f42,f44,f46,f0, f2) + LOOP_CHUNK3(o1, o0, o4, 3f) + ba,pt %xcc, 1b+4 + faligndata %f2, %f4, %f48 +1: FREG_FROB(f18,f20,f22,f24,f26,f28,f30,f32,f34) + STORE_SYNC(o0, f48) membar #Sync + FREG_FROB(f34,f36,f38,f40,f42,f44,f46,f0, f2) + STORE_JUMP(o0, f48, 41f) membar #Sync +2: FREG_FROB(f34,f36,f38,f40,f42,f44,f46,f0, f2) + STORE_SYNC(o0, f48) membar #Sync + FREG_FROB(f2, f4, f6, f8, f10,f12,f14,f16,f18) + STORE_JUMP(o0, f48, 49f) membar #Sync +3: FREG_FROB(f2, f4, f6, f8, f10,f12,f14,f16,f18) + STORE_SYNC(o0, f48) membar #Sync + FREG_FROB(f18,f20,f22,f24,f26,f28,f30,f32,f34) + STORE_JUMP(o0, f48, 57f) membar #Sync + +1: FREG_FROB(f4, f6, f8, f10,f12,f14,f16,f18,f20) + LOOP_CHUNK1(o1, o0, o4, 1f) + FREG_FROB(f20,f22,f24,f26,f28,f30,f32,f34,f36) + LOOP_CHUNK2(o1, o0, o4, 2f) + FREG_FROB(f36,f38,f40,f42,f44,f46,f0, f2, f4) + LOOP_CHUNK3(o1, o0, o4, 3f) + ba,pt %xcc, 1b+4 + faligndata %f4, %f6, %f48 +1: FREG_FROB(f20,f22,f24,f26,f28,f30,f32,f34,f36) + STORE_SYNC(o0, f48) membar #Sync + FREG_FROB(f36,f38,f40,f42,f44,f46,f0, f2, f4) + STORE_JUMP(o0, f48, 42f) membar #Sync +2: FREG_FROB(f36,f38,f40,f42,f44,f46,f0, f2, f4) + STORE_SYNC(o0, f48) membar #Sync + FREG_FROB(f4, f6, f8, f10,f12,f14,f16,f18,f20) + STORE_JUMP(o0, f48, 50f) membar #Sync +3: FREG_FROB(f4, f6, f8, f10,f12,f14,f16,f18,f20) + STORE_SYNC(o0, f48) membar #Sync + FREG_FROB(f20,f22,f24,f26,f28,f30,f32,f34,f36) + STORE_JUMP(o0, f48, 58f) membar #Sync + +1: FREG_FROB(f6, f8, f10,f12,f14,f16,f18,f20,f22) + LOOP_CHUNK1(o1, o0, o4, 1f) + FREG_FROB(f22,f24,f26,f28,f30,f32,f34,f36,f38) + LOOP_CHUNK2(o1, o0, o4, 2f) + FREG_FROB(f38,f40,f42,f44,f46,f0, f2, f4, f6) + LOOP_CHUNK3(o1, o0, o4, 3f) + ba,pt %xcc, 1b+4 + faligndata %f6, %f8, %f48 +1: FREG_FROB(f22,f24,f26,f28,f30,f32,f34,f36,f38) + STORE_SYNC(o0, f48) membar #Sync + FREG_FROB(f38,f40,f42,f44,f46,f0, f2, f4, f6) + STORE_JUMP(o0, f48, 43f) membar #Sync +2: FREG_FROB(f38,f40,f42,f44,f46,f0, f2, f4, f6) + STORE_SYNC(o0, f48) membar #Sync + FREG_FROB(f6, f8, f10,f12,f14,f16,f18,f20,f22) + STORE_JUMP(o0, f48, 51f) membar #Sync +3: FREG_FROB(f6, f8, f10,f12,f14,f16,f18,f20,f22) + STORE_SYNC(o0, f48) membar #Sync + FREG_FROB(f22,f24,f26,f28,f30,f32,f34,f36,f38) + STORE_JUMP(o0, f48, 59f) membar #Sync + +1: FREG_FROB(f8, f10,f12,f14,f16,f18,f20,f22,f24) + LOOP_CHUNK1(o1, o0, o4, 1f) + FREG_FROB(f24,f26,f28,f30,f32,f34,f36,f38,f40) + LOOP_CHUNK2(o1, o0, o4, 2f) + FREG_FROB(f40,f42,f44,f46,f0, f2, f4, f6, f8) + LOOP_CHUNK3(o1, o0, o4, 3f) + ba,pt %xcc, 1b+4 + faligndata %f8, %f10, %f48 +1: FREG_FROB(f24,f26,f28,f30,f32,f34,f36,f38,f40) + STORE_SYNC(o0, f48) membar #Sync + FREG_FROB(f40,f42,f44,f46,f0, f2, f4, f6, f8) + STORE_JUMP(o0, f48, 44f) membar #Sync +2: FREG_FROB(f40,f42,f44,f46,f0, f2, f4, f6, f8) + STORE_SYNC(o0, f48) membar #Sync + FREG_FROB(f8, f10,f12,f14,f16,f18,f20,f22,f24) + STORE_JUMP(o0, f48, 52f) membar #Sync +3: FREG_FROB(f8, f10,f12,f14,f16,f18,f20,f22,f24) + STORE_SYNC(o0, f48) membar #Sync + FREG_FROB(f24,f26,f28,f30,f32,f34,f36,f38,f40) + STORE_JUMP(o0, f48, 60f) membar #Sync + +1: FREG_FROB(f10,f12,f14,f16,f18,f20,f22,f24,f26) + LOOP_CHUNK1(o1, o0, o4, 1f) + FREG_FROB(f26,f28,f30,f32,f34,f36,f38,f40,f42) + LOOP_CHUNK2(o1, o0, o4, 2f) + FREG_FROB(f42,f44,f46,f0, f2, f4, f6, f8, f10) + LOOP_CHUNK3(o1, o0, o4, 3f) + ba,pt %xcc, 1b+4 + faligndata %f10, %f12, %f48 +1: FREG_FROB(f26,f28,f30,f32,f34,f36,f38,f40,f42) + STORE_SYNC(o0, f48) membar #Sync + FREG_FROB(f42,f44,f46,f0, f2, f4, f6, f8, f10) + STORE_JUMP(o0, f48, 45f) membar #Sync +2: FREG_FROB(f42,f44,f46,f0, f2, f4, f6, f8, f10) + STORE_SYNC(o0, f48) membar #Sync + FREG_FROB(f10,f12,f14,f16,f18,f20,f22,f24,f26) + STORE_JUMP(o0, f48, 53f) membar #Sync +3: FREG_FROB(f10,f12,f14,f16,f18,f20,f22,f24,f26) + STORE_SYNC(o0, f48) membar #Sync + FREG_FROB(f26,f28,f30,f32,f34,f36,f38,f40,f42) + STORE_JUMP(o0, f48, 61f) membar #Sync + +1: FREG_FROB(f12,f14,f16,f18,f20,f22,f24,f26,f28) + LOOP_CHUNK1(o1, o0, o4, 1f) + FREG_FROB(f28,f30,f32,f34,f36,f38,f40,f42,f44) + LOOP_CHUNK2(o1, o0, o4, 2f) + FREG_FROB(f44,f46,f0, f2, f4, f6, f8, f10,f12) + LOOP_CHUNK3(o1, o0, o4, 3f) + ba,pt %xcc, 1b+4 + faligndata %f12, %f14, %f48 +1: FREG_FROB(f28,f30,f32,f34,f36,f38,f40,f42,f44) + STORE_SYNC(o0, f48) membar #Sync + FREG_FROB(f44,f46,f0, f2, f4, f6, f8, f10,f12) + STORE_JUMP(o0, f48, 46f) membar #Sync +2: FREG_FROB(f44,f46,f0, f2, f4, f6, f8, f10,f12) + STORE_SYNC(o0, f48) membar #Sync + FREG_FROB(f12,f14,f16,f18,f20,f22,f24,f26,f28) + STORE_JUMP(o0, f48, 54f) membar #Sync +3: FREG_FROB(f12,f14,f16,f18,f20,f22,f24,f26,f28) + STORE_SYNC(o0, f48) membar #Sync + FREG_FROB(f28,f30,f32,f34,f36,f38,f40,f42,f44) + STORE_JUMP(o0, f48, 62f) membar #Sync + +1: FREG_FROB(f14,f16,f18,f20,f22,f24,f26,f28,f30) + LOOP_CHUNK1(o1, o0, o4, 1f) + FREG_FROB(f30,f32,f34,f36,f38,f40,f42,f44,f46) + LOOP_CHUNK2(o1, o0, o4, 2f) + FREG_FROB(f46,f0, f2, f4, f6, f8, f10,f12,f14) + LOOP_CHUNK3(o1, o0, o4, 3f) + ba,pt %xcc, 1b+4 + faligndata %f14, %f16, %f48 +1: FREG_FROB(f30,f32,f34,f36,f38,f40,f42,f44,f46) + STORE_SYNC(o0, f48) membar #Sync + FREG_FROB(f46,f0, f2, f4, f6, f8, f10,f12,f14) + STORE_JUMP(o0, f48, 47f) membar #Sync +2: FREG_FROB(f46,f0, f2, f4, f6, f8, f10,f12,f14) + STORE_SYNC(o0, f48) membar #Sync + FREG_FROB(f14,f16,f18,f20,f22,f24,f26,f28,f30) + STORE_JUMP(o0, f48, 55f) membar #Sync +3: FREG_FROB(f14,f16,f18,f20,f22,f24,f26,f28,f30) + STORE_SYNC(o0, f48) membar #Sync + FREG_FROB(f30,f32,f34,f36,f38,f40,f42,f44,f46) + STORE_JUMP(o0, f48, 63f) membar #Sync + +40: FINISH_VISCHUNK(o0, f0, f2, g3) +41: FINISH_VISCHUNK(o0, f2, f4, g3) +42: FINISH_VISCHUNK(o0, f4, f6, g3) +43: FINISH_VISCHUNK(o0, f6, f8, g3) +44: FINISH_VISCHUNK(o0, f8, f10, g3) +45: FINISH_VISCHUNK(o0, f10, f12, g3) +46: FINISH_VISCHUNK(o0, f12, f14, g3) +47: UNEVEN_VISCHUNK(o0, f14, f0, g3) +48: FINISH_VISCHUNK(o0, f16, f18, g3) +49: FINISH_VISCHUNK(o0, f18, f20, g3) +50: FINISH_VISCHUNK(o0, f20, f22, g3) +51: FINISH_VISCHUNK(o0, f22, f24, g3) +52: FINISH_VISCHUNK(o0, f24, f26, g3) +53: FINISH_VISCHUNK(o0, f26, f28, g3) +54: FINISH_VISCHUNK(o0, f28, f30, g3) +55: UNEVEN_VISCHUNK(o0, f30, f0, g3) +56: FINISH_VISCHUNK(o0, f32, f34, g3) +57: FINISH_VISCHUNK(o0, f34, f36, g3) +58: FINISH_VISCHUNK(o0, f36, f38, g3) +59: FINISH_VISCHUNK(o0, f38, f40, g3) +60: FINISH_VISCHUNK(o0, f40, f42, g3) +61: FINISH_VISCHUNK(o0, f42, f44, g3) +62: FINISH_VISCHUNK(o0, f44, f46, g3) +63: UNEVEN_VISCHUNK_LAST(o0, f46, f0, g3) + +93: EX_LD(LOAD(ldd, %o1, %f2)) + add %o1, 8, %o1 + subcc %g3, 8, %g3 + faligndata %f0, %f2, %f8 + EX_ST(STORE(std, %f8, %o0)) + bl,pn %xcc, 95f + add %o0, 8, %o0 + EX_LD(LOAD(ldd, %o1, %f0)) + add %o1, 8, %o1 + subcc %g3, 8, %g3 + faligndata %f2, %f0, %f8 + EX_ST(STORE(std, %f8, %o0)) + bge,pt %xcc, 93b + add %o0, 8, %o0 + +95: brz,pt %o2, 2f + mov %g1, %o1 + +1: EX_LD(LOAD(ldub, %o1, %o3)) + add %o1, 1, %o1 + subcc %o2, 1, %o2 + EX_ST(STORE(stb, %o3, %o0)) + bne,pt %xcc, 1b + add %o0, 1, %o0 + +2: membar #StoreLoad | #StoreStore + VISExit + retl + mov EX_RETVAL(%g5), %o0 + + .align 64 +70: /* 16 < len <= (5 * 64) */ + bne,pn %XCC, 75f + sub %o0, %o1, %o3 + +72: andn %o2, 0xf, %o4 + and %o2, 0xf, %o2 +1: EX_LD(LOAD(ldx, %o1 + 0x00, %o5)) + EX_LD(LOAD(ldx, %o1 + 0x08, %g1)) + subcc %o4, 0x10, %o4 + EX_ST(STORE(stx, %o5, %o1 + %o3)) + add %o1, 0x8, %o1 + EX_ST(STORE(stx, %g1, %o1 + %o3)) + bgu,pt %XCC, 1b + add %o1, 0x8, %o1 +73: andcc %o2, 0x8, %g0 + be,pt %XCC, 1f + nop + EX_LD(LOAD(ldx, %o1, %o5)) + sub %o2, 0x8, %o2 + EX_ST(STORE(stx, %o5, %o1 + %o3)) + add %o1, 0x8, %o1 +1: andcc %o2, 0x4, %g0 + be,pt %XCC, 1f + nop + EX_LD(LOAD(lduw, %o1, %o5)) + sub %o2, 0x4, %o2 + EX_ST(STORE(stw, %o5, %o1 + %o3)) + add %o1, 0x4, %o1 +1: cmp %o2, 0 + be,pt %XCC, 85f + nop + ba,pt %xcc, 90f + nop + +75: andcc %o0, 0x7, %g1 + sub %g1, 0x8, %g1 + be,pn %icc, 2f + sub %g0, %g1, %g1 + sub %o2, %g1, %o2 + +1: EX_LD(LOAD(ldub, %o1, %o5)) + subcc %g1, 1, %g1 + EX_ST(STORE(stb, %o5, %o1 + %o3)) + bgu,pt %icc, 1b + add %o1, 1, %o1 + +2: add %o1, %o3, %o0 + andcc %o1, 0x7, %g1 + bne,pt %icc, 8f + sll %g1, 3, %g1 + + cmp %o2, 16 + bgeu,pt %icc, 72b + nop + ba,a,pt %xcc, 73b + +8: mov 64, %o3 + andn %o1, 0x7, %o1 + EX_LD(LOAD(ldx, %o1, %g2)) + sub %o3, %g1, %o3 + andn %o2, 0x7, %o4 + sllx %g2, %g1, %g2 +1: EX_LD(LOAD(ldx, %o1 + 0x8, %g3)) + subcc %o4, 0x8, %o4 + add %o1, 0x8, %o1 + srlx %g3, %o3, %o5 + or %o5, %g2, %o5 + EX_ST(STORE(stx, %o5, %o0)) + add %o0, 0x8, %o0 + bgu,pt %icc, 1b + sllx %g3, %g1, %g2 + + srl %g1, 3, %g1 + andcc %o2, 0x7, %o2 + be,pn %icc, 85f + add %o1, %g1, %o1 + ba,pt %xcc, 90f + sub %o0, %o1, %o3 + + .align 64 +80: /* 0 < len <= 16 */ + andcc %o3, 0x3, %g0 + bne,pn %XCC, 90f + sub %o0, %o1, %o3 + +1: EX_LD(LOAD(lduw, %o1, %g1)) + subcc %o2, 4, %o2 + EX_ST(STORE(stw, %g1, %o1 + %o3)) + bgu,pt %XCC, 1b + add %o1, 4, %o1 + +85: retl + mov EX_RETVAL(%g5), %o0 + + .align 32 +90: EX_LD(LOAD(ldub, %o1, %g1)) + subcc %o2, 1, %o2 + EX_ST(STORE(stb, %g1, %o1 + %o3)) + bgu,pt %XCC, 90b + add %o1, 1, %o1 + retl + mov EX_RETVAL(%g5), %o0 diff -urN linux-2.4.27/arch/sparc64/lib/U3copy_from_user.S linux-2.4.28/arch/sparc64/lib/U3copy_from_user.S --- linux-2.4.27/arch/sparc64/lib/U3copy_from_user.S 2002-11-28 15:53:12.000000000 -0800 +++ linux-2.4.28/arch/sparc64/lib/U3copy_from_user.S 2004-11-17 03:54:21.143379186 -0800 @@ -1,519 +1,22 @@ -/* $Id: U3copy_from_user.S,v 1.3.2.1 2002/01/15 07:17:47 davem Exp $ - * U3memcpy.S: UltraSparc-III optimized copy from userspace. +/* U3copy_from_user.S: UltraSparc-III optimized copy from userspace. * - * Copyright (C) 1999, 2000 David S. Miller (davem@redhat.com) + * Copyright (C) 1999, 2000, 2004 David S. Miller (davem@redhat.com) */ -#ifdef __KERNEL__ -#include -#include -#include -#include -#undef SMALL_COPY_USES_FPU -#define EXNV(x,y,a,b) \ -98: x,y; \ - .section .fixup; \ - .align 4; \ -99: VISExitHalf; \ - ba U3cfu_fixup; \ - a, b, %o1; \ - .section __ex_table; \ - .align 4; \ - .word 98b, 99b; \ - .text; \ +#define EX_LD(x) \ +98: x; \ + .section .fixup; \ + .align 4; \ +99: retl; \ + mov 1, %o0; \ + .section __ex_table; \ + .align 4; \ + .word 98b, 99b; \ + .text; \ .align 4; -#define EX(x,y,a,b) \ -98: x,y; \ - .section .fixup; \ - .align 4; \ -99: VISExitHalf; \ - ba U3cfu_fixup; \ - a, b, %o1; \ - .section __ex_table; \ - .align 4; \ - .word 98b, 99b; \ - .text; \ - .align 4; -#define EX2(x,y) \ -98: x,y; \ - .section .fixup; \ - .align 4; \ -99: VISExitHalf; \ - and %o2, (0x40 - 1), %o1; \ - add %o1, %o4, %o1; \ - ba U3cfu_fixup; \ - add %o1, 0x1c0, %o1; \ - .section __ex_table; \ - .align 4; \ - .word 98b, 99b; \ - .text; \ - .align 4; -#define EX3(x,y) \ -98: x,y; \ - .section .fixup; \ - .align 4; \ -99: VISExitHalf; \ - and %o2, (0x40 - 1), %o1; \ - sll %g3, 6, %g3; \ - add %o1, 0x80, %o1; \ - ba U3cfu_fixup; \ - add %o1, %g3, %o1; \ - .section __ex_table; \ - .align 4; \ - .word 98b, 99b; \ - .text; \ - .align 4; -#define EX4(x,y) \ -98: x,y; \ - .section .fixup; \ - .align 4; \ -99: VISExitHalf; \ - and %o2, (0x40 - 1), %o1; \ - add %o1, 0x40, %o1; \ - ba U3cfu_fixup; \ - add %o1, %g3, %o1; \ - .section __ex_table; \ - .align 4; \ - .word 98b, 99b; \ - .text; \ - .align 4; -#else -#define ASI_BLK_P 0xf0 -#define FPRS_FEF 0x04 -#define VISEntryHalf rd %fprs, %o5; wr %g0, FPRS_FEF, %fprs -#define VISExitHalf and %o5, FPRS_FEF, %o5; wr %o5, 0x0, %fprs -#define SMALL_COPY_USES_FPU -#define EXNV(x,y,a,b) x,y; -#define EX(x,y,a,b) x,y; -#define EX2(x,y) x,y; -#define EX3(x,y) x,y; -#define EX4(x,y) x,y; -#endif - - /* Special/non-trivial issues of this code: - * - * 1) %o5 is preserved from VISEntryHalf to VISExitHalf - * 2) Only low 32 FPU registers are used so that only the - * lower half of the FPU register set is dirtied by this - * code. This is especially important in the kernel. - * 3) This code never prefetches cachelines past the end - * of the source buffer. - */ - - .text - .align 32 - - /* The cheetah's flexible spine, oversized liver, enlarged heart, - * slender muscular body, and claws make it the swiftest hunter - * in Africa and the fastest animal on land. Can reach speeds - * of up to 2.4GB per second. - */ - - .globl U3copy_from_user -U3copy_from_user: /* %o0=dst, %o1=src, %o2=len */ -#ifndef __KERNEL__ - /* Save away original 'dst' for memcpy return value. */ - mov %o0, %g3 ! A0 Group -#endif - /* Anything to copy at all? */ - cmp %o2, 0 ! A1 - ble,pn %icc, U3copy_from_user_short_ret! BR - - /* Extremely small copy? */ - cmp %o2, 31 ! A0 Group - ble,pn %icc, U3copy_from_user_short ! BR - - /* Large enough to use unrolled prefetch loops? */ - cmp %o2, 0x100 ! A1 - bge,a,pt %icc, U3copy_from_user_enter ! BR Group - andcc %o0, 0x3f, %g2 ! A0 - - ba,pt %xcc, U3copy_from_user_toosmall ! BR Group - andcc %o0, 0x7, %g2 ! A0 - - .align 32 -U3copy_from_user_short: - /* Copy %o2 bytes from src to dst, one byte at a time. */ - EXNV(lduba [%o1 + 0x00] %asi, %o3, add %o2, %g0)! MS Group - add %o1, 0x1, %o1 ! A0 - add %o0, 0x1, %o0 ! A1 - subcc %o2, 1, %o2 ! A0 Group - - bg,pt %icc, U3copy_from_user_short ! BR - stb %o3, [%o0 + -1] ! MS Group (1-cycle stall) - -U3copy_from_user_short_ret: -#ifdef __KERNEL__ - retl ! BR Group (0-4 cycle stall) - clr %o0 ! A0 -#else - retl ! BR Group (0-4 cycle stall) - mov %g3, %o0 ! A0 -#endif - - /* Here len >= (6 * 64) and condition codes reflect execution - * of "andcc %o0, 0x7, %g2", done by caller. - */ - .align 64 -U3copy_from_user_enter: - /* Is 'dst' already aligned on an 64-byte boundary? */ - be,pt %xcc, 2f ! BR - - /* Compute abs((dst & 0x3f) - 0x40) into %g2. This is the number - * of bytes to copy to make 'dst' 64-byte aligned. We pre- - * subtract this from 'len'. - */ - sub %g2, 0x40, %g2 ! A0 Group - sub %g0, %g2, %g2 ! A0 Group - sub %o2, %g2, %o2 ! A0 Group - - /* Copy %g2 bytes from src to dst, one byte at a time. */ -1: EXNV(lduba [%o1 + 0x00] %asi, %o3, add %o2, %g2)! MS (Group) - add %o1, 0x1, %o1 ! A1 - add %o0, 0x1, %o0 ! A0 Group - subcc %g2, 0x1, %g2 ! A1 - - bg,pt %icc, 1b ! BR Group - stb %o3, [%o0 + -1] ! MS Group - -2: VISEntryHalf ! MS+MS - and %o1, 0x7, %g1 ! A1 - ba,pt %xcc, U3copy_from_user_begin ! BR - alignaddr %o1, %g0, %o1 ! MS (Break-after) - - .align 64 -U3copy_from_user_begin: -#ifdef __KERNEL__ - .globl U3copy_from_user_nop_1_6 -U3copy_from_user_nop_1_6: - ldxa [%g0] ASI_DCU_CONTROL_REG, %g3 - sethi %uhi(DCU_PE), %o3 - sllx %o3, 32, %o3 - or %g3, %o3, %o3 - stxa %o3, [%g0] ASI_DCU_CONTROL_REG ! Enable P-cache - membar #Sync -#endif - prefetcha [%o1 + 0x000] %asi, #one_read ! MS Group1 - prefetcha [%o1 + 0x040] %asi, #one_read ! MS Group2 - andn %o2, (0x40 - 1), %o4 ! A0 - prefetcha [%o1 + 0x080] %asi, #one_read ! MS Group3 - cmp %o4, 0x140 ! A0 - prefetcha [%o1 + 0x0c0] %asi, #one_read ! MS Group4 - EX(ldda [%o1 + 0x000] %asi, %f0, add %o2, %g0) ! MS Group5 (%f0 results at G8) - bge,a,pt %icc, 1f ! BR - - prefetcha [%o1 + 0x100] %asi, #one_read ! MS Group6 -1: EX(ldda [%o1 + 0x008] %asi, %f2, add %o2, %g0) ! AX (%f2 results at G9) - cmp %o4, 0x180 ! A1 - bge,a,pt %icc, 1f ! BR - prefetcha [%o1 + 0x140] %asi, #one_read ! MS Group7 -1: EX(ldda [%o1 + 0x010] %asi, %f4, add %o2, %g0) ! AX (%f4 results at G10) - cmp %o4, 0x1c0 ! A1 - bge,a,pt %icc, 1f ! BR - - prefetcha [%o1 + 0x180] %asi, #one_read ! MS Group8 -1: faligndata %f0, %f2, %f16 ! FGA Group9 (%f16 at G12) - EX(ldda [%o1 + 0x018] %asi, %f6, add %o2, %g0) ! AX (%f6 results at G12) - faligndata %f2, %f4, %f18 ! FGA Group10 (%f18 results at G13) - EX(ldda [%o1 + 0x020] %asi, %f8, add %o2, %g0) ! MS (%f8 results at G13) - faligndata %f4, %f6, %f20 ! FGA Group12 (1-cycle stall,%f20 at G15) - EX(ldda [%o1 + 0x028] %asi, %f10, add %o2, %g0) ! MS (%f10 results at G15) - faligndata %f6, %f8, %f22 ! FGA Group13 (%f22 results at G16) - - EX(ldda [%o1 + 0x030] %asi, %f12, add %o2, %g0) ! MS (%f12 results at G16) - faligndata %f8, %f10, %f24 ! FGA Group15 (1-cycle stall,%f24 at G18) - EX(ldda [%o1 + 0x038] %asi, %f14, add %o2, %g0) ! MS (%f14 results at G18) - faligndata %f10, %f12, %f26 ! FGA Group16 (%f26 results at G19) - EX(ldda [%o1 + 0x040] %asi, %f0, add %o2, %g0) ! MS (%f0 results at G19) - - /* We only use the first loop if len > (7 * 64). */ - subcc %o4, 0x1c0, %o4 ! A0 Group17 - bg,pt %icc, U3copy_from_user_loop1 ! BR - add %o1, 0x40, %o1 ! A1 - - add %o4, 0x140, %o4 ! A0 Group18 - ba,pt %xcc, U3copy_from_user_loop2 ! BR - srl %o4, 6, %o3 ! A0 Group19 - nop - nop - nop - nop - nop - - nop - nop - - /* This loop performs the copy and queues new prefetches. - * We drop into the second loop when len <= (5 * 64). Note - * that this (5 * 64) factor has been subtracted from len - * already. - */ -U3copy_from_user_loop1: - EX2(ldda [%o1 + 0x008] %asi, %f2) ! MS Group2 (%f2 results at G5) - faligndata %f12, %f14, %f28 ! FGA (%f28 results at G5) - EX2(ldda [%o1 + 0x010] %asi, %f4) ! MS Group3 (%f4 results at G6) - faligndata %f14, %f0, %f30 ! FGA Group4 (1-cycle stall, %f30 at G7) - stda %f16, [%o0] ASI_BLK_P ! MS - EX2(ldda [%o1 + 0x018] %asi, %f6) ! AX (%f6 results at G7) - - faligndata %f0, %f2, %f16 ! FGA Group12 (7-cycle stall) - EX2(ldda [%o1 + 0x020] %asi, %f8) ! MS (%f8 results at G15) - faligndata %f2, %f4, %f18 ! FGA Group13 (%f18 results at G16) - EX2(ldda [%o1 + 0x028] %asi, %f10) ! MS (%f10 results at G16) - faligndata %f4, %f6, %f20 ! FGA Group14 (%f20 results at G17) - EX2(ldda [%o1 + 0x030] %asi, %f12) ! MS (%f12 results at G17) - faligndata %f6, %f8, %f22 ! FGA Group15 (%f22 results at G18) - EX2(ldda [%o1 + 0x038] %asi, %f14) ! MS (%f14 results at G18) - - faligndata %f8, %f10, %f24 ! FGA Group16 (%f24 results at G19) - EX2(ldda [%o1 + 0x040] %asi, %f0) ! AX (%f0 results at G19) - prefetcha [%o1 + 0x180] %asi, #one_read ! MS - faligndata %f10, %f12, %f26 ! FGA Group17 (%f26 results at G20) - subcc %o4, 0x40, %o4 ! A0 - add %o1, 0x40, %o1 ! A1 - bg,pt %xcc, U3copy_from_user_loop1 ! BR - add %o0, 0x40, %o0 ! A0 Group18 - -U3copy_from_user_loop2_enter: - mov 5, %o3 ! A1 - - /* This loop performs on the copy, no new prefetches are - * queued. We do things this way so that we do not perform - * any spurious prefetches past the end of the src buffer. - */ -U3copy_from_user_loop2: - EX3(ldda [%o1 + 0x008] %asi, %f2) ! MS - faligndata %f12, %f14, %f28 ! FGA Group2 - EX3(ldda [%o1 + 0x010] %asi, %f4) ! MS - faligndata %f14, %f0, %f30 ! FGA Group4 (1-cycle stall) - stda %f16, [%o0] ASI_BLK_P ! MS - EX3(ldda [%o1 + 0x018] %asi, %f6) ! AX - faligndata %f0, %f2, %f16 ! FGA Group12 (7-cycle stall) - - EX3(ldda [%o1 + 0x020] %asi, %f8) ! MS - faligndata %f2, %f4, %f18 ! FGA Group13 - EX3(ldda [%o1 + 0x028] %asi, %f10) ! MS - faligndata %f4, %f6, %f20 ! FGA Group14 - EX3(ldda [%o1 + 0x030] %asi, %f12) ! MS - faligndata %f6, %f8, %f22 ! FGA Group15 - EX3(ldda [%o1 + 0x038] %asi, %f14) ! MS - faligndata %f8, %f10, %f24 ! FGA Group16 - - EX3(ldda [%o1 + 0x040] %asi, %f0) ! AX - faligndata %f10, %f12, %f26 ! FGA Group17 - subcc %o3, 0x01, %o3 ! A0 - add %o1, 0x40, %o1 ! A1 - bg,pt %xcc, U3copy_from_user_loop2 ! BR - add %o0, 0x40, %o0 ! A0 Group18 - - /* Finally we copy the last full 64-byte block. */ -U3copy_from_user_loopfini: - EX3(ldda [%o1 + 0x008] %asi, %f2) ! MS - faligndata %f12, %f14, %f28 ! FGA - EX3(ldda [%o1 + 0x010] %asi, %f4) ! MS Group19 - faligndata %f14, %f0, %f30 ! FGA - stda %f16, [%o0] ASI_BLK_P ! MS Group20 - EX3(ldda [%o1 + 0x018] %asi, %f6) ! AX - faligndata %f0, %f2, %f16 ! FGA Group11 (7-cycle stall) - EX3(ldda [%o1 + 0x020] %asi, %f8) ! MS - faligndata %f2, %f4, %f18 ! FGA Group12 - EX3(ldda [%o1 + 0x028] %asi, %f10) ! MS - faligndata %f4, %f6, %f20 ! FGA Group13 - EX3(ldda [%o1 + 0x030] %asi, %f12) ! MS - faligndata %f6, %f8, %f22 ! FGA Group14 - EX3(ldda [%o1 + 0x038] %asi, %f14) ! MS - faligndata %f8, %f10, %f24 ! FGA Group15 - cmp %g1, 0 ! A0 - be,pt %icc, 1f ! BR - add %o0, 0x40, %o0 ! A1 - EX4(ldda [%o1 + 0x040] %asi, %f0) ! MS -1: faligndata %f10, %f12, %f26 ! FGA Group16 - faligndata %f12, %f14, %f28 ! FGA Group17 - faligndata %f14, %f0, %f30 ! FGA Group18 - stda %f16, [%o0] ASI_BLK_P ! MS - add %o0, 0x40, %o0 ! A0 - add %o1, 0x40, %o1 ! A1 -#ifdef __KERNEL__ - .globl U3copy_from_user_nop_2_3 -U3copy_from_user_nop_2_3: - mov PRIMARY_CONTEXT, %o3 - stxa %g0, [%o3] ASI_DMMU ! Flush P-cache - stxa %g3, [%g0] ASI_DCU_CONTROL_REG ! Disable P-cache -#endif - membar #Sync ! MS Group26 (7-cycle stall) - - /* Now we copy the (len modulo 64) bytes at the end. - * Note how we borrow the %f0 loaded above. - * - * Also notice how this code is careful not to perform a - * load past the end of the src buffer just like similar - * code found in U3copy_from_user_toosmall processing. - */ -U3copy_from_user_loopend: - and %o2, 0x3f, %o2 ! A0 Group - andcc %o2, 0x38, %g2 ! A0 Group - be,pn %icc, U3copy_from_user_endcruft ! BR - subcc %g2, 0x8, %g2 ! A1 - be,pn %icc, U3copy_from_user_endcruft ! BR Group - cmp %g1, 0 ! A0 - - be,a,pt %icc, 1f ! BR Group - EX(ldda [%o1 + 0x00] %asi, %f0, add %o2, %g0) ! MS - -1: EX(ldda [%o1 + 0x08] %asi, %f2, add %o2, %g0) ! MS Group - add %o1, 0x8, %o1 ! A0 - sub %o2, 0x8, %o2 ! A1 - subcc %g2, 0x8, %g2 ! A0 Group - faligndata %f0, %f2, %f8 ! FGA Group - std %f8, [%o0 + 0x00] ! MS (XXX does it stall here? XXX) - be,pn %icc, U3copy_from_user_endcruft ! BR - add %o0, 0x8, %o0 ! A0 - EX(ldda [%o1 + 0x08] %asi, %f0, add %o2, %g0) ! MS Group - add %o1, 0x8, %o1 ! A0 - sub %o2, 0x8, %o2 ! A1 - subcc %g2, 0x8, %g2 ! A0 Group - faligndata %f2, %f0, %f8 ! FGA - std %f8, [%o0 + 0x00] ! MS (XXX does it stall here? XXX) - bne,pn %icc, 1b ! BR - add %o0, 0x8, %o0 ! A0 Group - - /* If anything is left, we copy it one byte at a time. - * Note that %g1 is (src & 0x3) saved above before the - * alignaddr was performed. - */ -U3copy_from_user_endcruft: - cmp %o2, 0 - add %o1, %g1, %o1 - VISExitHalf - be,pn %icc, U3copy_from_user_short_ret - nop - ba,a,pt %xcc, U3copy_from_user_short - - /* If we get here, then 32 <= len < (6 * 64) */ -U3copy_from_user_toosmall: - -#ifdef SMALL_COPY_USES_FPU - - /* Is 'dst' already aligned on an 8-byte boundary? */ - be,pt %xcc, 2f ! BR Group - - /* Compute abs((dst & 7) - 8) into %g2. This is the number - * of bytes to copy to make 'dst' 8-byte aligned. We pre- - * subtract this from 'len'. - */ - sub %g2, 0x8, %g2 ! A0 - sub %g0, %g2, %g2 ! A0 Group (reg-dep) - sub %o2, %g2, %o2 ! A0 Group (reg-dep) - - /* Copy %g2 bytes from src to dst, one byte at a time. */ -1: EXNV(lduba [%o1 + 0x00] %asi, %o3, add %o2, %g2)! MS (Group) (%o3 in 3 cycles) - add %o1, 0x1, %o1 ! A1 - add %o0, 0x1, %o0 ! A0 Group - subcc %g2, 0x1, %g2 ! A1 - - bg,pt %icc, 1b ! BR Group - stb %o3, [%o0 + -1] ! MS Group - -2: VISEntryHalf ! MS+MS - - /* Compute (len - (len % 8)) into %g2. This is guarenteed - * to be nonzero. - */ - andn %o2, 0x7, %g2 ! A0 Group - - /* You may read this and believe that it allows reading - * one 8-byte longword past the end of src. It actually - * does not, as %g2 is subtracted as loads are done from - * src, so we always stop before running off the end. - * Also, we are guarenteed to have at least 0x10 bytes - * to move here. - */ - sub %g2, 0x8, %g2 ! A0 Group (reg-dep) - alignaddr %o1, %g0, %g1 ! MS (Break-after) - EX(ldda [%g1 + 0x00] %asi, %f0, add %o2, %g0) ! MS Group (1-cycle stall) - add %g1, 0x8, %g1 ! A0 - -1: EX(ldda [%g1 + 0x00] %asi, %f2, add %o2, %g0) ! MS Group - add %g1, 0x8, %g1 ! A0 - sub %o2, 0x8, %o2 ! A1 - subcc %g2, 0x8, %g2 ! A0 Group - - faligndata %f0, %f2, %f8 ! FGA Group (1-cycle stall) - std %f8, [%o0 + 0x00] ! MS Group (2-cycle stall) - add %o1, 0x8, %o1 ! A0 - be,pn %icc, 2f ! BR - - add %o0, 0x8, %o0 ! A1 - EX(ldda [%g1 + 0x00] %asi, %f0, add %o2, %g0) ! MS Group - add %g1, 0x8, %g1 ! A0 - sub %o2, 0x8, %o2 ! A1 - - subcc %g2, 0x8, %g2 ! A0 Group - faligndata %f2, %f0, %f8 ! FGA Group (1-cycle stall) - std %f8, [%o0 + 0x00] ! MS Group (2-cycle stall) - add %o1, 0x8, %o1 ! A0 - - bne,pn %icc, 1b ! BR - add %o0, 0x8, %o0 ! A1 - - /* Nothing left to copy? */ -2: cmp %o2, 0 ! A0 Group - VISExitHalf ! A0+MS - be,pn %icc, U3copy_from_user_short_ret! BR Group - nop ! A0 - ba,a,pt %xcc, U3copy_from_user_short ! BR Group - -#else /* !(SMALL_COPY_USES_FPU) */ - - xor %o1, %o0, %g2 - andcc %g2, 0x7, %g0 - bne,pn %icc, U3copy_from_user_short - andcc %o1, 0x7, %g2 - - be,pt %xcc, 2f - sub %g2, 0x8, %g2 - sub %g0, %g2, %g2 - sub %o2, %g2, %o2 - -1: EXNV(lduba [%o1 + 0x00] %asi, %o3, add %o2, %g2) - add %o1, 0x1, %o1 - add %o0, 0x1, %o0 - subcc %g2, 0x1, %g2 - bg,pt %icc, 1b - stb %o3, [%o0 + -1] - -2: andn %o2, 0x7, %g2 - sub %o2, %g2, %o2 - -3: EXNV(ldxa [%o1 + 0x00] %asi, %o3, add %o2, %g2) - add %o1, 0x8, %o1 - add %o0, 0x8, %o0 - subcc %g2, 0x8, %g2 - bg,pt %icc, 3b - stx %o3, [%o0 + -8] - - cmp %o2, 0 - bne,pn %icc, U3copy_from_user_short - nop - ba,a,pt %xcc, U3copy_from_user_short_ret - -#endif /* !(SMALL_COPY_USES_FPU) */ - -#ifdef __KERNEL__ - .globl U3cfu_fixup -U3cfu_fixup: - /* Since this is copy_from_user(), zero out the rest of the - * kernel buffer. - */ - cmp %o1, 0 - ble,pn %icc, 2f - mov %o1, %g2 -1: subcc %g2, 1, %g2 - stb %g0, [%o0] - bne,pt %icc, 1b - add %o0, 1, %o0 +#define FUNC_NAME U3copy_from_user +#define LOAD(type,addr,dest) type##a [addr] %asi, dest +#define EX_RETVAL(x) 0 -2: retl - mov %o1, %o0 -#endif +#include "U3memcpy.S" diff -urN linux-2.4.27/arch/sparc64/lib/U3copy_in_user.S linux-2.4.28/arch/sparc64/lib/U3copy_in_user.S --- linux-2.4.27/arch/sparc64/lib/U3copy_in_user.S 2001-03-25 18:14:21.000000000 -0800 +++ linux-2.4.28/arch/sparc64/lib/U3copy_in_user.S 1969-12-31 16:00:00.000000000 -0800 @@ -1,531 +0,0 @@ -/* $Id: U3copy_in_user.S,v 1.4 2001/03/21 05:58:47 davem Exp $ - * U3memcpy.S: UltraSparc-III optimized copy within userspace. - * - * Copyright (C) 1999, 2000 David S. Miller (davem@redhat.com) - */ - -#ifdef __KERNEL__ -#include -#include -#undef SMALL_COPY_USES_FPU -#define EXNV(x,y,a,b) \ -98: x,y; \ - .section .fixup; \ - .align 4; \ -99: retl; \ - a, b, %o0; \ - .section __ex_table; \ - .align 4; \ - .word 98b, 99b; \ - .text; \ - .align 4; -#define EXNV2(x,y,a,b) \ -98: x,y; \ - .section .fixup; \ - .align 4; \ -99: a, b, %o0; \ - retl; \ - add %o0, 1, %o0; \ - .section __ex_table; \ - .align 4; \ - .word 98b, 99b; \ - .text; \ - .align 4; -#define EXNV3(x,y,a,b) \ -98: x,y; \ - .section .fixup; \ - .align 4; \ -99: a, b, %o0; \ - retl; \ - add %o0, 8, %o0; \ - .section __ex_table; \ - .align 4; \ - .word 98b, 99b; \ - .text; \ - .align 4; -#define EX(x,y,a,b) \ -98: x,y; \ - .section .fixup; \ - .align 4; \ -99: VISExitHalf; \ - retl; \ - a, b, %o0; \ - .section __ex_table; \ - .align 4; \ - .word 98b, 99b; \ - .text; \ - .align 4; -#define EXBLK1(x,y) \ -98: x,y; \ - .section .fixup; \ - .align 4; \ -99: VISExitHalf; \ - add %o4, 0x1c0, %o1; \ - and %o2, (0x40 - 1), %o2; \ - retl; \ - add %o1, %o2, %o0; \ - .section __ex_table; \ - .align 4; \ - .word 98b, 99b; \ - .text; \ - .align 4; -#define EXBLK2(x,y) \ -98: x,y; \ - .section .fixup; \ - .align 4; \ -99: VISExitHalf; \ - sll %o3, 6, %o3; \ - and %o2, (0x40 - 1), %o2; \ - add %o3, 0x80, %o1; \ - retl; \ - add %o1, %o2, %o0; \ - .section __ex_table; \ - .align 4; \ - .word 98b, 99b; \ - .text; \ - .align 4; -#define EXBLK3(x,y) \ -98: x,y; \ - .section .fixup; \ - .align 4; \ -99: VISExitHalf; \ - and %o2, (0x40 - 1), %o2; \ - retl; \ - add %o2, 0x80, %o0; \ - .section __ex_table; \ - .align 4; \ - .word 98b, 99b; \ - .text; \ - .align 4; -#define EXBLK4(x,y) \ -98: x,y; \ - .section .fixup; \ - .align 4; \ -99: VISExitHalf; \ - and %o2, (0x40 - 1), %o2; \ - retl; \ - add %o2, 0x40, %o0; \ - .section __ex_table; \ - .align 4; \ - .word 98b, 99b; \ - .text; \ - .align 4; -#else -#define ASI_AIUS 0x80 -#define ASI_BLK_AIUS 0xf0 -#define FPRS_FEF 0x04 -#define VISEntryHalf rd %fprs, %o5; wr %g0, FPRS_FEF, %fprs -#define VISExitHalf and %o5, FPRS_FEF, %o5; wr %o5, 0x0, %fprs -#define SMALL_COPY_USES_FPU -#define EXNV(x,y,a,b) x,y; -#define EXNV2(x,y,a,b) x,y; -#define EXNV3(x,y,a,b) x,y; -#define EX(x,y,a,b) x,y; -#define EXBLK1(x,y) x,y; -#define EXBLK2(x,y) x,y; -#define EXBLK3(x,y) x,y; -#define EXBLK4(x,y) x,y; -#endif - - /* Special/non-trivial issues of this code: - * - * 1) %o5 is preserved from VISEntryHalf to VISExitHalf - * 2) Only low 32 FPU registers are used so that only the - * lower half of the FPU register set is dirtied by this - * code. This is especially important in the kernel. - * 3) This code never prefetches cachelines past the end - * of the source buffer. - * - * XXX Actually, Cheetah can buffer up to 8 concurrent - * XXX prefetches, revisit this... - */ - - .text - .align 32 - - /* The cheetah's flexible spine, oversized liver, enlarged heart, - * slender muscular body, and claws make it the swiftest hunter - * in Africa and the fastest animal on land. Can reach speeds - * of up to 2.4GB per second. - */ - - .globl U3copy_in_user -U3copy_in_user: /* %o0=dst, %o1=src, %o2=len */ - /* Writing to %asi is _expensive_ so we hardcode it. - * Reading %asi to check for KERNEL_DS is comparatively - * cheap. - */ - rd %asi, %g1 ! MS Group (4 cycles) - cmp %g1, ASI_AIUS ! A0 Group - bne U3memcpy ! BR - nop ! A1 -#ifndef __KERNEL__ - /* Save away original 'dst' for memcpy return value. */ - mov %o0, %g3 ! A0 Group -#endif - /* Anything to copy at all? */ - cmp %o2, 0 ! A1 - ble,pn %icc, U3copy_in_user_short_ret ! BR - - /* Extremely small copy? */ - cmp %o2, 31 ! A0 Group - ble,pn %icc, U3copy_in_user_short ! BR - - /* Large enough to use unrolled prefetch loops? */ - cmp %o2, 0x100 ! A1 - bge,a,pt %icc, U3copy_in_user_enter ! BR Group - andcc %o0, 0x3f, %g2 ! A0 - - ba,pt %xcc, U3copy_in_user_toosmall ! BR Group - andcc %o0, 0x7, %g2 ! A0 - - .align 32 -U3copy_in_user_short: - /* Copy %o2 bytes from src to dst, one byte at a time. */ - EXNV(lduba [%o1 + 0x00] %asi, %o3, add %o2, %g0)! MS Group - add %o1, 0x1, %o1 ! A0 - add %o0, 0x1, %o0 ! A1 - subcc %o2, 1, %o2 ! A0 Group - - bg,pt %icc, U3copy_in_user_short ! BR - EXNV(stba %o3, [%o0 + -1] %asi, add %o2, 1) ! MS Group (1-cycle stall) - -U3copy_in_user_short_ret: -#ifdef __KERNEL__ - retl ! BR Group (0-4 cycle stall) - clr %o0 ! A0 -#else - retl ! BR Group (0-4 cycle stall) - mov %g3, %o0 ! A0 -#endif - - /* Here len >= (6 * 64) and condition codes reflect execution - * of "andcc %o0, 0x7, %g2", done by caller. - */ - .align 64 -U3copy_in_user_enter: - /* Is 'dst' already aligned on an 64-byte boundary? */ - be,pt %xcc, 2f ! BR - - /* Compute abs((dst & 0x3f) - 0x40) into %g2. This is the number - * of bytes to copy to make 'dst' 64-byte aligned. We pre- - * subtract this from 'len'. - */ - sub %g2, 0x40, %g2 ! A0 Group - sub %g0, %g2, %g2 ! A0 Group - sub %o2, %g2, %o2 ! A0 Group - - /* Copy %g2 bytes from src to dst, one byte at a time. */ -1: EXNV(lduba [%o1 + 0x00] %asi, %o3, add %o2, %g2)! MS (Group) - add %o1, 0x1, %o1 ! A1 - add %o0, 0x1, %o0 ! A0 Group - subcc %g2, 0x1, %g2 ! A1 - - bg,pt %icc, 1b ! BR Group - EXNV2(stba %o3, [%o0 + -1] %asi, add %o2, %g2) ! MS Group - -2: VISEntryHalf ! MS+MS - and %o1, 0x7, %g1 ! A1 - ba,pt %xcc, U3copy_in_user_begin ! BR - alignaddr %o1, %g0, %o1 ! MS (Break-after) - - .align 64 -U3copy_in_user_begin: - prefetcha [%o1 + 0x000] %asi, #one_read ! MS Group1 - prefetcha [%o1 + 0x040] %asi, #one_read ! MS Group2 - andn %o2, (0x40 - 1), %o4 ! A0 - prefetcha [%o1 + 0x080] %asi, #one_read ! MS Group3 - cmp %o4, 0x140 ! A0 - prefetcha [%o1 + 0x0c0] %asi, #one_read ! MS Group4 - EX(ldda [%o1 + 0x000] %asi, %f0, add %o2, %g0) ! MS Group5 (%f0 results at G8) - bge,a,pt %icc, 1f ! BR - - prefetcha [%o1 + 0x100] %asi, #one_read ! MS Group6 -1: EX(ldda [%o1 + 0x008] %asi, %f2, add %o2, %g0) ! AX (%f2 results at G9) - cmp %o4, 0x180 ! A1 - bge,a,pt %icc, 1f ! BR - prefetcha [%o1 + 0x140] %asi, #one_read ! MS Group7 -1: EX(ldda [%o1 + 0x010] %asi, %f4, add %o2, %g0) ! AX (%f4 results at G10) - cmp %o4, 0x1c0 ! A1 - bge,a,pt %icc, 1f ! BR - - prefetcha [%o1 + 0x180] %asi, #one_read ! MS Group8 -1: faligndata %f0, %f2, %f16 ! FGA Group9 (%f16 at G12) - EX(ldda [%o1 + 0x018] %asi, %f6, add %o2, %g0) ! AX (%f6 results at G12) - faligndata %f2, %f4, %f18 ! FGA Group10 (%f18 results at G13) - EX(ldda [%o1 + 0x020] %asi, %f8, add %o2, %g0) ! MS (%f8 results at G13) - faligndata %f4, %f6, %f20 ! FGA Group12 (1-cycle stall,%f20 at G15) - EX(ldda [%o1 + 0x028] %asi, %f10, add %o2, %g0) ! MS (%f10 results at G15) - faligndata %f6, %f8, %f22 ! FGA Group13 (%f22 results at G16) - - EX(ldda [%o1 + 0x030] %asi, %f12, add %o2, %g0) ! MS (%f12 results at G16) - faligndata %f8, %f10, %f24 ! FGA Group15 (1-cycle stall,%f24 at G18) - EX(ldda [%o1 + 0x038] %asi, %f14, add %o2, %g0) ! MS (%f14 results at G18) - faligndata %f10, %f12, %f26 ! FGA Group16 (%f26 results at G19) - EX(ldda [%o1 + 0x040] %asi, %f0, add %o2, %g0) ! MS (%f0 results at G19) - - /* We only use the first loop if len > (7 * 64). */ - subcc %o4, 0x1c0, %o4 ! A0 Group17 - bg,pt %icc, U3copy_in_user_loop1 ! BR - add %o1, 0x40, %o1 ! A1 - - add %o4, 0x140, %o4 ! A0 Group18 - ba,pt %xcc, U3copy_in_user_loop2 ! BR - srl %o4, 6, %o3 ! A0 Group19 - nop - nop - nop - nop - nop - - nop - nop - - /* This loop performs the copy and queues new prefetches. - * We drop into the second loop when len <= (5 * 64). Note - * that this (5 * 64) factor has been subtracted from len - * already. - */ -U3copy_in_user_loop1: - EXBLK1(ldda [%o1 + 0x008] %asi, %f2) ! MS Group2 (%f2 results at G5) - faligndata %f12, %f14, %f28 ! FGA (%f28 results at G5) - EXBLK1(ldda [%o1 + 0x010] %asi, %f4) ! MS Group3 (%f4 results at G6) - faligndata %f14, %f0, %f30 ! FGA Group4 (1-cycle stall, %f30 at G7) - EXBLK1(stda %f16, [%o0] ASI_BLK_AIUS) ! MS - EXBLK1(ldda [%o1 + 0x018] %asi, %f6) ! AX (%f6 results at G7) - - faligndata %f0, %f2, %f16 ! FGA Group12 (7-cycle stall) - EXBLK1(ldda [%o1 + 0x020] %asi, %f8) ! MS (%f8 results at G15) - faligndata %f2, %f4, %f18 ! FGA Group13 (%f18 results at G16) - EXBLK1(ldda [%o1 + 0x028] %asi, %f10) ! MS (%f10 results at G16) - faligndata %f4, %f6, %f20 ! FGA Group14 (%f20 results at G17) - EXBLK1(ldda [%o1 + 0x030] %asi, %f12) ! MS (%f12 results at G17) - faligndata %f6, %f8, %f22 ! FGA Group15 (%f22 results at G18) - EXBLK1(ldda [%o1 + 0x038] %asi, %f14) ! MS (%f14 results at G18) - - faligndata %f8, %f10, %f24 ! FGA Group16 (%f24 results at G19) - EXBLK1(ldda [%o1 + 0x040] %asi, %f0) ! AX (%f0 results at G19) - prefetcha [%o1 + 0x180] %asi, #one_read ! MS - faligndata %f10, %f12, %f26 ! FGA Group17 (%f26 results at G20) - subcc %o4, 0x40, %o4 ! A0 - add %o1, 0x40, %o1 ! A1 - bg,pt %xcc, U3copy_in_user_loop1 ! BR - add %o0, 0x40, %o0 ! A0 Group18 - -U3copy_in_user_loop2_enter: - mov 5, %o3 ! A1 - - /* This loop performs on the copy, no new prefetches are - * queued. We do things this way so that we do not perform - * any spurious prefetches past the end of the src buffer. - */ -U3copy_in_user_loop2: - EXBLK2(ldda [%o1 + 0x008] %asi, %f2) ! MS - faligndata %f12, %f14, %f28 ! FGA Group2 - EXBLK2(ldda [%o1 + 0x010] %asi, %f4) ! MS - faligndata %f14, %f0, %f30 ! FGA Group4 (1-cycle stall) - EXBLK2(stda %f16, [%o0] ASI_BLK_AIUS) ! MS - EXBLK2(ldda [%o1 + 0x018] %asi, %f6) ! AX - faligndata %f0, %f2, %f16 ! FGA Group12 (7-cycle stall) - - EXBLK2(ldda [%o1 + 0x020] %asi, %f8) ! MS - faligndata %f2, %f4, %f18 ! FGA Group13 - EXBLK2(ldda [%o1 + 0x028] %asi, %f10) ! MS - faligndata %f4, %f6, %f20 ! FGA Group14 - EXBLK2(ldda [%o1 + 0x030] %asi, %f12) ! MS - faligndata %f6, %f8, %f22 ! FGA Group15 - EXBLK2(ldda [%o1 + 0x038] %asi, %f14) ! MS - faligndata %f8, %f10, %f24 ! FGA Group16 - - EXBLK2(ldda [%o1 + 0x040] %asi, %f0) ! AX - faligndata %f10, %f12, %f26 ! FGA Group17 - subcc %o3, 0x01, %o3 ! A0 - add %o1, 0x40, %o1 ! A1 - bg,pt %xcc, U3copy_in_user_loop2 ! BR - add %o0, 0x40, %o0 ! A0 Group18 - - /* Finally we copy the last full 64-byte block. */ -U3copy_in_user_loopfini: - EXBLK3(ldda [%o1 + 0x008] %asi, %f2) ! MS - faligndata %f12, %f14, %f28 ! FGA - EXBLK3(ldda [%o1 + 0x010] %asi, %f4) ! MS Group19 - faligndata %f14, %f0, %f30 ! FGA - EXBLK3(stda %f16, [%o0] ASI_BLK_AIUS) ! MS Group20 - EXBLK4(ldda [%o1 + 0x018] %asi, %f6) ! AX - faligndata %f0, %f2, %f16 ! FGA Group11 (7-cycle stall) - EXBLK4(ldda [%o1 + 0x020] %asi, %f8) ! MS - faligndata %f2, %f4, %f18 ! FGA Group12 - EXBLK4(ldda [%o1 + 0x028] %asi, %f10) ! MS - faligndata %f4, %f6, %f20 ! FGA Group13 - EXBLK4(ldda [%o1 + 0x030] %asi, %f12) ! MS - faligndata %f6, %f8, %f22 ! FGA Group14 - EXBLK4(ldda [%o1 + 0x038] %asi, %f14) ! MS - faligndata %f8, %f10, %f24 ! FGA Group15 - cmp %g1, 0 ! A0 - be,pt %icc, 1f ! BR - add %o0, 0x40, %o0 ! A1 - EXBLK4(ldda [%o1 + 0x040] %asi, %f0) ! MS -1: faligndata %f10, %f12, %f26 ! FGA Group16 - faligndata %f12, %f14, %f28 ! FGA Group17 - faligndata %f14, %f0, %f30 ! FGA Group18 - EXBLK4(stda %f16, [%o0] ASI_BLK_AIUS) ! MS - add %o0, 0x40, %o0 ! A0 - add %o1, 0x40, %o1 ! A1 - membar #Sync ! MS Group26 (7-cycle stall) - - /* Now we copy the (len modulo 64) bytes at the end. - * Note how we borrow the %f0 loaded above. - * - * Also notice how this code is careful not to perform a - * load past the end of the src buffer just like similar - * code found in U3copy_in_user_toosmall processing. - */ -U3copy_in_user_loopend: - and %o2, 0x3f, %o2 ! A0 Group - andcc %o2, 0x38, %g2 ! A0 Group - be,pn %icc, U3copy_in_user_endcruft ! BR - subcc %g2, 0x8, %g2 ! A1 - be,pn %icc, U3copy_in_user_endcruft ! BR Group - cmp %g1, 0 ! A0 - - be,a,pt %icc, 1f ! BR Group - EX(ldda [%o1 + 0x00] %asi, %f0, add %o2, %g0) ! MS - -1: EX(ldda [%o1 + 0x08] %asi, %f2, add %o2, %g0) ! MS Group - add %o1, 0x8, %o1 ! A0 - sub %o2, 0x8, %o2 ! A1 - subcc %g2, 0x8, %g2 ! A0 Group - faligndata %f0, %f2, %f8 ! FGA Group - EX(stda %f8, [%o0 + 0x00] %asi, add %o2, 0x8) ! MS (XXX does it stall here? XXX) - be,pn %icc, U3copy_in_user_endcruft ! BR - add %o0, 0x8, %o0 ! A0 - EX(ldda [%o1 + 0x08] %asi, %f0, add %o2, %g0) ! MS Group - add %o1, 0x8, %o1 ! A0 - sub %o2, 0x8, %o2 ! A1 - subcc %g2, 0x8, %g2 ! A0 Group - faligndata %f2, %f0, %f8 ! FGA - EX(stda %f8, [%o0 + 0x00] %asi, add %o2, 0x8) ! MS (XXX does it stall here? XXX) - bne,pn %icc, 1b ! BR - add %o0, 0x8, %o0 ! A0 Group - - /* If anything is left, we copy it one byte at a time. - * Note that %g1 is (src & 0x3) saved above before the - * alignaddr was performed. - */ -U3copy_in_user_endcruft: - cmp %o2, 0 - add %o1, %g1, %o1 - VISExitHalf - be,pn %icc, U3copy_in_user_short_ret - nop - ba,a,pt %xcc, U3copy_in_user_short - - /* If we get here, then 32 <= len < (6 * 64) */ -U3copy_in_user_toosmall: - -#ifdef SMALL_COPY_USES_FPU - - /* Is 'dst' already aligned on an 8-byte boundary? */ - be,pt %xcc, 2f ! BR Group - - /* Compute abs((dst & 7) - 8) into %g2. This is the number - * of bytes to copy to make 'dst' 8-byte aligned. We pre- - * subtract this from 'len'. - */ - sub %g2, 0x8, %g2 ! A0 - sub %g0, %g2, %g2 ! A0 Group (reg-dep) - sub %o2, %g2, %o2 ! A0 Group (reg-dep) - - /* Copy %g2 bytes from src to dst, one byte at a time. */ -1: EXNV2(lduba [%o1 + 0x00] %asi, %o3, add %o2, %g2)! MS (Group) (%o3 in 3 cycles) - add %o1, 0x1, %o1 ! A1 - add %o0, 0x1, %o0 ! A0 Group - subcc %g2, 0x1, %g2 ! A1 - - bg,pt %icc, 1b ! BR Group - EXNV2(stba %o3, [%o0 + -1] %asi, add %o2, %g2) ! MS Group - -2: VISEntryHalf ! MS+MS - - /* Compute (len - (len % 8)) into %g2. This is guarenteed - * to be nonzero. - */ - andn %o2, 0x7, %g2 ! A0 Group - - /* You may read this and believe that it allows reading - * one 8-byte longword past the end of src. It actually - * does not, as %g2 is subtracted as loads are done from - * src, so we always stop before running off the end. - * Also, we are guarenteed to have at least 0x10 bytes - * to move here. - */ - sub %g2, 0x8, %g2 ! A0 Group (reg-dep) - alignaddr %o1, %g0, %g1 ! MS (Break-after) - EX(ldda [%g1 + 0x00] %asi, %f0, add %o2, %g0) ! MS Group (1-cycle stall) - add %g1, 0x8, %g1 ! A0 - -1: EX(ldda [%g1 + 0x00] %asi, %f2, add %o2, %g0) ! MS Group - add %g1, 0x8, %g1 ! A0 - sub %o2, 0x8, %o2 ! A1 - subcc %g2, 0x8, %g2 ! A0 Group - - faligndata %f0, %f2, %f8 ! FGA Group (1-cycle stall) - EX(stda %f8, [%o0 + 0x00] %asi, add %o2, 0x8) ! MS Group (2-cycle stall) - add %o1, 0x8, %o1 ! A0 - be,pn %icc, 2f ! BR - - add %o0, 0x8, %o0 ! A1 - EX(ldda [%g1 + 0x00] %asi, %f0, add %o2, %g0) ! MS Group - add %g1, 0x8, %g1 ! A0 - sub %o2, 0x8, %o2 ! A1 - - subcc %g2, 0x8, %g2 ! A0 Group - faligndata %f2, %f0, %f8 ! FGA Group (1-cycle stall) - EX(stda %f8, [%o0 + 0x00] %asi, add %o2, 0x8) ! MS Group (2-cycle stall) - add %o1, 0x8, %o1 ! A0 - - bne,pn %icc, 1b ! BR - add %o0, 0x8, %o0 ! A1 - - /* Nothing left to copy? */ -2: cmp %o2, 0 ! A0 Group - VISExitHalf ! A0+MS - be,pn %icc, U3copy_in_user_short_ret ! BR Group - nop ! A0 - ba,a,pt %xcc, U3copy_in_user_short ! BR Group - -#else /* !(SMALL_COPY_USES_FPU) */ - - xor %o1, %o0, %g2 - andcc %g2, 0x7, %g0 - bne,pn %icc, U3copy_in_user_short - andcc %o1, 0x7, %g2 - - be,pt %xcc, 2f - sub %g2, 0x8, %g2 - sub %g0, %g2, %g2 - sub %o2, %g2, %o2 - -1: EXNV2(lduba [%o1 + 0x00] %asi, %o3, add %o2, %g2) - add %o1, 0x1, %o1 - add %o0, 0x1, %o0 - subcc %g2, 0x1, %g2 - bg,pt %icc, 1b - EXNV2(stba %o3, [%o0 + -1] %asi, add %o2, %g2) - -2: andn %o2, 0x7, %g2 - sub %o2, %g2, %o2 - -3: EXNV3(ldxa [%o1 + 0x00] %asi, %o3, add %o2, %g2) - add %o1, 0x8, %o1 - add %o0, 0x8, %o0 - subcc %g2, 0x8, %g2 - bg,pt %icc, 3b - EXNV3(stxa %o3, [%o0 + -8] %asi, add %o2, %g2) - - cmp %o2, 0 - bne,pn %icc, U3copy_in_user_short - nop - ba,a,pt %xcc, U3copy_in_user_short_ret - -#endif /* !(SMALL_COPY_USES_FPU) */ diff -urN linux-2.4.27/arch/sparc64/lib/U3copy_to_user.S linux-2.4.28/arch/sparc64/lib/U3copy_to_user.S --- linux-2.4.27/arch/sparc64/lib/U3copy_to_user.S 2002-11-28 15:53:12.000000000 -0800 +++ linux-2.4.28/arch/sparc64/lib/U3copy_to_user.S 2004-11-17 03:54:21.147379351 -0800 @@ -1,547 +1,33 @@ -/* $Id: U3copy_to_user.S,v 1.3 2000/11/01 09:29:19 davem Exp $ - * U3memcpy.S: UltraSparc-III optimized copy to userspace. +/* U3copy_to_user.S: UltraSparc-III optimized copy to userspace. * - * Copyright (C) 1999, 2000 David S. Miller (davem@redhat.com) + * Copyright (C) 1999, 2000, 2004 David S. Miller (davem@redhat.com) */ -#ifdef __KERNEL__ -#include -#include -#include -#include -#undef SMALL_COPY_USES_FPU -#define EXNV(x,y,a,b) \ -98: x,y; \ - .section .fixup; \ - .align 4; \ -99: retl; \ - a, b, %o0; \ - .section __ex_table; \ - .align 4; \ - .word 98b, 99b; \ - .text; \ - .align 4; -#define EXNV2(x,y,a,b) \ -98: x,y; \ - .section .fixup; \ - .align 4; \ -99: a, b, %o0; \ - retl; \ - add %o0, 1, %o0; \ - .section __ex_table; \ - .align 4; \ - .word 98b, 99b; \ - .text; \ - .align 4; -#define EXNV3(x,y,a,b) \ -98: x,y; \ - .section .fixup; \ - .align 4; \ -99: a, b, %o0; \ - retl; \ - add %o0, 8, %o0; \ - .section __ex_table; \ - .align 4; \ - .word 98b, 99b; \ - .text; \ - .align 4; -#define EX(x,y,a,b) \ -98: x,y; \ - .section .fixup; \ - .align 4; \ -99: VISExitHalf; \ - retl; \ - a, b, %o0; \ - .section __ex_table; \ - .align 4; \ - .word 98b, 99b; \ - .text; \ - .align 4; -#define EXBLK1(x,y) \ -98: x,y; \ - .section .fixup; \ - .align 4; \ -99: VISExitHalf; \ - add %o4, 0x1c0, %o1; \ - and %o2, (0x40 - 1), %o2; \ - retl; \ - add %o1, %o2, %o0; \ - .section __ex_table; \ - .align 4; \ - .word 98b, 99b; \ - .text; \ - .align 4; -#define EXBLK2(x,y) \ -98: x,y; \ - .section .fixup; \ - .align 4; \ -99: VISExitHalf; \ - sll %o3, 6, %o3; \ - and %o2, (0x40 - 1), %o2; \ - add %o3, 0x80, %o1; \ - retl; \ - add %o1, %o2, %o0; \ - .section __ex_table; \ - .align 4; \ - .word 98b, 99b; \ - .text; \ - .align 4; -#define EXBLK3(x,y) \ -98: x,y; \ - .section .fixup; \ - .align 4; \ -99: VISExitHalf; \ - and %o2, (0x40 - 1), %o2; \ - retl; \ - add %o2, 0x80, %o0; \ - .section __ex_table; \ - .align 4; \ - .word 98b, 99b; \ - .text; \ - .align 4; -#define EXBLK4(x,y) \ -98: x,y; \ - .section .fixup; \ - .align 4; \ -99: VISExitHalf; \ - and %o2, (0x40 - 1), %o2; \ - retl; \ - add %o2, 0x40, %o0; \ - .section __ex_table; \ - .align 4; \ - .word 98b, 99b; \ - .text; \ - .align 4; -#else -#define ASI_AIUS 0x80 -#define ASI_BLK_AIUS 0xf0 -#define FPRS_FEF 0x04 -#define VISEntryHalf rd %fprs, %o5; wr %g0, FPRS_FEF, %fprs -#define VISExitHalf and %o5, FPRS_FEF, %o5; wr %o5, 0x0, %fprs -#define SMALL_COPY_USES_FPU -#define EXNV(x,y,a,b) x,y; -#define EXNV2(x,y,a,b) x,y; -#define EXNV3(x,y,a,b) x,y; -#define EX(x,y,a,b) x,y; -#define EXBLK1(x,y) x,y; -#define EXBLK2(x,y) x,y; -#define EXBLK3(x,y) x,y; -#define EXBLK4(x,y) x,y; -#endif - - /* Special/non-trivial issues of this code: - * - * 1) %o5 is preserved from VISEntryHalf to VISExitHalf - * 2) Only low 32 FPU registers are used so that only the - * lower half of the FPU register set is dirtied by this - * code. This is especially important in the kernel. - * 3) This code never prefetches cachelines past the end - * of the source buffer. - */ +#define EX_ST(x) \ +98: x; \ + .section .fixup; \ + .align 4; \ +99: retl; \ + mov 1, %o0; \ + .section __ex_table; \ + .align 4; \ + .word 98b, 99b; \ + .text; \ + .align 4; + +#define FUNC_NAME U3copy_to_user +#define STORE(type,src,addr) type##a src, [addr] ASI_AIUS +#define STORE_BLK(src,addr) stda src, [addr] ASI_BLK_AIUS +#define EX_RETVAL(x) 0 - .text - .align 32 - - /* The cheetah's flexible spine, oversized liver, enlarged heart, - * slender muscular body, and claws make it the swiftest hunter - * in Africa and the fastest animal on land. Can reach speeds - * of up to 2.4GB per second. - */ - - .globl U3copy_to_user -U3copy_to_user: /* %o0=dst, %o1=src, %o2=len */ /* Writing to %asi is _expensive_ so we hardcode it. * Reading %asi to check for KERNEL_DS is comparatively * cheap. */ - rd %asi, %g1 ! MS Group (4 cycles) - cmp %g1, ASI_AIUS ! A0 Group - bne U3memcpy ! BR - nop ! A1 -#ifndef __KERNEL__ - /* Save away original 'dst' for memcpy return value. */ - mov %o0, %g3 ! A0 Group -#endif - /* Anything to copy at all? */ - cmp %o2, 0 ! A1 - ble,pn %icc, U3copy_to_user_short_ret ! BR - - /* Extremely small copy? */ - cmp %o2, 31 ! A0 Group - ble,pn %icc, U3copy_to_user_short ! BR - - /* Large enough to use unrolled prefetch loops? */ - cmp %o2, 0x100 ! A1 - bge,a,pt %icc, U3copy_to_user_enter ! BR Group - andcc %o0, 0x3f, %g2 ! A0 - - ba,pt %xcc, U3copy_to_user_toosmall ! BR Group - andcc %o0, 0x7, %g2 ! A0 - - .align 32 -U3copy_to_user_short: - /* Copy %o2 bytes from src to dst, one byte at a time. */ - ldub [%o1 + 0x00], %o3 ! MS Group - add %o1, 0x1, %o1 ! A0 - add %o0, 0x1, %o0 ! A1 - subcc %o2, 1, %o2 ! A0 Group - - bg,pt %icc, U3copy_to_user_short ! BR - EXNV(stba %o3, [%o0 + -1] %asi, add %o2, 1) ! MS Group (1-cycle stall) - -U3copy_to_user_short_ret: -#ifdef __KERNEL__ - retl ! BR Group (0-4 cycle stall) - clr %o0 ! A0 -#else - retl ! BR Group (0-4 cycle stall) - mov %g3, %o0 ! A0 -#endif - - /* Here len >= (6 * 64) and condition codes reflect execution - * of "andcc %o0, 0x7, %g2", done by caller. - */ - .align 64 -U3copy_to_user_enter: - /* Is 'dst' already aligned on an 64-byte boundary? */ - be,pt %xcc, 2f ! BR - - /* Compute abs((dst & 0x3f) - 0x40) into %g2. This is the number - * of bytes to copy to make 'dst' 64-byte aligned. We pre- - * subtract this from 'len'. - */ - sub %g2, 0x40, %g2 ! A0 Group - sub %g0, %g2, %g2 ! A0 Group - sub %o2, %g2, %o2 ! A0 Group - - /* Copy %g2 bytes from src to dst, one byte at a time. */ -1: ldub [%o1 + 0x00], %o3 ! MS (Group) - add %o1, 0x1, %o1 ! A1 - add %o0, 0x1, %o0 ! A0 Group - subcc %g2, 0x1, %g2 ! A1 - - bg,pt %icc, 1b ! BR Group - EXNV2(stba %o3, [%o0 + -1] %asi, add %o2, %g2) ! MS Group - -2: VISEntryHalf ! MS+MS - and %o1, 0x7, %g1 ! A1 - ba,pt %xcc, U3copy_to_user_begin ! BR - alignaddr %o1, %g0, %o1 ! MS (Break-after) - - .align 64 -U3copy_to_user_begin: -#ifdef __KERNEL__ - .globl U3copy_to_user_nop_1_6 -U3copy_to_user_nop_1_6: - ldxa [%g0] ASI_DCU_CONTROL_REG, %g3 - sethi %uhi(DCU_PE), %o3 - sllx %o3, 32, %o3 - or %g3, %o3, %o3 - stxa %o3, [%g0] ASI_DCU_CONTROL_REG ! Enable P-cache - membar #Sync -#endif - prefetch [%o1 + 0x000], #one_read ! MS Group1 - prefetch [%o1 + 0x040], #one_read ! MS Group2 - andn %o2, (0x40 - 1), %o4 ! A0 - prefetch [%o1 + 0x080], #one_read ! MS Group3 - cmp %o4, 0x140 ! A0 - prefetch [%o1 + 0x0c0], #one_read ! MS Group4 - ldd [%o1 + 0x000], %f0 ! MS Group5 (%f0 results at G8) - bge,a,pt %icc, 1f ! BR - - prefetch [%o1 + 0x100], #one_read ! MS Group6 -1: ldd [%o1 + 0x008], %f2 ! AX (%f2 results at G9) - cmp %o4, 0x180 ! A1 - bge,a,pt %icc, 1f ! BR - prefetch [%o1 + 0x140], #one_read ! MS Group7 -1: ldd [%o1 + 0x010], %f4 ! AX (%f4 results at G10) - cmp %o4, 0x1c0 ! A1 - bge,a,pt %icc, 1f ! BR - - prefetch [%o1 + 0x180], #one_read ! MS Group8 -1: faligndata %f0, %f2, %f16 ! FGA Group9 (%f16 at G12) - ldd [%o1 + 0x018], %f6 ! AX (%f6 results at G12) - faligndata %f2, %f4, %f18 ! FGA Group10 (%f18 results at G13) - ldd [%o1 + 0x020], %f8 ! MS (%f8 results at G13) - faligndata %f4, %f6, %f20 ! FGA Group12 (1-cycle stall,%f20 at G15) - ldd [%o1 + 0x028], %f10 ! MS (%f10 results at G15) - faligndata %f6, %f8, %f22 ! FGA Group13 (%f22 results at G16) - - ldd [%o1 + 0x030], %f12 ! MS (%f12 results at G16) - faligndata %f8, %f10, %f24 ! FGA Group15 (1-cycle stall,%f24 at G18) - ldd [%o1 + 0x038], %f14 ! MS (%f14 results at G18) - faligndata %f10, %f12, %f26 ! FGA Group16 (%f26 results at G19) - ldd [%o1 + 0x040], %f0 ! MS (%f0 results at G19) - - /* We only use the first loop if len > (7 * 64). */ - subcc %o4, 0x1c0, %o4 ! A0 Group17 - bg,pt %icc, U3copy_to_user_loop1 ! BR - add %o1, 0x40, %o1 ! A1 - - add %o4, 0x140, %o4 ! A0 Group18 - ba,pt %xcc, U3copy_to_user_loop2 ! BR - srl %o4, 6, %o3 ! A0 Group19 - nop - nop - nop - nop - nop - - nop - nop - - /* This loop performs the copy and queues new prefetches. - * We drop into the second loop when len <= (5 * 64). Note - * that this (5 * 64) factor has been subtracted from len - * already. - */ -U3copy_to_user_loop1: - ldd [%o1 + 0x008], %f2 ! MS Group2 (%f2 results at G5) - faligndata %f12, %f14, %f28 ! FGA (%f28 results at G5) - ldd [%o1 + 0x010], %f4 ! MS Group3 (%f4 results at G6) - faligndata %f14, %f0, %f30 ! FGA Group4 (1-cycle stall, %f30 at G7) - EXBLK1(stda %f16, [%o0] ASI_BLK_AIUS) ! MS - ldd [%o1 + 0x018], %f6 ! AX (%f6 results at G7) - - faligndata %f0, %f2, %f16 ! FGA Group12 (7-cycle stall) - ldd [%o1 + 0x020], %f8 ! MS (%f8 results at G15) - faligndata %f2, %f4, %f18 ! FGA Group13 (%f18 results at G16) - ldd [%o1 + 0x028], %f10 ! MS (%f10 results at G16) - faligndata %f4, %f6, %f20 ! FGA Group14 (%f20 results at G17) - ldd [%o1 + 0x030], %f12 ! MS (%f12 results at G17) - faligndata %f6, %f8, %f22 ! FGA Group15 (%f22 results at G18) - ldd [%o1 + 0x038], %f14 ! MS (%f14 results at G18) - - faligndata %f8, %f10, %f24 ! FGA Group16 (%f24 results at G19) - ldd [%o1 + 0x040], %f0 ! AX (%f0 results at G19) - prefetch [%o1 + 0x180], #one_read ! MS - faligndata %f10, %f12, %f26 ! FGA Group17 (%f26 results at G20) - subcc %o4, 0x40, %o4 ! A0 - add %o1, 0x40, %o1 ! A1 - bg,pt %xcc, U3copy_to_user_loop1 ! BR - add %o0, 0x40, %o0 ! A0 Group18 - -U3copy_to_user_loop2_enter: - mov 5, %o3 ! A1 - - /* This loop performs on the copy, no new prefetches are - * queued. We do things this way so that we do not perform - * any spurious prefetches past the end of the src buffer. - */ -U3copy_to_user_loop2: - ldd [%o1 + 0x008], %f2 ! MS - faligndata %f12, %f14, %f28 ! FGA Group2 - ldd [%o1 + 0x010], %f4 ! MS - faligndata %f14, %f0, %f30 ! FGA Group4 (1-cycle stall) - EXBLK2(stda %f16, [%o0] ASI_BLK_AIUS) ! MS - ldd [%o1 + 0x018], %f6 ! AX - faligndata %f0, %f2, %f16 ! FGA Group12 (7-cycle stall) - - ldd [%o1 + 0x020], %f8 ! MS - faligndata %f2, %f4, %f18 ! FGA Group13 - ldd [%o1 + 0x028], %f10 ! MS - faligndata %f4, %f6, %f20 ! FGA Group14 - ldd [%o1 + 0x030], %f12 ! MS - faligndata %f6, %f8, %f22 ! FGA Group15 - ldd [%o1 + 0x038], %f14 ! MS - faligndata %f8, %f10, %f24 ! FGA Group16 - - ldd [%o1 + 0x040], %f0 ! AX - faligndata %f10, %f12, %f26 ! FGA Group17 - subcc %o3, 0x01, %o3 ! A0 - add %o1, 0x40, %o1 ! A1 - bg,pt %xcc, U3copy_to_user_loop2 ! BR - add %o0, 0x40, %o0 ! A0 Group18 - - /* Finally we copy the last full 64-byte block. */ -U3copy_to_user_loopfini: - ldd [%o1 + 0x008], %f2 ! MS - faligndata %f12, %f14, %f28 ! FGA - ldd [%o1 + 0x010], %f4 ! MS Group19 - faligndata %f14, %f0, %f30 ! FGA - EXBLK3(stda %f16, [%o0] ASI_BLK_AIUS) ! MS Group20 - ldd [%o1 + 0x018], %f6 ! AX - faligndata %f0, %f2, %f16 ! FGA Group11 (7-cycle stall) - ldd [%o1 + 0x020], %f8 ! MS - faligndata %f2, %f4, %f18 ! FGA Group12 - ldd [%o1 + 0x028], %f10 ! MS - faligndata %f4, %f6, %f20 ! FGA Group13 - ldd [%o1 + 0x030], %f12 ! MS - faligndata %f6, %f8, %f22 ! FGA Group14 - ldd [%o1 + 0x038], %f14 ! MS - faligndata %f8, %f10, %f24 ! FGA Group15 - cmp %g1, 0 ! A0 - be,pt %icc, 1f ! BR - add %o0, 0x40, %o0 ! A1 - ldd [%o1 + 0x040], %f0 ! MS -1: faligndata %f10, %f12, %f26 ! FGA Group16 - faligndata %f12, %f14, %f28 ! FGA Group17 - faligndata %f14, %f0, %f30 ! FGA Group18 - EXBLK4(stda %f16, [%o0] ASI_BLK_AIUS) ! MS - add %o0, 0x40, %o0 ! A0 - add %o1, 0x40, %o1 ! A1 -#ifdef __KERNEL__ - .globl U3copy_to_user_nop_2_3 -U3copy_to_user_nop_2_3: - mov PRIMARY_CONTEXT, %o3 - stxa %g0, [%o3] ASI_DMMU ! Flush P-cache - stxa %g3, [%g0] ASI_DCU_CONTROL_REG ! Disable P-cache -#endif - membar #Sync ! MS Group26 (7-cycle stall) - - /* Now we copy the (len modulo 64) bytes at the end. - * Note how we borrow the %f0 loaded above. - * - * Also notice how this code is careful not to perform a - * load past the end of the src buffer just like similar - * code found in U3copy_to_user_toosmall processing. - */ -U3copy_to_user_loopend: - and %o2, 0x3f, %o2 ! A0 Group - andcc %o2, 0x38, %g2 ! A0 Group - be,pn %icc, U3copy_to_user_endcruft ! BR - subcc %g2, 0x8, %g2 ! A1 - be,pn %icc, U3copy_to_user_endcruft ! BR Group - cmp %g1, 0 ! A0 - - be,a,pt %icc, 1f ! BR Group - ldd [%o1 + 0x00], %f0 ! MS - -1: ldd [%o1 + 0x08], %f2 ! MS Group - add %o1, 0x8, %o1 ! A0 - sub %o2, 0x8, %o2 ! A1 - subcc %g2, 0x8, %g2 ! A0 Group - faligndata %f0, %f2, %f8 ! FGA Group - EX(stda %f8, [%o0 + 0x00] %asi, add %o2, 0x8) ! MS (XXX does it stall here? XXX) - be,pn %icc, U3copy_to_user_endcruft ! BR - add %o0, 0x8, %o0 ! A0 - ldd [%o1 + 0x08], %f0 ! MS Group - add %o1, 0x8, %o1 ! A0 - sub %o2, 0x8, %o2 ! A1 - subcc %g2, 0x8, %g2 ! A0 Group - faligndata %f2, %f0, %f8 ! FGA - EX(stda %f8, [%o0 + 0x00] %asi, add %o2, 0x8) ! MS (XXX does it stall here? XXX) - bne,pn %icc, 1b ! BR - add %o0, 0x8, %o0 ! A0 Group - - /* If anything is left, we copy it one byte at a time. - * Note that %g1 is (src & 0x3) saved above before the - * alignaddr was performed. - */ -U3copy_to_user_endcruft: - cmp %o2, 0 - add %o1, %g1, %o1 - VISExitHalf - be,pn %icc, U3copy_to_user_short_ret - nop - ba,a,pt %xcc, U3copy_to_user_short - - /* If we get here, then 32 <= len < (6 * 64) */ -U3copy_to_user_toosmall: - -#ifdef SMALL_COPY_USES_FPU - - /* Is 'dst' already aligned on an 8-byte boundary? */ - be,pt %xcc, 2f ! BR Group - - /* Compute abs((dst & 7) - 8) into %g2. This is the number - * of bytes to copy to make 'dst' 8-byte aligned. We pre- - * subtract this from 'len'. - */ - sub %g2, 0x8, %g2 ! A0 - sub %g0, %g2, %g2 ! A0 Group (reg-dep) - sub %o2, %g2, %o2 ! A0 Group (reg-dep) - - /* Copy %g2 bytes from src to dst, one byte at a time. */ -1: ldub [%o1 + 0x00], %o3 ! MS (Group) (%o3 in 3 cycles) - add %o1, 0x1, %o1 ! A1 - add %o0, 0x1, %o0 ! A0 Group - subcc %g2, 0x1, %g2 ! A1 - - bg,pt %icc, 1b ! BR Group - EXNV2(stba %o3, [%o0 + -1] %asi, add %o2, %g2) ! MS Group - -2: VISEntryHalf ! MS+MS - - /* Compute (len - (len % 8)) into %g2. This is guarenteed - * to be nonzero. - */ - andn %o2, 0x7, %g2 ! A0 Group - - /* You may read this and believe that it allows reading - * one 8-byte longword past the end of src. It actually - * does not, as %g2 is subtracted as loads are done from - * src, so we always stop before running off the end. - * Also, we are guarenteed to have at least 0x10 bytes - * to move here. - */ - sub %g2, 0x8, %g2 ! A0 Group (reg-dep) - alignaddr %o1, %g0, %g1 ! MS (Break-after) - ldd [%g1 + 0x00], %f0 ! MS Group (1-cycle stall) - add %g1, 0x8, %g1 ! A0 - -1: ldd [%g1 + 0x00], %f2 ! MS Group - add %g1, 0x8, %g1 ! A0 - sub %o2, 0x8, %o2 ! A1 - subcc %g2, 0x8, %g2 ! A0 Group - - faligndata %f0, %f2, %f8 ! FGA Group (1-cycle stall) - EX(stda %f8, [%o0 + 0x00] %asi, add %o2, 0x8) ! MS Group (2-cycle stall) - add %o1, 0x8, %o1 ! A0 - be,pn %icc, 2f ! BR - - add %o0, 0x8, %o0 ! A1 - ldd [%g1 + 0x00], %f0 ! MS Group - add %g1, 0x8, %g1 ! A0 - sub %o2, 0x8, %o2 ! A1 - - subcc %g2, 0x8, %g2 ! A0 Group - faligndata %f2, %f0, %f8 ! FGA Group (1-cycle stall) - EX(stda %f8, [%o0 + 0x00] %asi, add %o2, 0x8) ! MS Group (2-cycle stall) - add %o1, 0x8, %o1 ! A0 - - bne,pn %icc, 1b ! BR - add %o0, 0x8, %o0 ! A1 - - /* Nothing left to copy? */ -2: cmp %o2, 0 ! A0 Group - VISExitHalf ! A0+MS - be,pn %icc, U3copy_to_user_short_ret ! BR Group - nop ! A0 - ba,a,pt %xcc, U3copy_to_user_short ! BR Group - -#else /* !(SMALL_COPY_USES_FPU) */ - - xor %o1, %o0, %g2 - andcc %g2, 0x7, %g0 - bne,pn %icc, U3copy_to_user_short - andcc %o1, 0x7, %g2 - - be,pt %xcc, 2f - sub %g2, 0x8, %g2 - sub %g0, %g2, %g2 - sub %o2, %g2, %o2 - -1: ldub [%o1 + 0x00], %o3 - add %o1, 0x1, %o1 - add %o0, 0x1, %o0 - subcc %g2, 0x1, %g2 - bg,pt %icc, 1b - EXNV2(stba %o3, [%o0 + -1] %asi, add %o2, %g2) - -2: andn %o2, 0x7, %g2 - sub %o2, %g2, %o2 - -3: ldx [%o1 + 0x00], %o3 - add %o1, 0x8, %o1 - add %o0, 0x8, %o0 - subcc %g2, 0x8, %g2 - bg,pt %icc, 3b - EXNV3(stxa %o3, [%o0 + -8] %asi, add %o2, %g2) - - cmp %o2, 0 - bne,pn %icc, U3copy_to_user_short - nop - ba,a,pt %xcc, U3copy_to_user_short_ret +#define PREAMBLE \ + rd %asi, %g1; \ + cmp %g1, ASI_AIUS; \ + bne,pn %icc, memcpy_user_stub; \ + nop; \ -#endif /* !(SMALL_COPY_USES_FPU) */ +#include "U3memcpy.S" diff -urN linux-2.4.27/arch/sparc64/lib/U3memcpy.S linux-2.4.28/arch/sparc64/lib/U3memcpy.S --- linux-2.4.27/arch/sparc64/lib/U3memcpy.S 2002-11-28 15:53:12.000000000 -0800 +++ linux-2.4.28/arch/sparc64/lib/U3memcpy.S 2004-11-17 03:54:21.149379433 -0800 @@ -1,23 +1,63 @@ -/* $Id: U3memcpy.S,v 1.2 2000/11/01 09:29:19 davem Exp $ - * U3memcpy.S: UltraSparc-III optimized memcpy. +/* U3memcpy.S: UltraSparc-III optimized memcpy. * - * Copyright (C) 1999, 2000 David S. Miller (davem@redhat.com) + * Copyright (C) 1999, 2000, 2004 David S. Miller (davem@redhat.com) */ #ifdef __KERNEL__ #include #include -#include -#include -#undef SMALL_COPY_USES_FPU #else #define ASI_BLK_P 0xf0 #define FPRS_FEF 0x04 +#ifdef MEMCPY_DEBUG +#define VISEntryHalf rd %fprs, %o5; wr %g0, FPRS_FEF, %fprs; \ + clr %g1; clr %g2; clr %g3; subcc %g0, %g0, %g0; +#define VISExitHalf and %o5, FPRS_FEF, %o5; wr %o5, 0x0, %fprs +#else #define VISEntryHalf rd %fprs, %o5; wr %g0, FPRS_FEF, %fprs #define VISExitHalf and %o5, FPRS_FEF, %o5; wr %o5, 0x0, %fprs -#define SMALL_COPY_USES_FPU +#endif +#endif + +#ifndef EX_LD +#define EX_LD(x) x +#endif + +#ifndef EX_ST +#define EX_ST(x) x +#endif + +#ifndef EX_RETVAL +#define EX_RETVAL(x) x #endif +#ifndef LOAD +#define LOAD(type,addr,dest) type [addr], dest +#endif + +#ifndef STORE +#define STORE(type,src,addr) type src, [addr] +#endif + +#ifndef STORE_BLK +#define STORE_BLK(src,addr) stda src, [addr] ASI_BLK_P +#endif + +#ifndef FUNC_NAME +#define FUNC_NAME U3memcpy +#endif + +#ifndef PREAMBLE +#define PREAMBLE +#endif + +#ifndef XCC +#define XCC xcc +#endif + + .register %g2,#scratch + .register %g3,#scratch + /* Special/non-trivial issues of this code: * * 1) %o5 is preserved from VISEntryHalf to VISExitHalf @@ -29,7 +69,7 @@ */ .text - .align 32 + .align 64 /* The cheetah's flexible spine, oversized liver, enlarged heart, * slender muscular body, and claws make it the swiftest hunter @@ -37,392 +77,338 @@ * of up to 2.4GB per second. */ - .globl U3memcpy -U3memcpy: /* %o0=dst, %o1=src, %o2=len */ -#ifndef __KERNEL__ - /* Save away original 'dst' for memcpy return value. */ - mov %o0, %g3 ! A0 Group -#endif - /* Anything to copy at all? */ - cmp %o2, 0 ! A1 - ble,pn %icc, U3memcpy_short_ret ! BR - - /* Extremely small copy? */ - cmp %o2, 31 ! A0 Group - ble,pn %icc, U3memcpy_short ! BR - - /* Large enough to use unrolled prefetch loops? */ - cmp %o2, 0x100 ! A1 - bge,a,pt %icc, U3memcpy_enter ! BR Group - andcc %o0, 0x3f, %g2 ! A0 - - ba,pt %xcc, U3memcpy_toosmall ! BR Group - andcc %o0, 0x7, %g2 ! A0 - - .align 32 -U3memcpy_short: - /* Copy %o2 bytes from src to dst, one byte at a time. */ - ldub [%o1 + 0x00], %o3 ! MS Group - add %o1, 0x1, %o1 ! A0 - add %o0, 0x1, %o0 ! A1 - subcc %o2, 1, %o2 ! A0 Group - - bg,pt %icc, U3memcpy_short ! BR - stb %o3, [%o0 + -1] ! MS Group (1-cycle stall) - -U3memcpy_short_ret: -#ifdef __KERNEL__ - retl ! BR Group (0-4 cycle stall) - clr %o0 ! A0 -#else - retl ! BR Group (0-4 cycle stall) - mov %g3, %o0 ! A0 -#endif + .globl FUNC_NAME +FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */ + PREAMBLE + mov %o0, %g5 + cmp %o2, 0 + be,pn %XCC, 85f + or %o0, %o1, %o3 + cmp %o2, 16 + blu,a,pn %XCC, 80f + or %o3, %o2, %o3 + + cmp %o2, (3 * 64) + blu,pt %XCC, 70f + andcc %o3, 0x7, %g0 - /* Here len >= (6 * 64) and condition codes reflect execution - * of "andcc %o0, 0x7, %g2", done by caller. + /* Clobbers o5/g1/g2/g3/g7/icc/xcc. We must preserve + * o5 from here until we hit VISExitHalf. */ - .align 64 -U3memcpy_enter: + VISEntryHalf + /* Is 'dst' already aligned on an 64-byte boundary? */ - be,pt %xcc, 2f ! BR + andcc %o0, 0x3f, %g2 + be,pt %XCC, 2f /* Compute abs((dst & 0x3f) - 0x40) into %g2. This is the number * of bytes to copy to make 'dst' 64-byte aligned. We pre- * subtract this from 'len'. */ - sub %g2, 0x40, %g2 ! A0 Group - sub %g0, %g2, %g2 ! A0 Group - sub %o2, %g2, %o2 ! A0 Group - - /* Copy %g2 bytes from src to dst, one byte at a time. */ -1: ldub [%o1 + 0x00], %o3 ! MS (Group) - add %o1, 0x1, %o1 ! A1 - add %o0, 0x1, %o0 ! A0 Group - subcc %g2, 0x1, %g2 ! A1 - - bg,pt %icc, 1b ! BR Group - stb %o3, [%o0 + -1] ! MS Group - -2: VISEntryHalf ! MS+MS - and %o1, 0x7, %g1 ! A1 - ba,pt %xcc, U3memcpy_begin ! BR - alignaddr %o1, %g0, %o1 ! MS (Break-after) + sub %o0, %o1, %o4 + sub %g2, 0x40, %g2 + sub %g0, %g2, %g2 + sub %o2, %g2, %o2 + andcc %g2, 0x7, %g1 + be,pt %icc, 2f + and %g2, 0x38, %g2 + +1: subcc %g1, 0x1, %g1 + EX_LD(LOAD(ldub, %o1 + 0x00, %o3)) + EX_ST(STORE(stb, %o3, %o1 + %o4)) + bgu,pt %XCC, 1b + add %o1, 0x1, %o1 + + add %o1, %o4, %o0 + +2: cmp %g2, 0x0 + and %o1, 0x7, %g1 + be,pt %icc, 3f + alignaddr %o1, %g0, %o1 + + EX_LD(LOAD(ldd, %o1, %f4)) +1: EX_LD(LOAD(ldd, %o1 + 0x8, %f6)) + add %o1, 0x8, %o1 + subcc %g2, 0x8, %g2 + faligndata %f4, %f6, %f0 + EX_ST(STORE(std, %f0, %o0)) + be,pn %icc, 3f + add %o0, 0x8, %o0 + + EX_LD(LOAD(ldd, %o1 + 0x8, %f4)) + add %o1, 0x8, %o1 + subcc %g2, 0x8, %g2 + faligndata %f6, %f4, %f2 + EX_ST(STORE(std, %f2, %o0)) + bne,pt %icc, 1b + add %o0, 0x8, %o0 + +3: LOAD(prefetch, %o1 + 0x000, #one_read) + LOAD(prefetch, %o1 + 0x040, #one_read) + andn %o2, (0x40 - 1), %o4 + LOAD(prefetch, %o1 + 0x080, #one_read) + LOAD(prefetch, %o1 + 0x0c0, #one_read) + LOAD(prefetch, %o1 + 0x100, #one_read) + EX_LD(LOAD(ldd, %o1 + 0x000, %f0)) + LOAD(prefetch, %o1 + 0x140, #one_read) + EX_LD(LOAD(ldd, %o1 + 0x008, %f2)) + LOAD(prefetch, %o1 + 0x180, #one_read) + EX_LD(LOAD(ldd, %o1 + 0x010, %f4)) + LOAD(prefetch, %o1 + 0x1c0, #one_read) + faligndata %f0, %f2, %f16 + EX_LD(LOAD(ldd, %o1 + 0x018, %f6)) + faligndata %f2, %f4, %f18 + EX_LD(LOAD(ldd, %o1 + 0x020, %f8)) + faligndata %f4, %f6, %f20 + EX_LD(LOAD(ldd, %o1 + 0x028, %f10)) + faligndata %f6, %f8, %f22 + + EX_LD(LOAD(ldd, %o1 + 0x030, %f12)) + faligndata %f8, %f10, %f24 + EX_LD(LOAD(ldd, %o1 + 0x038, %f14)) + faligndata %f10, %f12, %f26 + EX_LD(LOAD(ldd, %o1 + 0x040, %f0)) + + subcc %o4, 0x80, %o4 + add %o1, 0x40, %o1 + bgu,pt %XCC, 1f + srl %o4, 6, %o3 + ba,pt %xcc, 2f + nop .align 64 -U3memcpy_begin: -#ifdef __KERNEL__ - .globl U3memcpy_nop_1_6 -U3memcpy_nop_1_6: - ldxa [%g0] ASI_DCU_CONTROL_REG, %g3 - sethi %uhi(DCU_PE), %o3 - sllx %o3, 32, %o3 - or %g3, %o3, %o3 - stxa %o3, [%g0] ASI_DCU_CONTROL_REG ! Enable P-cache - membar #Sync -#endif - prefetch [%o1 + 0x000], #one_read ! MS Group1 - prefetch [%o1 + 0x040], #one_read ! MS Group2 - andn %o2, (0x40 - 1), %o4 ! A0 - prefetch [%o1 + 0x080], #one_read ! MS Group3 - cmp %o4, 0x140 ! A0 - prefetch [%o1 + 0x0c0], #one_read ! MS Group4 - ldd [%o1 + 0x000], %f0 ! MS Group5 (%f0 results at G8) - bge,a,pt %icc, 1f ! BR - - prefetch [%o1 + 0x100], #one_read ! MS Group6 -1: ldd [%o1 + 0x008], %f2 ! AX (%f2 results at G9) - cmp %o4, 0x180 ! A1 - bge,a,pt %icc, 1f ! BR - prefetch [%o1 + 0x140], #one_read ! MS Group7 -1: ldd [%o1 + 0x010], %f4 ! AX (%f4 results at G10) - cmp %o4, 0x1c0 ! A1 - bge,a,pt %icc, 1f ! BR - - prefetch [%o1 + 0x180], #one_read ! MS Group8 -1: faligndata %f0, %f2, %f16 ! FGA Group9 (%f16 at G12) - ldd [%o1 + 0x018], %f6 ! AX (%f6 results at G12) - faligndata %f2, %f4, %f18 ! FGA Group10 (%f18 results at G13) - ldd [%o1 + 0x020], %f8 ! MS (%f8 results at G13) - faligndata %f4, %f6, %f20 ! FGA Group12 (1-cycle stall,%f20 at G15) - ldd [%o1 + 0x028], %f10 ! MS (%f10 results at G15) - faligndata %f6, %f8, %f22 ! FGA Group13 (%f22 results at G16) - - ldd [%o1 + 0x030], %f12 ! MS (%f12 results at G16) - faligndata %f8, %f10, %f24 ! FGA Group15 (1-cycle stall,%f24 at G18) - ldd [%o1 + 0x038], %f14 ! MS (%f14 results at G18) - faligndata %f10, %f12, %f26 ! FGA Group16 (%f26 results at G19) - ldd [%o1 + 0x040], %f0 ! MS (%f0 results at G19) - - /* We only use the first loop if len > (7 * 64). */ - subcc %o4, 0x1c0, %o4 ! A0 Group17 - bg,pt %icc, U3memcpy_loop1 ! BR - add %o1, 0x40, %o1 ! A1 - - add %o4, 0x140, %o4 ! A0 Group18 - ba,pt %xcc, U3memcpy_loop2 ! BR - srl %o4, 6, %o3 ! A0 Group19 - nop - nop - nop - nop - nop - - nop - nop - - /* This loop performs the copy and queues new prefetches. - * We drop into the second loop when len <= (5 * 64). Note - * that this (5 * 64) factor has been subtracted from len - * already. - */ -U3memcpy_loop1: - ldd [%o1 + 0x008], %f2 ! MS Group2 (%f2 results at G5) - faligndata %f12, %f14, %f28 ! FGA (%f28 results at G5) - ldd [%o1 + 0x010], %f4 ! MS Group3 (%f4 results at G6) - faligndata %f14, %f0, %f30 ! FGA Group4 (1-cycle stall, %f30 at G7) - stda %f16, [%o0] ASI_BLK_P ! MS - ldd [%o1 + 0x018], %f6 ! AX (%f6 results at G7) - - faligndata %f0, %f2, %f16 ! FGA Group12 (7-cycle stall) - ldd [%o1 + 0x020], %f8 ! MS (%f8 results at G15) - faligndata %f2, %f4, %f18 ! FGA Group13 (%f18 results at G16) - ldd [%o1 + 0x028], %f10 ! MS (%f10 results at G16) - faligndata %f4, %f6, %f20 ! FGA Group14 (%f20 results at G17) - ldd [%o1 + 0x030], %f12 ! MS (%f12 results at G17) - faligndata %f6, %f8, %f22 ! FGA Group15 (%f22 results at G18) - ldd [%o1 + 0x038], %f14 ! MS (%f14 results at G18) - - faligndata %f8, %f10, %f24 ! FGA Group16 (%f24 results at G19) - ldd [%o1 + 0x040], %f0 ! AX (%f0 results at G19) - prefetch [%o1 + 0x180], #one_read ! MS - faligndata %f10, %f12, %f26 ! FGA Group17 (%f26 results at G20) - subcc %o4, 0x40, %o4 ! A0 - add %o1, 0x40, %o1 ! A1 - bg,pt %xcc, U3memcpy_loop1 ! BR - add %o0, 0x40, %o0 ! A0 Group18 - -U3memcpy_loop2_enter: - mov 5, %o3 ! A1 - - /* This loop performs on the copy, no new prefetches are - * queued. We do things this way so that we do not perform - * any spurious prefetches past the end of the src buffer. - */ -U3memcpy_loop2: - ldd [%o1 + 0x008], %f2 ! MS - faligndata %f12, %f14, %f28 ! FGA Group2 - ldd [%o1 + 0x010], %f4 ! MS - faligndata %f14, %f0, %f30 ! FGA Group4 (1-cycle stall) - stda %f16, [%o0] ASI_BLK_P ! MS - ldd [%o1 + 0x018], %f6 ! AX - faligndata %f0, %f2, %f16 ! FGA Group12 (7-cycle stall) - - ldd [%o1 + 0x020], %f8 ! MS - faligndata %f2, %f4, %f18 ! FGA Group13 - ldd [%o1 + 0x028], %f10 ! MS - faligndata %f4, %f6, %f20 ! FGA Group14 - ldd [%o1 + 0x030], %f12 ! MS - faligndata %f6, %f8, %f22 ! FGA Group15 - ldd [%o1 + 0x038], %f14 ! MS - faligndata %f8, %f10, %f24 ! FGA Group16 - - ldd [%o1 + 0x040], %f0 ! AX - faligndata %f10, %f12, %f26 ! FGA Group17 - subcc %o3, 0x01, %o3 ! A0 - add %o1, 0x40, %o1 ! A1 - bg,pt %xcc, U3memcpy_loop2 ! BR - add %o0, 0x40, %o0 ! A0 Group18 +1: + EX_LD(LOAD(ldd, %o1 + 0x008, %f2)) + faligndata %f12, %f14, %f28 + EX_LD(LOAD(ldd, %o1 + 0x010, %f4)) + faligndata %f14, %f0, %f30 + EX_ST(STORE_BLK(%f16, %o0)) + EX_LD(LOAD(ldd, %o1 + 0x018, %f6)) + faligndata %f0, %f2, %f16 + add %o0, 0x40, %o0 + + EX_LD(LOAD(ldd, %o1 + 0x020, %f8)) + faligndata %f2, %f4, %f18 + EX_LD(LOAD(ldd, %o1 + 0x028, %f10)) + faligndata %f4, %f6, %f20 + EX_LD(LOAD(ldd, %o1 + 0x030, %f12)) + subcc %o3, 0x01, %o3 + faligndata %f6, %f8, %f22 + EX_LD(LOAD(ldd, %o1 + 0x038, %f14)) + + faligndata %f8, %f10, %f24 + EX_LD(LOAD(ldd, %o1 + 0x040, %f0)) + LOAD(prefetch, %o1 + 0x1c0, #one_read) + faligndata %f10, %f12, %f26 + bg,pt %XCC, 1b + add %o1, 0x40, %o1 /* Finally we copy the last full 64-byte block. */ -U3memcpy_loopfini: - ldd [%o1 + 0x008], %f2 ! MS - faligndata %f12, %f14, %f28 ! FGA - ldd [%o1 + 0x010], %f4 ! MS Group19 - faligndata %f14, %f0, %f30 ! FGA - stda %f16, [%o0] ASI_BLK_P ! MS Group20 - ldd [%o1 + 0x018], %f6 ! AX - faligndata %f0, %f2, %f16 ! FGA Group11 (7-cycle stall) - ldd [%o1 + 0x020], %f8 ! MS - faligndata %f2, %f4, %f18 ! FGA Group12 - ldd [%o1 + 0x028], %f10 ! MS - faligndata %f4, %f6, %f20 ! FGA Group13 - ldd [%o1 + 0x030], %f12 ! MS - faligndata %f6, %f8, %f22 ! FGA Group14 - ldd [%o1 + 0x038], %f14 ! MS - faligndata %f8, %f10, %f24 ! FGA Group15 - cmp %g1, 0 ! A0 - be,pt %icc, 1f ! BR - add %o0, 0x40, %o0 ! A1 - ldd [%o1 + 0x040], %f0 ! MS -1: faligndata %f10, %f12, %f26 ! FGA Group16 - faligndata %f12, %f14, %f28 ! FGA Group17 - faligndata %f14, %f0, %f30 ! FGA Group18 - stda %f16, [%o0] ASI_BLK_P ! MS - add %o0, 0x40, %o0 ! A0 - add %o1, 0x40, %o1 ! A1 -#ifdef __KERNEL__ - .globl U3memcpy_nop_2_3 -U3memcpy_nop_2_3: - mov PRIMARY_CONTEXT, %o3 - stxa %g0, [%o3] ASI_DMMU ! Flush P-cache - stxa %g3, [%g0] ASI_DCU_CONTROL_REG ! Disable P-cache -#endif - membar #Sync ! MS Group26 (7-cycle stall) +2: + EX_LD(LOAD(ldd, %o1 + 0x008, %f2)) + faligndata %f12, %f14, %f28 + EX_LD(LOAD(ldd, %o1 + 0x010, %f4)) + faligndata %f14, %f0, %f30 + EX_ST(STORE_BLK(%f16, %o0)) + EX_LD(LOAD(ldd, %o1 + 0x018, %f6)) + faligndata %f0, %f2, %f16 + EX_LD(LOAD(ldd, %o1 + 0x020, %f8)) + faligndata %f2, %f4, %f18 + EX_LD(LOAD(ldd, %o1 + 0x028, %f10)) + faligndata %f4, %f6, %f20 + EX_LD(LOAD(ldd, %o1 + 0x030, %f12)) + faligndata %f6, %f8, %f22 + EX_LD(LOAD(ldd, %o1 + 0x038, %f14)) + faligndata %f8, %f10, %f24 + cmp %g1, 0 + be,pt %XCC, 1f + add %o0, 0x40, %o0 + EX_LD(LOAD(ldd, %o1 + 0x040, %f0)) +1: faligndata %f10, %f12, %f26 + faligndata %f12, %f14, %f28 + faligndata %f14, %f0, %f30 + EX_ST(STORE_BLK(%f16, %o0)) + add %o0, 0x40, %o0 + add %o1, 0x40, %o1 + membar #Sync /* Now we copy the (len modulo 64) bytes at the end. * Note how we borrow the %f0 loaded above. * * Also notice how this code is careful not to perform a - * load past the end of the src buffer just like similar - * code found in U3memcpy_toosmall processing. + * load past the end of the src buffer. */ -U3memcpy_loopend: - and %o2, 0x3f, %o2 ! A0 Group - andcc %o2, 0x38, %g2 ! A0 Group - be,pn %icc, U3memcpy_endcruft ! BR - subcc %g2, 0x8, %g2 ! A1 - be,pn %icc, U3memcpy_endcruft ! BR Group - cmp %g1, 0 ! A0 - - be,a,pt %icc, 1f ! BR Group - ldd [%o1 + 0x00], %f0 ! MS - -1: ldd [%o1 + 0x08], %f2 ! MS Group - add %o1, 0x8, %o1 ! A0 - sub %o2, 0x8, %o2 ! A1 - subcc %g2, 0x8, %g2 ! A0 Group - faligndata %f0, %f2, %f8 ! FGA Group - std %f8, [%o0 + 0x00] ! MS (XXX does it stall here? XXX) - be,pn %icc, U3memcpy_endcruft ! BR - add %o0, 0x8, %o0 ! A0 - ldd [%o1 + 0x08], %f0 ! MS Group - add %o1, 0x8, %o1 ! A0 - sub %o2, 0x8, %o2 ! A1 - subcc %g2, 0x8, %g2 ! A0 Group - faligndata %f2, %f0, %f8 ! FGA - std %f8, [%o0 + 0x00] ! MS (XXX does it stall here? XXX) - bne,pn %icc, 1b ! BR - add %o0, 0x8, %o0 ! A0 Group + and %o2, 0x3f, %o2 + andcc %o2, 0x38, %g2 + be,pn %XCC, 2f + subcc %g2, 0x8, %g2 + be,pn %XCC, 2f + cmp %g1, 0 + + sub %o2, %g2, %o2 + be,a,pt %XCC, 1f + EX_LD(LOAD(ldd, %o1 + 0x00, %f0)) + +1: EX_LD(LOAD(ldd, %o1 + 0x08, %f2)) + add %o1, 0x8, %o1 + subcc %g2, 0x8, %g2 + faligndata %f0, %f2, %f8 + EX_ST(STORE(std, %f8, %o0)) + be,pn %XCC, 2f + add %o0, 0x8, %o0 + EX_LD(LOAD(ldd, %o1 + 0x08, %f0)) + add %o1, 0x8, %o1 + subcc %g2, 0x8, %g2 + faligndata %f2, %f0, %f8 + EX_ST(STORE(std, %f8, %o0)) + bne,pn %XCC, 1b + add %o0, 0x8, %o0 /* If anything is left, we copy it one byte at a time. * Note that %g1 is (src & 0x3) saved above before the * alignaddr was performed. */ -U3memcpy_endcruft: +2: cmp %o2, 0 add %o1, %g1, %o1 VISExitHalf - be,pn %icc, U3memcpy_short_ret - nop - ba,a,pt %xcc, U3memcpy_short - - /* If we get here, then 32 <= len < (6 * 64) */ -U3memcpy_toosmall: + be,pn %XCC, 85f + sub %o0, %o1, %o3 -#ifdef SMALL_COPY_USES_FPU - - /* Is 'dst' already aligned on an 8-byte boundary? */ - be,pt %xcc, 2f ! BR Group - - /* Compute abs((dst & 7) - 8) into %g2. This is the number - * of bytes to copy to make 'dst' 8-byte aligned. We pre- - * subtract this from 'len'. - */ - sub %g2, 0x8, %g2 ! A0 - sub %g0, %g2, %g2 ! A0 Group (reg-dep) - sub %o2, %g2, %o2 ! A0 Group (reg-dep) - - /* Copy %g2 bytes from src to dst, one byte at a time. */ -1: ldub [%o1 + 0x00], %o3 ! MS (Group) (%o3 in 3 cycles) - add %o1, 0x1, %o1 ! A1 - add %o0, 0x1, %o0 ! A0 Group - subcc %g2, 0x1, %g2 ! A1 - - bg,pt %icc, 1b ! BR Group - stb %o3, [%o0 + -1] ! MS Group + andcc %g1, 0x7, %g0 + bne,pn %icc, 90f + andcc %o2, 0x8, %g0 + be,pt %icc, 1f + nop + EX_LD(LOAD(ldx, %o1, %o5)) + EX_ST(STORE(stx, %o5, %o1 + %o3)) + add %o1, 0x8, %o1 -2: VISEntryHalf ! MS+MS +1: andcc %o2, 0x4, %g0 + be,pt %icc, 1f + nop + EX_LD(LOAD(lduw, %o1, %o5)) + EX_ST(STORE(stw, %o5, %o1 + %o3)) + add %o1, 0x4, %o1 - /* Compute (len - (len % 8)) into %g2. This is guarenteed - * to be nonzero. - */ - andn %o2, 0x7, %g2 ! A0 Group +1: andcc %o2, 0x2, %g0 + be,pt %icc, 1f + nop + EX_LD(LOAD(lduh, %o1, %o5)) + EX_ST(STORE(sth, %o5, %o1 + %o3)) + add %o1, 0x2, %o1 - /* You may read this and believe that it allows reading - * one 8-byte longword past the end of src. It actually - * does not, as %g2 is subtracted as loads are done from - * src, so we always stop before running off the end. - * Also, we are guarenteed to have at least 0x10 bytes - * to move here. - */ - sub %g2, 0x8, %g2 ! A0 Group (reg-dep) - alignaddr %o1, %g0, %g1 ! MS (Break-after) - ldd [%g1 + 0x00], %f0 ! MS Group (1-cycle stall) - add %g1, 0x8, %g1 ! A0 - -1: ldd [%g1 + 0x00], %f2 ! MS Group - add %g1, 0x8, %g1 ! A0 - sub %o2, 0x8, %o2 ! A1 - subcc %g2, 0x8, %g2 ! A0 Group - - faligndata %f0, %f2, %f8 ! FGA Group (1-cycle stall) - std %f8, [%o0 + 0x00] ! MS Group (2-cycle stall) - add %o1, 0x8, %o1 ! A0 - be,pn %icc, 2f ! BR - - add %o0, 0x8, %o0 ! A1 - ldd [%g1 + 0x00], %f0 ! MS Group - add %g1, 0x8, %g1 ! A0 - sub %o2, 0x8, %o2 ! A1 - - subcc %g2, 0x8, %g2 ! A0 Group - faligndata %f2, %f0, %f8 ! FGA Group (1-cycle stall) - std %f8, [%o0 + 0x00] ! MS Group (2-cycle stall) - add %o1, 0x8, %o1 ! A0 - - bne,pn %icc, 1b ! BR - add %o0, 0x8, %o0 ! A1 - - /* Nothing left to copy? */ -2: cmp %o2, 0 ! A0 Group - VISExitHalf ! A0+MS - be,pn %icc, U3memcpy_short_ret ! BR Group - nop ! A0 - ba,a,pt %xcc, U3memcpy_short ! BR Group - -#else /* !(SMALL_COPY_USES_FPU) */ - - xor %o1, %o0, %g2 - andcc %g2, 0x7, %g0 - bne,pn %icc, U3memcpy_short - andcc %o1, 0x7, %g2 +1: andcc %o2, 0x1, %g0 + be,pt %icc, 85f + nop + EX_LD(LOAD(ldub, %o1, %o5)) + ba,pt %xcc, 85f + EX_ST(STORE(stb, %o5, %o1 + %o3)) - be,pt %xcc, 2f - sub %g2, 0x8, %g2 - sub %g0, %g2, %g2 - sub %o2, %g2, %o2 + .align 64 +70: /* 16 < len <= 64 */ + bne,pn %XCC, 75f + sub %o0, %o1, %o3 + +72: + andn %o2, 0xf, %o4 + and %o2, 0xf, %o2 +1: subcc %o4, 0x10, %o4 + EX_LD(LOAD(ldx, %o1 + 0x00, %o5)) + EX_LD(LOAD(ldx, %o1 + 0x08, %g1)) + EX_ST(STORE(stx, %o5, %o1 + %o3)) + add %o1, 0x8, %o1 + EX_ST(STORE(stx, %g1, %o1 + %o3)) + bgu,pt %XCC, 1b + add %o1, 0x8, %o1 +73: andcc %o2, 0x8, %g0 + be,pt %XCC, 1f + nop + sub %o2, 0x8, %o2 + EX_LD(LOAD(ldx, %o1, %o5)) + EX_ST(STORE(stx, %o5, %o1 + %o3)) + add %o1, 0x8, %o1 +1: andcc %o2, 0x4, %g0 + be,pt %XCC, 1f + nop + sub %o2, 0x4, %o2 + EX_LD(LOAD(lduw, %o1, %o5)) + EX_ST(STORE(stw, %o5, %o1 + %o3)) + add %o1, 0x4, %o1 +1: cmp %o2, 0 + be,pt %XCC, 85f + nop + ba,pt %xcc, 90f + nop -1: ldub [%o1 + 0x00], %o3 - add %o1, 0x1, %o1 - add %o0, 0x1, %o0 - subcc %g2, 0x1, %g2 - bg,pt %icc, 1b - stb %o3, [%o0 + -1] +75: + andcc %o0, 0x7, %g1 + sub %g1, 0x8, %g1 + be,pn %icc, 2f + sub %g0, %g1, %g1 + sub %o2, %g1, %o2 + +1: subcc %g1, 1, %g1 + EX_LD(LOAD(ldub, %o1, %o5)) + EX_ST(STORE(stb, %o5, %o1 + %o3)) + bgu,pt %icc, 1b + add %o1, 1, %o1 + +2: add %o1, %o3, %o0 + andcc %o1, 0x7, %g1 + bne,pt %icc, 8f + sll %g1, 3, %g1 -2: andn %o2, 0x7, %g2 - sub %o2, %g2, %o2 + cmp %o2, 16 + bgeu,pt %icc, 72b + nop + ba,a,pt %xcc, 73b -3: ldx [%o1 + 0x00], %o3 +8: mov 64, %o3 + andn %o1, 0x7, %o1 + EX_LD(LOAD(ldx, %o1, %g2)) + sub %o3, %g1, %o3 + andn %o2, 0x7, %o4 + sllx %g2, %g1, %g2 +1: EX_LD(LOAD(ldx, %o1 + 0x8, %g3)) + subcc %o4, 0x8, %o4 add %o1, 0x8, %o1 + srlx %g3, %o3, %o5 + or %o5, %g2, %o5 + EX_ST(STORE(stx, %o5, %o0)) add %o0, 0x8, %o0 - subcc %g2, 0x8, %g2 - bg,pt %icc, 3b - stx %o3, [%o0 + -8] + bgu,pt %icc, 1b + sllx %g3, %g1, %g2 - cmp %o2, 0 - bne,pn %icc, U3memcpy_short - nop - ba,a,pt %xcc, U3memcpy_short_ret + srl %g1, 3, %g1 + andcc %o2, 0x7, %o2 + be,pn %icc, 85f + add %o1, %g1, %o1 + ba,pt %xcc, 90f + sub %o0, %o1, %o3 + + .align 64 +80: /* 0 < len <= 16 */ + andcc %o3, 0x3, %g0 + bne,pn %XCC, 90f + sub %o0, %o1, %o3 + +1: + subcc %o2, 4, %o2 + EX_LD(LOAD(lduw, %o1, %g1)) + EX_ST(STORE(stw, %g1, %o1 + %o3)) + bgu,pt %XCC, 1b + add %o1, 4, %o1 -#endif /* !(SMALL_COPY_USES_FPU) */ +85: retl + mov EX_RETVAL(%g5), %o0 + + .align 32 +90: + subcc %o2, 1, %o2 + EX_LD(LOAD(ldub, %o1, %g1)) + EX_ST(STORE(stb, %g1, %o1 + %o3)) + bgu,pt %XCC, 90b + add %o1, 1, %o1 + retl + mov EX_RETVAL(%g5), %o0 diff -urN linux-2.4.27/arch/sparc64/lib/U3patch.S linux-2.4.28/arch/sparc64/lib/U3patch.S --- linux-2.4.27/arch/sparc64/lib/U3patch.S 1969-12-31 16:00:00.000000000 -0800 +++ linux-2.4.28/arch/sparc64/lib/U3patch.S 2004-11-17 03:54:21.150379474 -0800 @@ -0,0 +1,30 @@ +/* U3patch.S: Patch Ultra-I routines with Ultra-III variant. + * + * Copyright (C) 2004 David S. Miller + */ + +#define BRANCH_ALWAYS 0x10680000 +#define NOP 0x01000000 +#define ULTRA3_DO_PATCH(OLD, NEW) \ + sethi %hi(NEW), %g1; \ + or %g1, %lo(NEW), %g1; \ + sethi %hi(OLD), %g2; \ + or %g2, %lo(OLD), %g2; \ + sub %g1, %g2, %g1; \ + sethi %hi(BRANCH_ALWAYS), %g3; \ + srl %g1, 2, %g1; \ + or %g3, %lo(BRANCH_ALWAYS), %g3; \ + or %g3, %g1, %g3; \ + stw %g3, [%g2]; \ + sethi %hi(NOP), %g3; \ + or %g3, %lo(NOP), %g3; \ + stw %g3, [%g2 + 0x4]; \ + flush %g2; + + .globl cheetah_patch_copyops +cheetah_patch_copyops: + ULTRA3_DO_PATCH(memcpy, U3memcpy) + ULTRA3_DO_PATCH(___copy_from_user, U3copy_from_user) + ULTRA3_DO_PATCH(___copy_to_user, U3copy_to_user) + retl + nop diff -urN linux-2.4.27/arch/sparc64/lib/VIScopy.S linux-2.4.28/arch/sparc64/lib/VIScopy.S --- linux-2.4.27/arch/sparc64/lib/VIScopy.S 2004-02-18 05:36:31.000000000 -0800 +++ linux-2.4.28/arch/sparc64/lib/VIScopy.S 1969-12-31 16:00:00.000000000 -0800 @@ -1,1187 +0,0 @@ -/* $Id: VIScopy.S,v 1.26 2001/09/27 04:36:24 kanoj Exp $ - * VIScopy.S: High speed copy operations utilizing the UltraSparc - * Visual Instruction Set. - * - * Copyright (C) 1997 David S. Miller (davem@caip.rutgers.edu) - * Copyright (C) 1996, 1997, 1998, 1999 Jakub Jelinek (jj@ultra.linux.cz) - */ - -#include "VIS.h" - - /* VIS code can be used for numerous copy/set operation variants. - * It can be made to work in the kernel, one single instance, - * for all of memcpy, copy_to_user, and copy_from_user by setting - * the ASI src/dest globals correctly. Furthermore it can - * be used for kernel-->kernel page copies as well, a hook label - * is put in here just for this purpose. - * - * For userland, compiling this without __KERNEL__ defined makes - * it work just fine as a generic libc bcopy and memcpy. - * If for userland it is compiled with a 32bit gcc (but you need - * -Wa,-Av9a for as), the code will just rely on lower 32bits of - * IEU registers, if you compile it with 64bit gcc (ie. define - * __sparc_v9__), the code will use full 64bit. - */ - -#ifdef __KERNEL__ - -#include -#include - -#define FPU_CLEAN_RETL \ - ldub [%g6 + AOFF_task_thread + AOFF_thread_current_ds], %o1; \ - VISExit \ - clr %o0; \ - retl; \ - wr %o1, %g0, %asi; -#define FPU_RETL \ - ldub [%g6 + AOFF_task_thread + AOFF_thread_current_ds], %o1; \ - VISExit \ - clr %o0; \ - retl; \ - wr %o1, %g0, %asi; -#define NORMAL_RETL \ - ldub [%g6 + AOFF_task_thread + AOFF_thread_current_ds], %o1; \ - clr %o0; \ - retl; \ - wr %o1, %g0, %asi; -#define EX(x,y,a,b) \ -98: x,y; \ - .section .fixup; \ - .align 4; \ -99: ba VIScopyfixup_ret; \ - a, b, %o1; \ - .section __ex_table; \ - .align 4; \ - .word 98b, 99b; \ - .text; \ - .align 4; -#define EX2(x,y,c,d,e,a,b) \ -98: x,y; \ - .section .fixup; \ - .align 4; \ -99: c, d, e; \ - ba VIScopyfixup_ret; \ - a, b, %o1; \ - .section __ex_table; \ - .align 4; \ - .word 98b, 99b; \ - .text; \ - .align 4; -#define EXO2(x,y) \ -98: x,y; \ - .section __ex_table; \ - .align 4; \ - .word 98b, VIScopyfixup_reto2; \ - .text; \ - .align 4; -#define EXVISN(x,y,n) \ -98: x,y; \ - .section __ex_table; \ - .align 4; \ - .word 98b, VIScopyfixup_vis##n; \ - .text; \ - .align 4; -#define EXT(start,end,handler) \ - .section __ex_table; \ - .align 4; \ - .word start, 0, end, handler; \ - .text; \ - .align 4; -#else -#ifdef REGS_64BIT -#define FPU_CLEAN_RETL \ - retl; \ - mov %g6, %o0; -#define FPU_RETL \ - retl; \ - mov %g6, %o0; -#else -#define FPU_CLEAN_RETL \ - wr %g0, FPRS_FEF, %fprs; \ - retl; \ - mov %g6, %o0; -#define FPU_RETL \ - wr %g0, FPRS_FEF, %fprs; \ - retl; \ - mov %g6, %o0; -#endif -#define NORMAL_RETL \ - retl; \ - mov %g6, %o0; -#define EX(x,y,a,b) x,y -#define EX2(x,y,c,d,e,a,b) x,y -#define EXO2(x,y) x,y -#define EXVISN(x,y,n) x,y -#define EXT(a,b,c) -#endif -#define EXVIS(x,y) EXVISN(x,y,0) -#define EXVIS1(x,y) EXVISN(x,y,1) -#define EXVIS2(x,y) EXVISN(x,y,2) -#define EXVIS3(x,y) EXVISN(x,y,3) -#define EXVIS4(x,y) EXVISN(x,y,4) - -#define FREG_FROB(f1, f2, f3, f4, f5, f6, f7, f8, f9) \ - faligndata %f1, %f2, %f48; \ - faligndata %f2, %f3, %f50; \ - faligndata %f3, %f4, %f52; \ - faligndata %f4, %f5, %f54; \ - faligndata %f5, %f6, %f56; \ - faligndata %f6, %f7, %f58; \ - faligndata %f7, %f8, %f60; \ - faligndata %f8, %f9, %f62; - -#define MAIN_LOOP_CHUNK(src, dest, fdest, fsrc, len, jmptgt) \ - EXVIS(LDBLK [%src] ASIBLK, %fdest); \ - ASI_SETDST_BLK \ - EXVIS(STBLK %fsrc, [%dest] ASIBLK); \ - add %src, 0x40, %src; \ - subcc %len, 0x40, %len; \ - be,pn %xcc, jmptgt; \ - add %dest, 0x40, %dest; \ - ASI_SETSRC_BLK - -#define LOOP_CHUNK1(src, dest, len, branch_dest) \ - MAIN_LOOP_CHUNK(src, dest, f0, f48, len, branch_dest) -#define LOOP_CHUNK2(src, dest, len, branch_dest) \ - MAIN_LOOP_CHUNK(src, dest, f16, f48, len, branch_dest) -#define LOOP_CHUNK3(src, dest, len, branch_dest) \ - MAIN_LOOP_CHUNK(src, dest, f32, f48, len, branch_dest) - -#define STORE_SYNC(dest, fsrc) \ - EXVIS(STBLK %fsrc, [%dest] ASIBLK); \ - add %dest, 0x40, %dest; - -#ifdef __KERNEL__ -#define STORE_JUMP(dest, fsrc, target) \ - srl asi_dest, 3, %g5; \ - EXVIS2(STBLK %fsrc, [%dest] ASIBLK); \ - xor asi_dest, ASI_BLK_XOR1, asi_dest;\ - add %dest, 0x40, %dest; \ - xor asi_dest, %g5, asi_dest; \ - ba,pt %xcc, target; -#else -#define STORE_JUMP(dest, fsrc, target) \ - EXVIS2(STBLK %fsrc, [%dest] ASIBLK); \ - add %dest, 0x40, %dest; \ - ba,pt %xcc, target; -#endif - -#ifndef __KERNEL__ -#define VISLOOP_PAD nop; nop; nop; nop; \ - nop; nop; nop; nop; \ - nop; nop; nop; nop; \ - nop; nop; nop; -#else -#define VISLOOP_PAD -#endif - -#define FINISH_VISCHUNK(dest, f0, f1, left) \ - ASI_SETDST_NOBLK \ - subcc %left, 8, %left; \ - bl,pn %xcc, vis_out; \ - faligndata %f0, %f1, %f48; \ - EXVIS3(STDF %f48, [%dest] ASINORMAL); \ - add %dest, 8, %dest; - -#define UNEVEN_VISCHUNK_LAST(dest, f0, f1, left) \ - subcc %left, 8, %left; \ - bl,pn %xcc, vis_out; \ - fsrc1 %f0, %f1; -#define UNEVEN_VISCHUNK(dest, f0, f1, left) \ - UNEVEN_VISCHUNK_LAST(dest, f0, f1, left) \ - ba,a,pt %xcc, vis_out_slk; - - /* Macros for non-VIS memcpy code. */ -#ifdef REGS_64BIT - -#define MOVE_BIGCHUNK(src, dst, offset, t0, t1, t2, t3) \ - ASI_SETSRC_NOBLK \ - LDX [%src + offset + 0x00] ASINORMAL, %t0; \ - LDX [%src + offset + 0x08] ASINORMAL, %t1; \ - LDX [%src + offset + 0x10] ASINORMAL, %t2; \ - LDX [%src + offset + 0x18] ASINORMAL, %t3; \ - ASI_SETDST_NOBLK \ - STW %t0, [%dst + offset + 0x04] ASINORMAL; \ - srlx %t0, 32, %t0; \ - STW %t0, [%dst + offset + 0x00] ASINORMAL; \ - STW %t1, [%dst + offset + 0x0c] ASINORMAL; \ - srlx %t1, 32, %t1; \ - STW %t1, [%dst + offset + 0x08] ASINORMAL; \ - STW %t2, [%dst + offset + 0x14] ASINORMAL; \ - srlx %t2, 32, %t2; \ - STW %t2, [%dst + offset + 0x10] ASINORMAL; \ - STW %t3, [%dst + offset + 0x1c] ASINORMAL; \ - srlx %t3, 32, %t3; \ - STW %t3, [%dst + offset + 0x18] ASINORMAL; - -#define MOVE_BIGALIGNCHUNK(src, dst, offset, t0, t1, t2, t3) \ - ASI_SETSRC_NOBLK \ - LDX [%src + offset + 0x00] ASINORMAL, %t0; \ - LDX [%src + offset + 0x08] ASINORMAL, %t1; \ - LDX [%src + offset + 0x10] ASINORMAL, %t2; \ - LDX [%src + offset + 0x18] ASINORMAL, %t3; \ - ASI_SETDST_NOBLK \ - STX %t0, [%dst + offset + 0x00] ASINORMAL; \ - STX %t1, [%dst + offset + 0x08] ASINORMAL; \ - STX %t2, [%dst + offset + 0x10] ASINORMAL; \ - STX %t3, [%dst + offset + 0x18] ASINORMAL; \ - ASI_SETSRC_NOBLK \ - LDX [%src + offset + 0x20] ASINORMAL, %t0; \ - LDX [%src + offset + 0x28] ASINORMAL, %t1; \ - LDX [%src + offset + 0x30] ASINORMAL, %t2; \ - LDX [%src + offset + 0x38] ASINORMAL, %t3; \ - ASI_SETDST_NOBLK \ - STX %t0, [%dst + offset + 0x20] ASINORMAL; \ - STX %t1, [%dst + offset + 0x28] ASINORMAL; \ - STX %t2, [%dst + offset + 0x30] ASINORMAL; \ - STX %t3, [%dst + offset + 0x38] ASINORMAL; - -#define MOVE_LASTCHUNK(src, dst, offset, t0, t1, t2, t3) \ - ASI_SETSRC_NOBLK \ - LDX [%src - offset - 0x10] ASINORMAL, %t0; \ - LDX [%src - offset - 0x08] ASINORMAL, %t1; \ - ASI_SETDST_NOBLK \ - STW %t0, [%dst - offset - 0x0c] ASINORMAL; \ - srlx %t0, 32, %t2; \ - STW %t2, [%dst - offset - 0x10] ASINORMAL; \ - STW %t1, [%dst - offset - 0x04] ASINORMAL; \ - srlx %t1, 32, %t3; \ - STW %t3, [%dst - offset - 0x08] ASINORMAL; - -#define MOVE_LASTALIGNCHUNK(src, dst, offset, t0, t1) \ - ASI_SETSRC_NOBLK \ - LDX [%src - offset - 0x10] ASINORMAL, %t0; \ - LDX [%src - offset - 0x08] ASINORMAL, %t1; \ - ASI_SETDST_NOBLK \ - STX %t0, [%dst - offset - 0x10] ASINORMAL; \ - STX %t1, [%dst - offset - 0x08] ASINORMAL; - -#else /* !REGS_64BIT */ - -#define MOVE_BIGCHUNK(src, dst, offset, t0, t1, t2, t3) \ - lduw [%src + offset + 0x00], %t0; \ - lduw [%src + offset + 0x04], %t1; \ - lduw [%src + offset + 0x08], %t2; \ - lduw [%src + offset + 0x0c], %t3; \ - stw %t0, [%dst + offset + 0x00]; \ - stw %t1, [%dst + offset + 0x04]; \ - stw %t2, [%dst + offset + 0x08]; \ - stw %t3, [%dst + offset + 0x0c]; \ - lduw [%src + offset + 0x10], %t0; \ - lduw [%src + offset + 0x14], %t1; \ - lduw [%src + offset + 0x18], %t2; \ - lduw [%src + offset + 0x1c], %t3; \ - stw %t0, [%dst + offset + 0x10]; \ - stw %t1, [%dst + offset + 0x14]; \ - stw %t2, [%dst + offset + 0x18]; \ - stw %t3, [%dst + offset + 0x1c]; - -#define MOVE_LASTCHUNK(src, dst, offset, t0, t1, t2, t3) \ - lduw [%src - offset - 0x10], %t0; \ - lduw [%src - offset - 0x0c], %t1; \ - lduw [%src - offset - 0x08], %t2; \ - lduw [%src - offset - 0x04], %t3; \ - stw %t0, [%dst - offset - 0x10]; \ - stw %t1, [%dst - offset - 0x0c]; \ - stw %t2, [%dst - offset - 0x08]; \ - stw %t3, [%dst - offset - 0x04]; - -#endif /* !REGS_64BIT */ - -#ifdef __KERNEL__ - .section __ex_table,#alloc - .section .fixup,#alloc,#execinstr -#endif - - .text - .align 32 - .globl memcpy - .type memcpy,@function - - .globl bcopy - .type bcopy,@function - -#ifdef __KERNEL__ - .globl __memcpy_begin -__memcpy_begin: - - .globl __memcpy - .type __memcpy,@function - -memcpy_private: -__memcpy: -memcpy: mov ASI_P, asi_src ! IEU0 Group - brnz,pt %o2, __memcpy_entry ! CTI - mov ASI_P, asi_dest ! IEU1 - retl - clr %o0 - - .align 32 - .globl __copy_from_user - .type __copy_from_user,@function -__copy_from_user:rd %asi, asi_src ! IEU0 Group - brnz,pt %o2, __memcpy_entry ! CTI - mov ASI_P, asi_dest ! IEU1 - - .globl __copy_to_user - .type __copy_to_user,@function -__copy_to_user: mov ASI_P, asi_src ! IEU0 Group - brnz,pt %o2, __memcpy_entry ! CTI - rd %asi, asi_dest ! IEU1 - retl ! CTI Group - clr %o0 ! IEU0 Group - - .globl __copy_in_user - .type __copy_in_user,@function -__copy_in_user: rd %asi, asi_src ! IEU0 Group - brnz,pt %o2, __memcpy_entry ! CTI - mov asi_src, asi_dest ! IEU1 - retl ! CTI Group - clr %o0 ! IEU0 Group -#endif - -bcopy: or %o0, 0, %g3 ! IEU0 Group - addcc %o1, 0, %o0 ! IEU1 - brgez,pt %o2, memcpy_private ! CTI - or %g3, 0, %o1 ! IEU0 Group - retl ! CTI Group brk forced - clr %o0 ! IEU0 - - -#ifdef __KERNEL__ -#define BRANCH_ALWAYS 0x10680000 -#define NOP 0x01000000 -#define ULTRA3_DO_PATCH(OLD, NEW) \ - sethi %hi(NEW), %g1; \ - or %g1, %lo(NEW), %g1; \ - sethi %hi(OLD), %g2; \ - or %g2, %lo(OLD), %g2; \ - sub %g1, %g2, %g1; \ - sethi %hi(BRANCH_ALWAYS), %g3; \ - srl %g1, 2, %g1; \ - or %g3, %lo(BRANCH_ALWAYS), %g3; \ - or %g3, %g1, %g3; \ - stw %g3, [%g2]; \ - sethi %hi(NOP), %g3; \ - or %g3, %lo(NOP), %g3; \ - stw %g3, [%g2 + 0x4]; \ - flush %g2; -#define ULTRA3_PCACHE_DO_NOP(symbol) \ - sethi %hi(symbol##_nop_1_6), %g1; \ - or %g1, %lo(symbol##_nop_1_6), %g1; \ - sethi %hi(NOP), %g2; \ - stw %g2, [%g1 + 0x00]; \ - stw %g2, [%g1 + 0x04]; \ - flush %g1 + 0x00; \ - stw %g2, [%g1 + 0x08]; \ - stw %g2, [%g1 + 0x0c]; \ - flush %g1 + 0x08; \ - stw %g2, [%g1 + 0x10]; \ - stw %g2, [%g1 + 0x04]; \ - flush %g1 + 0x10; \ - sethi %hi(symbol##_nop_2_3), %g1; \ - or %g1, %lo(symbol##_nop_2_3), %g1; \ - stw %g2, [%g1 + 0x00]; \ - stw %g2, [%g1 + 0x04]; \ - flush %g1 + 0x00; \ - stw %g2, [%g1 + 0x08]; \ - flush %g1 + 0x08; - -#include - - .globl cheetah_patch_copyops -cheetah_patch_copyops: - ULTRA3_DO_PATCH(memcpy, U3memcpy) - ULTRA3_DO_PATCH(__copy_from_user, U3copy_from_user) - ULTRA3_DO_PATCH(__copy_to_user, U3copy_to_user) - ULTRA3_DO_PATCH(__copy_in_user, U3copy_in_user) -#if 0 /* Causes data corruption, nop out the optimization - * for now -DaveM - */ - ldxa [%g0] ASI_DCU_CONTROL_REG, %g3 - sethi %uhi(DCU_PE), %o3 - sllx %o3, 32, %o3 - andcc %g3, %o3, %g0 - be,pn %xcc, pcache_disabled - nop -#endif - ULTRA3_PCACHE_DO_NOP(U3memcpy) - ULTRA3_PCACHE_DO_NOP(U3copy_from_user) - ULTRA3_PCACHE_DO_NOP(U3copy_to_user) - ULTRA3_PCACHE_DO_NOP(cheetah_copy_user_page) -#if 0 -pcache_disabled: -#endif - retl - nop -#undef BRANCH_ALWAYS -#undef NOP -#undef ULTRA3_DO_PATCH -#endif /* __KERNEL__ */ - - .align 32 -#ifdef __KERNEL__ - andcc %o0, 7, %g2 ! IEU1 Group -#endif -VIS_enter: - be,pt %xcc, dest_is_8byte_aligned ! CTI -#ifdef __KERNEL__ - nop ! IEU0 Group -#else - andcc %o0, 0x38, %g5 ! IEU1 Group -#endif -do_dest_8byte_align: - mov 8, %g1 ! IEU0 - sub %g1, %g2, %g2 ! IEU0 Group - andcc %o0, 1, %g0 ! IEU1 - be,pt %icc, 2f ! CTI - sub %o2, %g2, %o2 ! IEU0 Group -1: ASI_SETSRC_NOBLK ! LSU Group - EX(LDUB [%o1] ASINORMAL, %o5, - add %o2, %g2) ! Load Group - add %o1, 1, %o1 ! IEU0 - add %o0, 1, %o0 ! IEU1 - ASI_SETDST_NOBLK ! LSU Group - subcc %g2, 1, %g2 ! IEU1 Group - be,pn %xcc, 3f ! CTI - EX2(STB %o5, [%o0 - 1] ASINORMAL, - add %g2, 1, %g2, - add %o2, %g2) ! Store -2: ASI_SETSRC_NOBLK ! LSU Group - EX(LDUB [%o1] ASINORMAL, %o5, - add %o2, %g2) ! Load Group - add %o0, 2, %o0 ! IEU0 - EX2(LDUB [%o1 + 1] ASINORMAL, %g3, - sub %o0, 2, %o0, - add %o2, %g2) ! Load Group - ASI_SETDST_NOBLK ! LSU Group - subcc %g2, 2, %g2 ! IEU1 Group - EX2(STB %o5, [%o0 - 2] ASINORMAL, - add %g2, 2, %g2, - add %o2, %g2) ! Store - add %o1, 2, %o1 ! IEU0 - bne,pt %xcc, 2b ! CTI Group - EX2(STB %g3, [%o0 - 1] ASINORMAL, - add %g2, 1, %g2, - add %o2, %g2) ! Store -#ifdef __KERNEL__ -3: -dest_is_8byte_aligned: - VISEntry - andcc %o0, 0x38, %g5 ! IEU1 Group -#else -3: andcc %o0, 0x38, %g5 ! IEU1 Group -dest_is_8byte_aligned: -#endif - be,pt %icc, dest_is_64byte_aligned ! CTI - mov 64, %g1 ! IEU0 - fmovd %f0, %f2 ! FPU - sub %g1, %g5, %g5 ! IEU0 Group - ASI_SETSRC_NOBLK ! LSU Group - alignaddr %o1, %g0, %g1 ! GRU Group - EXO2(LDDF [%g1] ASINORMAL, %f4) ! Load Group - sub %o2, %g5, %o2 ! IEU0 -1: EX(LDDF [%g1 + 0x8] ASINORMAL, %f6, - add %o2, %g5) ! Load Group - add %g1, 0x8, %g1 ! IEU0 Group - subcc %g5, 8, %g5 ! IEU1 - ASI_SETDST_NOBLK ! LSU Group - faligndata %f4, %f6, %f0 ! GRU Group - EX2(STDF %f0, [%o0] ASINORMAL, - add %g5, 8, %g5, - add %o2, %g5) ! Store - add %o1, 8, %o1 ! IEU0 Group - be,pn %xcc, dest_is_64byte_aligned ! CTI - add %o0, 8, %o0 ! IEU1 - ASI_SETSRC_NOBLK ! LSU Group - EX(LDDF [%g1 + 0x8] ASINORMAL, %f4, - add %o2, %g5) ! Load Group - add %g1, 8, %g1 ! IEU0 - subcc %g5, 8, %g5 ! IEU1 - ASI_SETDST_NOBLK ! LSU Group - faligndata %f6, %f4, %f0 ! GRU Group - EX2(STDF %f0, [%o0] ASINORMAL, - add %g5, 8, %g5, - add %o2, %g5) ! Store - add %o1, 8, %o1 ! IEU0 - ASI_SETSRC_NOBLK ! LSU Group - bne,pt %xcc, 1b ! CTI Group - add %o0, 8, %o0 ! IEU0 -dest_is_64byte_aligned: - membar #LoadStore | #StoreStore | #StoreLoad ! LSU Group -#ifndef __KERNEL__ - wr %g0, ASI_BLK_P, %asi ! LSU Group -#endif - subcc %o2, 0x40, %g7 ! IEU1 Group - mov %o1, %g1 ! IEU0 - andncc %g7, (0x40 - 1), %g7 ! IEU1 Group - srl %g1, 3, %g2 ! IEU0 - sub %o2, %g7, %g3 ! IEU0 Group - andn %o1, (0x40 - 1), %o1 ! IEU1 - and %g2, 7, %g2 ! IEU0 Group - andncc %g3, 0x7, %g3 ! IEU1 - fmovd %f0, %f2 ! FPU - sub %g3, 0x10, %g3 ! IEU0 Group - sub %o2, %g7, %o2 ! IEU1 -#ifdef __KERNEL__ - or asi_src, ASI_BLK_OR, asi_src ! IEU0 Group - or asi_dest, ASI_BLK_OR, asi_dest ! IEU1 -#endif - alignaddr %g1, %g0, %g0 ! GRU Group - add %g1, %g7, %g1 ! IEU0 Group - subcc %o2, %g3, %o2 ! IEU1 - ASI_SETSRC_BLK ! LSU Group - EXVIS1(LDBLK [%o1 + 0x00] ASIBLK, %f0) ! LSU Group - add %g1, %g3, %g1 ! IEU0 - EXVIS1(LDBLK [%o1 + 0x40] ASIBLK, %f16) ! LSU Group - sub %g7, 0x80, %g7 ! IEU0 - EXVIS(LDBLK [%o1 + 0x80] ASIBLK, %f32) ! LSU Group -#ifdef __KERNEL__ -vispc: sll %g2, 9, %g2 ! IEU0 Group - sethi %hi(vis00), %g5 ! IEU1 - or %g5, %lo(vis00), %g5 ! IEU0 Group - jmpl %g5 + %g2, %g0 ! CTI Group brk forced - addcc %o1, 0xc0, %o1 ! IEU1 Group -#else - ! Clk1 Group 8-( - ! Clk2 Group 8-( - ! Clk3 Group 8-( - ! Clk4 Group 8-( -vispc: rd %pc, %g5 ! PDU Group 8-( - addcc %g5, %lo(vis00 - vispc), %g5 ! IEU1 Group - sll %g2, 9, %g2 ! IEU0 - jmpl %g5 + %g2, %g0 ! CTI Group brk forced - addcc %o1, 0xc0, %o1 ! IEU1 Group -#endif - .align 512 /* OK, here comes the fun part... */ -vis00:FREG_FROB(f0, f2, f4, f6, f8, f10,f12,f14,f16) LOOP_CHUNK1(o1, o0, g7, vis01) - FREG_FROB(f16,f18,f20,f22,f24,f26,f28,f30,f32) LOOP_CHUNK2(o1, o0, g7, vis02) - FREG_FROB(f32,f34,f36,f38,f40,f42,f44,f46,f0) LOOP_CHUNK3(o1, o0, g7, vis03) - b,pt %xcc, vis00+4; faligndata %f0, %f2, %f48 -vis01:FREG_FROB(f16,f18,f20,f22,f24,f26,f28,f30,f32) STORE_SYNC(o0, f48) membar #Sync - FREG_FROB(f32,f34,f36,f38,f40,f42,f44,f46,f0) STORE_JUMP(o0, f48, finish_f0) membar #Sync -vis02:FREG_FROB(f32,f34,f36,f38,f40,f42,f44,f46,f0) STORE_SYNC(o0, f48) membar #Sync - FREG_FROB(f0, f2, f4, f6, f8, f10,f12,f14,f16) STORE_JUMP(o0, f48, finish_f16) membar #Sync -vis03:FREG_FROB(f0, f2, f4, f6, f8, f10,f12,f14,f16) STORE_SYNC(o0, f48) membar #Sync - FREG_FROB(f16,f18,f20,f22,f24,f26,f28,f30,f32) STORE_JUMP(o0, f48, finish_f32) membar #Sync - VISLOOP_PAD -vis10:FREG_FROB(f2, f4, f6, f8, f10,f12,f14,f16,f18) LOOP_CHUNK1(o1, o0, g7, vis11) - FREG_FROB(f18,f20,f22,f24,f26,f28,f30,f32,f34) LOOP_CHUNK2(o1, o0, g7, vis12) - FREG_FROB(f34,f36,f38,f40,f42,f44,f46,f0, f2) LOOP_CHUNK3(o1, o0, g7, vis13) - b,pt %xcc, vis10+4; faligndata %f2, %f4, %f48 -vis11:FREG_FROB(f18,f20,f22,f24,f26,f28,f30,f32,f34) STORE_SYNC(o0, f48) membar #Sync - FREG_FROB(f34,f36,f38,f40,f42,f44,f46,f0, f2) STORE_JUMP(o0, f48, finish_f2) membar #Sync -vis12:FREG_FROB(f34,f36,f38,f40,f42,f44,f46,f0, f2) STORE_SYNC(o0, f48) membar #Sync - FREG_FROB(f2, f4, f6, f8, f10,f12,f14,f16,f18) STORE_JUMP(o0, f48, finish_f18) membar #Sync -vis13:FREG_FROB(f2, f4, f6, f8, f10,f12,f14,f16,f18) STORE_SYNC(o0, f48) membar #Sync - FREG_FROB(f18,f20,f22,f24,f26,f28,f30,f32,f34) STORE_JUMP(o0, f48, finish_f34) membar #Sync - VISLOOP_PAD -vis20:FREG_FROB(f4, f6, f8, f10,f12,f14,f16,f18,f20) LOOP_CHUNK1(o1, o0, g7, vis21) - FREG_FROB(f20,f22,f24,f26,f28,f30,f32,f34,f36) LOOP_CHUNK2(o1, o0, g7, vis22) - FREG_FROB(f36,f38,f40,f42,f44,f46,f0, f2, f4) LOOP_CHUNK3(o1, o0, g7, vis23) - b,pt %xcc, vis20+4; faligndata %f4, %f6, %f48 -vis21:FREG_FROB(f20,f22,f24,f26,f28,f30,f32,f34,f36) STORE_SYNC(o0, f48) membar #Sync - FREG_FROB(f36,f38,f40,f42,f44,f46,f0, f2, f4) STORE_JUMP(o0, f48, finish_f4) membar #Sync -vis22:FREG_FROB(f36,f38,f40,f42,f44,f46,f0, f2, f4) STORE_SYNC(o0, f48) membar #Sync - FREG_FROB(f4, f6, f8, f10,f12,f14,f16,f18,f20) STORE_JUMP(o0, f48, finish_f20) membar #Sync -vis23:FREG_FROB(f4, f6, f8, f10,f12,f14,f16,f18,f20) STORE_SYNC(o0, f48) membar #Sync - FREG_FROB(f20,f22,f24,f26,f28,f30,f32,f34,f36) STORE_JUMP(o0, f48, finish_f36) membar #Sync - VISLOOP_PAD -vis30:FREG_FROB(f6, f8, f10,f12,f14,f16,f18,f20,f22) LOOP_CHUNK1(o1, o0, g7, vis31) - FREG_FROB(f22,f24,f26,f28,f30,f32,f34,f36,f38) LOOP_CHUNK2(o1, o0, g7, vis32) - FREG_FROB(f38,f40,f42,f44,f46,f0, f2, f4, f6) LOOP_CHUNK3(o1, o0, g7, vis33) - b,pt %xcc, vis30+4; faligndata %f6, %f8, %f48 -vis31:FREG_FROB(f22,f24,f26,f28,f30,f32,f34,f36,f38) STORE_SYNC(o0, f48) membar #Sync - FREG_FROB(f38,f40,f42,f44,f46,f0, f2, f4, f6) STORE_JUMP(o0, f48, finish_f6) membar #Sync -vis32:FREG_FROB(f38,f40,f42,f44,f46,f0, f2, f4, f6) STORE_SYNC(o0, f48) membar #Sync - FREG_FROB(f6, f8, f10,f12,f14,f16,f18,f20,f22) STORE_JUMP(o0, f48, finish_f22) membar #Sync -vis33:FREG_FROB(f6, f8, f10,f12,f14,f16,f18,f20,f22) STORE_SYNC(o0, f48) membar #Sync - FREG_FROB(f22,f24,f26,f28,f30,f32,f34,f36,f38) STORE_JUMP(o0, f48, finish_f38) membar #Sync - VISLOOP_PAD -vis40:FREG_FROB(f8, f10,f12,f14,f16,f18,f20,f22,f24) LOOP_CHUNK1(o1, o0, g7, vis41) - FREG_FROB(f24,f26,f28,f30,f32,f34,f36,f38,f40) LOOP_CHUNK2(o1, o0, g7, vis42) - FREG_FROB(f40,f42,f44,f46,f0, f2, f4, f6, f8) LOOP_CHUNK3(o1, o0, g7, vis43) - b,pt %xcc, vis40+4; faligndata %f8, %f10, %f48 -vis41:FREG_FROB(f24,f26,f28,f30,f32,f34,f36,f38,f40) STORE_SYNC(o0, f48) membar #Sync - FREG_FROB(f40,f42,f44,f46,f0, f2, f4, f6, f8) STORE_JUMP(o0, f48, finish_f8) membar #Sync -vis42:FREG_FROB(f40,f42,f44,f46,f0, f2, f4, f6, f8) STORE_SYNC(o0, f48) membar #Sync - FREG_FROB(f8, f10,f12,f14,f16,f18,f20,f22,f24) STORE_JUMP(o0, f48, finish_f24) membar #Sync -vis43:FREG_FROB(f8, f10,f12,f14,f16,f18,f20,f22,f24) STORE_SYNC(o0, f48) membar #Sync - FREG_FROB(f24,f26,f28,f30,f32,f34,f36,f38,f40) STORE_JUMP(o0, f48, finish_f40) membar #Sync - VISLOOP_PAD -vis50:FREG_FROB(f10,f12,f14,f16,f18,f20,f22,f24,f26) LOOP_CHUNK1(o1, o0, g7, vis51) - FREG_FROB(f26,f28,f30,f32,f34,f36,f38,f40,f42) LOOP_CHUNK2(o1, o0, g7, vis52) - FREG_FROB(f42,f44,f46,f0, f2, f4, f6, f8, f10) LOOP_CHUNK3(o1, o0, g7, vis53) - b,pt %xcc, vis50+4; faligndata %f10, %f12, %f48 -vis51:FREG_FROB(f26,f28,f30,f32,f34,f36,f38,f40,f42) STORE_SYNC(o0, f48) membar #Sync - FREG_FROB(f42,f44,f46,f0, f2, f4, f6, f8, f10) STORE_JUMP(o0, f48, finish_f10) membar #Sync -vis52:FREG_FROB(f42,f44,f46,f0, f2, f4, f6, f8, f10) STORE_SYNC(o0, f48) membar #Sync - FREG_FROB(f10,f12,f14,f16,f18,f20,f22,f24,f26) STORE_JUMP(o0, f48, finish_f26) membar #Sync -vis53:FREG_FROB(f10,f12,f14,f16,f18,f20,f22,f24,f26) STORE_SYNC(o0, f48) membar #Sync - FREG_FROB(f26,f28,f30,f32,f34,f36,f38,f40,f42) STORE_JUMP(o0, f48, finish_f42) membar #Sync - VISLOOP_PAD -vis60:FREG_FROB(f12,f14,f16,f18,f20,f22,f24,f26,f28) LOOP_CHUNK1(o1, o0, g7, vis61) - FREG_FROB(f28,f30,f32,f34,f36,f38,f40,f42,f44) LOOP_CHUNK2(o1, o0, g7, vis62) - FREG_FROB(f44,f46,f0, f2, f4, f6, f8, f10,f12) LOOP_CHUNK3(o1, o0, g7, vis63) - b,pt %xcc, vis60+4; faligndata %f12, %f14, %f48 -vis61:FREG_FROB(f28,f30,f32,f34,f36,f38,f40,f42,f44) STORE_SYNC(o0, f48) membar #Sync - FREG_FROB(f44,f46,f0, f2, f4, f6, f8, f10,f12) STORE_JUMP(o0, f48, finish_f12) membar #Sync -vis62:FREG_FROB(f44,f46,f0, f2, f4, f6, f8, f10,f12) STORE_SYNC(o0, f48) membar #Sync - FREG_FROB(f12,f14,f16,f18,f20,f22,f24,f26,f28) STORE_JUMP(o0, f48, finish_f28) membar #Sync -vis63:FREG_FROB(f12,f14,f16,f18,f20,f22,f24,f26,f28) STORE_SYNC(o0, f48) membar #Sync - FREG_FROB(f28,f30,f32,f34,f36,f38,f40,f42,f44) STORE_JUMP(o0, f48, finish_f44) membar #Sync - VISLOOP_PAD -vis70:FREG_FROB(f14,f16,f18,f20,f22,f24,f26,f28,f30) LOOP_CHUNK1(o1, o0, g7, vis71) - FREG_FROB(f30,f32,f34,f36,f38,f40,f42,f44,f46) LOOP_CHUNK2(o1, o0, g7, vis72) - FREG_FROB(f46,f0, f2, f4, f6, f8, f10,f12,f14) LOOP_CHUNK3(o1, o0, g7, vis73) - b,pt %xcc, vis70+4; faligndata %f14, %f16, %f48 -vis71:FREG_FROB(f30,f32,f34,f36,f38,f40,f42,f44,f46) STORE_SYNC(o0, f48) membar #Sync - FREG_FROB(f46,f0, f2, f4, f6, f8, f10,f12,f14) STORE_JUMP(o0, f48, finish_f14) membar #Sync -vis72:FREG_FROB(f46,f0, f2, f4, f6, f8, f10,f12,f14) STORE_SYNC(o0, f48) membar #Sync - FREG_FROB(f14,f16,f18,f20,f22,f24,f26,f28,f30) STORE_JUMP(o0, f48, finish_f30) membar #Sync -vis73:FREG_FROB(f14,f16,f18,f20,f22,f24,f26,f28,f30) STORE_SYNC(o0, f48) membar #Sync - FREG_FROB(f30,f32,f34,f36,f38,f40,f42,f44,f46) STORE_JUMP(o0, f48, finish_f46) membar #Sync - VISLOOP_PAD -finish_f0: FINISH_VISCHUNK(o0, f0, f2, g3) -finish_f2: FINISH_VISCHUNK(o0, f2, f4, g3) -finish_f4: FINISH_VISCHUNK(o0, f4, f6, g3) -finish_f6: FINISH_VISCHUNK(o0, f6, f8, g3) -finish_f8: FINISH_VISCHUNK(o0, f8, f10, g3) -finish_f10: FINISH_VISCHUNK(o0, f10, f12, g3) -finish_f12: FINISH_VISCHUNK(o0, f12, f14, g3) -finish_f14: UNEVEN_VISCHUNK(o0, f14, f0, g3) -finish_f16: FINISH_VISCHUNK(o0, f16, f18, g3) -finish_f18: FINISH_VISCHUNK(o0, f18, f20, g3) -finish_f20: FINISH_VISCHUNK(o0, f20, f22, g3) -finish_f22: FINISH_VISCHUNK(o0, f22, f24, g3) -finish_f24: FINISH_VISCHUNK(o0, f24, f26, g3) -finish_f26: FINISH_VISCHUNK(o0, f26, f28, g3) -finish_f28: FINISH_VISCHUNK(o0, f28, f30, g3) -finish_f30: UNEVEN_VISCHUNK(o0, f30, f0, g3) -finish_f32: FINISH_VISCHUNK(o0, f32, f34, g3) -finish_f34: FINISH_VISCHUNK(o0, f34, f36, g3) -finish_f36: FINISH_VISCHUNK(o0, f36, f38, g3) -finish_f38: FINISH_VISCHUNK(o0, f38, f40, g3) -finish_f40: FINISH_VISCHUNK(o0, f40, f42, g3) -finish_f42: FINISH_VISCHUNK(o0, f42, f44, g3) -finish_f44: FINISH_VISCHUNK(o0, f44, f46, g3) -finish_f46: UNEVEN_VISCHUNK_LAST(o0, f46, f0, g3) -vis_out_slk: -#ifdef __KERNEL__ - srl asi_src, 3, %g5 ! IEU0 Group - xor asi_src, ASI_BLK_XOR1, asi_src ! IEU1 - xor asi_src, %g5, asi_src ! IEU0 Group -#endif -vis_slk:ASI_SETSRC_NOBLK ! LSU Group - EXVIS3(LDDF [%o1] ASINORMAL, %f2) ! Load Group - add %o1, 8, %o1 ! IEU0 - subcc %g3, 8, %g3 ! IEU1 - ASI_SETDST_NOBLK ! LSU Group - faligndata %f0, %f2, %f8 ! GRU Group - EXVIS4(STDF %f8, [%o0] ASINORMAL) ! Store - bl,pn %xcc, vis_out_slp ! CTI - add %o0, 8, %o0 ! IEU0 Group - ASI_SETSRC_NOBLK ! LSU Group - EXVIS3(LDDF [%o1] ASINORMAL, %f0) ! Load Group - add %o1, 8, %o1 ! IEU0 - subcc %g3, 8, %g3 ! IEU1 - ASI_SETDST_NOBLK ! LSU Group - faligndata %f2, %f0, %f8 ! GRU Group - EXVIS4(STDF %f8, [%o0] ASINORMAL) ! Store - bge,pt %xcc, vis_slk ! CTI - add %o0, 8, %o0 ! IEU0 Group -vis_out_slp: -#ifdef __KERNEL__ - brz,pt %o2, vis_ret ! CTI Group - mov %g1, %o1 ! IEU0 - ba,pt %xcc, vis_slp+4 ! CTI Group - ASI_SETSRC_NOBLK ! LSU Group -#endif -vis_out:brz,pt %o2, vis_ret ! CTI Group - mov %g1, %o1 ! IEU0 -#ifdef __KERNEL__ - srl asi_src, 3, %g5 ! IEU0 Group - xor asi_src, ASI_BLK_XOR1, asi_src ! IEU1 - xor asi_src, %g5, asi_src ! IEU0 Group -#endif -vis_slp:ASI_SETSRC_NOBLK ! LSU Group - EXO2(LDUB [%o1] ASINORMAL, %g5) ! LOAD - add %o1, 1, %o1 ! IEU0 - add %o0, 1, %o0 ! IEU1 - ASI_SETDST_NOBLK ! LSU Group - subcc %o2, 1, %o2 ! IEU1 - bne,pt %xcc, vis_slp ! CTI - EX(STB %g5, [%o0 - 1] ASINORMAL, - add %o2, 1) ! Store Group -vis_ret:membar #StoreLoad | #StoreStore ! LSU Group - FPU_CLEAN_RETL - - -__memcpy_short: - andcc %o2, 1, %g0 ! IEU1 Group - be,pt %icc, 2f ! CTI -1: ASI_SETSRC_NOBLK ! LSU Group - EXO2(LDUB [%o1] ASINORMAL, %g5) ! LOAD Group - add %o1, 1, %o1 ! IEU0 - add %o0, 1, %o0 ! IEU1 - ASI_SETDST_NOBLK ! LSU Group - subcc %o2, 1, %o2 ! IEU1 Group - be,pn %xcc, short_ret ! CTI - EX(STB %g5, [%o0 - 1] ASINORMAL, - add %o2, 1) ! Store -2: ASI_SETSRC_NOBLK ! LSU Group - EXO2(LDUB [%o1] ASINORMAL, %g5) ! LOAD Group - add %o0, 2, %o0 ! IEU0 - EX2(LDUB [%o1 + 1] ASINORMAL, %o5, - sub %o0, 2, %o0, - add %o2, %g0) ! LOAD Group - add %o1, 2, %o1 ! IEU0 - ASI_SETDST_NOBLK ! LSU Group - subcc %o2, 2, %o2 ! IEU1 Group - EX(STB %g5, [%o0 - 2] ASINORMAL, - add %o2, 2) ! Store - bne,pt %xcc, 2b ! CTI - EX(STB %o5, [%o0 - 1] ASINORMAL, - add %o2, 1) ! Store -short_ret: - NORMAL_RETL - -#ifndef __KERNEL__ -memcpy_private: -memcpy: -#ifndef REGS_64BIT - srl %o2, 0, %o2 ! IEU1 Group -#endif - brz,pn %o2, short_ret ! CTI Group - mov %o0, %g6 ! IEU0 -#endif -__memcpy_entry: - cmp %o2, 15 ! IEU1 Group - bleu,pn %xcc, __memcpy_short ! CTI - cmp %o2, (64 * 6) ! IEU1 Group - bgeu,pn %xcc, VIS_enter ! CTI - andcc %o0, 7, %g2 ! IEU1 Group - sub %o0, %o1, %g5 ! IEU0 - andcc %g5, 3, %o5 ! IEU1 Group - bne,pn %xcc, memcpy_noVIS_misaligned ! CTI - andcc %o1, 3, %g0 ! IEU1 Group -#ifdef REGS_64BIT - be,a,pt %xcc, 3f ! CTI - andcc %o1, 4, %g0 ! IEU1 Group - andcc %o1, 1, %g0 ! IEU1 Group -#else /* !REGS_64BIT */ - be,pt %xcc, 5f ! CTI - andcc %o1, 1, %g0 ! IEU1 Group -#endif /* !REGS_64BIT */ - be,pn %xcc, 4f ! CTI - andcc %o1, 2, %g0 ! IEU1 Group - ASI_SETSRC_NOBLK ! LSU Group - EXO2(LDUB [%o1] ASINORMAL, %g2) ! Load Group - add %o1, 1, %o1 ! IEU0 - add %o0, 1, %o0 ! IEU1 - sub %o2, 1, %o2 ! IEU0 Group - ASI_SETDST_NOBLK ! LSU Group - bne,pn %xcc, 5f ! CTI Group - EX(STB %g2, [%o0 - 1] ASINORMAL, - add %o2, 1) ! Store -4: ASI_SETSRC_NOBLK ! LSU Group - EXO2(LDUH [%o1] ASINORMAL, %g2) ! Load Group - add %o1, 2, %o1 ! IEU0 - add %o0, 2, %o0 ! IEU1 - ASI_SETDST_NOBLK ! LSU Group - sub %o2, 2, %o2 ! IEU0 - EX(STH %g2, [%o0 - 2] ASINORMAL, - add %o2, 2) ! Store Group + bubble -#ifdef REGS_64BIT -5: andcc %o1, 4, %g0 ! IEU1 -3: be,a,pn %xcc, 2f ! CTI - andcc %o2, -128, %g7 ! IEU1 Group - ASI_SETSRC_NOBLK ! LSU Group - EXO2(LDUW [%o1] ASINORMAL, %g5) ! Load Group - add %o1, 4, %o1 ! IEU0 - add %o0, 4, %o0 ! IEU1 - ASI_SETDST_NOBLK ! LSU Group - sub %o2, 4, %o2 ! IEU0 Group - EX(STW %g5, [%o0 - 4] ASINORMAL, - add %o2, 4) ! Store - andcc %o2, -128, %g7 ! IEU1 Group -2: be,pn %xcc, 3f ! CTI - andcc %o0, 4, %g0 ! IEU1 Group - be,pn %xcc, 82f + 4 ! CTI Group -#else /* !REGS_64BIT */ -5: andcc %o2, -128, %g7 ! IEU1 - be,a,pn %xcc, 41f ! CTI - andcc %o2, 0x70, %g7 ! IEU1 Group -#endif /* !REGS_64BIT */ -5: MOVE_BIGCHUNK(o1, o0, 0x00, g1, g3, g5, o5) - MOVE_BIGCHUNK(o1, o0, 0x20, g1, g3, g5, o5) - MOVE_BIGCHUNK(o1, o0, 0x40, g1, g3, g5, o5) - MOVE_BIGCHUNK(o1, o0, 0x60, g1, g3, g5, o5) - EXT(5b,35f,VIScopyfixup1) -35: subcc %g7, 128, %g7 ! IEU1 Group - add %o1, 128, %o1 ! IEU0 - bne,pt %xcc, 5b ! CTI - add %o0, 128, %o0 ! IEU0 Group -3: andcc %o2, 0x70, %g7 ! IEU1 Group -41: be,pn %xcc, 80f ! CTI - andcc %o2, 8, %g0 ! IEU1 Group -#ifdef __KERNEL__ -79: sethi %hi(80f), %o5 ! IEU0 - sll %g7, 1, %g5 ! IEU0 Group - add %o1, %g7, %o1 ! IEU1 - srl %g7, 1, %g2 ! IEU0 Group - sub %o5, %g5, %o5 ! IEU1 - sub %o5, %g2, %o5 ! IEU0 Group - jmpl %o5 + %lo(80f), %g0 ! CTI Group brk forced - add %o0, %g7, %o0 ! IEU0 Group -#else - ! Clk1 8-( - ! Clk2 8-( - ! Clk3 8-( - ! Clk4 8-( -79: rd %pc, %o5 ! PDU Group - sll %g7, 1, %g5 ! IEU0 Group - add %o1, %g7, %o1 ! IEU1 - sub %o5, %g5, %o5 ! IEU0 Group - jmpl %o5 + %lo(80f - 79b), %g0 ! CTI Group brk forced - add %o0, %g7, %o0 ! IEU0 Group -#endif -36: MOVE_LASTCHUNK(o1, o0, 0x60, g2, g3, g5, o5) - MOVE_LASTCHUNK(o1, o0, 0x50, g2, g3, g5, o5) - MOVE_LASTCHUNK(o1, o0, 0x40, g2, g3, g5, o5) - MOVE_LASTCHUNK(o1, o0, 0x30, g2, g3, g5, o5) - MOVE_LASTCHUNK(o1, o0, 0x20, g2, g3, g5, o5) - MOVE_LASTCHUNK(o1, o0, 0x10, g2, g3, g5, o5) - MOVE_LASTCHUNK(o1, o0, 0x00, g2, g3, g5, o5) - EXT(36b,80f,VIScopyfixup2) -80: be,pt %xcc, 81f ! CTI - andcc %o2, 4, %g0 ! IEU1 -#ifdef REGS_64BIT - ASI_SETSRC_NOBLK ! LSU Group - EX(LDX [%o1] ASINORMAL, %g2, - and %o2, 0xf) ! Load Group - add %o0, 8, %o0 ! IEU0 - ASI_SETDST_NOBLK ! LSU Group - EX(STW %g2, [%o0 - 0x4] ASINORMAL, - and %o2, 0xf) ! Store Group - add %o1, 8, %o1 ! IEU1 - srlx %g2, 32, %g2 ! IEU0 Group - EX2(STW %g2, [%o0 - 0x8] ASINORMAL, - and %o2, 0xf, %o2, - sub %o2, 4) ! Store -#else /* !REGS_64BIT */ - lduw [%o1], %g2 ! Load Group - add %o0, 8, %o0 ! IEU0 - lduw [%o1 + 0x4], %g3 ! Load Group - add %o1, 8, %o1 ! IEU0 - stw %g2, [%o0 - 0x8] ! Store Group - stw %g3, [%o0 - 0x4] ! Store Group -#endif /* !REGS_64BIT */ -81: be,pt %xcc, 1f ! CTI - andcc %o2, 2, %g0 ! IEU1 Group - ASI_SETSRC_NOBLK ! LSU Group - EX(LDUW [%o1] ASINORMAL, %g2, - and %o2, 0x7) ! Load Group - add %o1, 4, %o1 ! IEU0 - ASI_SETDST_NOBLK ! LSU Group - EX(STW %g2, [%o0] ASINORMAL, - and %o2, 0x7) ! Store Group - add %o0, 4, %o0 ! IEU0 -1: be,pt %xcc, 1f ! CTI - andcc %o2, 1, %g0 ! IEU1 Group - ASI_SETSRC_NOBLK ! LSU Group - EX(LDUH [%o1] ASINORMAL, %g2, - and %o2, 0x3) ! Load Group - add %o1, 2, %o1 ! IEU0 - ASI_SETDST_NOBLK ! LSU Group - EX(STH %g2, [%o0] ASINORMAL, - and %o2, 0x3) ! Store Group - add %o0, 2, %o0 ! IEU0 -1: be,pt %xcc, normal_retl ! CTI - nop ! IEU1 - ASI_SETSRC_NOBLK ! LSU Group - EX(LDUB [%o1] ASINORMAL, %g2, - add %g0, 1) ! Load Group - ASI_SETDST_NOBLK ! LSU Group - EX(STB %g2, [%o0] ASINORMAL, - add %g0, 1) ! Store Group + bubble -normal_retl: - NORMAL_RETL - -#ifdef REGS_64BIT -82: MOVE_BIGALIGNCHUNK(o1, o0, 0x00, g1, g3, g5, o5) - MOVE_BIGALIGNCHUNK(o1, o0, 0x40, g1, g3, g5, o5) - EXT(82b,37f,VIScopyfixup3) -37: subcc %g7, 128, %g7 ! IEU1 Group - add %o1, 128, %o1 ! IEU0 - bne,pt %xcc, 82b ! CTI - add %o0, 128, %o0 ! IEU0 Group - andcc %o2, 0x70, %g7 ! IEU1 - be,pn %xcc, 84f ! CTI - andcc %o2, 8, %g0 ! IEU1 Group -#ifdef __KERNEL__ -83: srl %g7, 1, %g5 ! IEU0 - sethi %hi(84f), %o5 ! IEU0 Group - add %g7, %g5, %g5 ! IEU1 - add %o1, %g7, %o1 ! IEU0 Group - sub %o5, %g5, %o5 ! IEU1 - jmpl %o5 + %lo(84f), %g0 ! CTI Group brk forced - add %o0, %g7, %o0 ! IEU0 Group -#else - ! Clk1 8-( - ! Clk2 8-( - ! Clk3 8-( - ! Clk4 8-( -83: rd %pc, %o5 ! PDU Group - add %o1, %g7, %o1 ! IEU0 Group - sub %o5, %g7, %o5 ! IEU1 - jmpl %o5 + %lo(84f - 83b), %g0 ! CTI Group brk forced - add %o0, %g7, %o0 ! IEU0 Group -#endif -38: MOVE_LASTALIGNCHUNK(o1, o0, 0x60, g2, g3) - MOVE_LASTALIGNCHUNK(o1, o0, 0x50, g2, g3) - MOVE_LASTALIGNCHUNK(o1, o0, 0x40, g2, g3) - MOVE_LASTALIGNCHUNK(o1, o0, 0x30, g2, g3) - MOVE_LASTALIGNCHUNK(o1, o0, 0x20, g2, g3) - MOVE_LASTALIGNCHUNK(o1, o0, 0x10, g2, g3) - MOVE_LASTALIGNCHUNK(o1, o0, 0x00, g2, g3) - EXT(38b,84f,VIScopyfixup4) -84: be,pt %xcc, 85f ! CTI Group - andcc %o2, 4, %g0 ! IEU1 - ASI_SETSRC_NOBLK ! LSU Group - EX(LDX [%o1] ASINORMAL, %g2, - and %o2, 0xf) ! Load Group - add %o0, 8, %o0 ! IEU0 - ASI_SETDST_NOBLK ! LSU Group - add %o1, 8, %o1 ! IEU0 Group - EX(STX %g2, [%o0 - 0x8] ASINORMAL, - and %o2, 0xf) ! Store -85: be,pt %xcc, 1f ! CTI - andcc %o2, 2, %g0 ! IEU1 Group - ASI_SETSRC_NOBLK ! LSU Group - EX(LDUW [%o1] ASINORMAL, %g2, - and %o2, 0x7) ! Load Group - add %o0, 4, %o0 ! IEU0 - ASI_SETDST_NOBLK ! LSU Group - add %o1, 4, %o1 ! IEU0 Group - EX(STW %g2, [%o0 - 0x4] ASINORMAL, - and %o2, 0x7) ! Store -1: be,pt %xcc, 1f ! CTI - andcc %o2, 1, %g0 ! IEU1 Group - ASI_SETSRC_NOBLK ! LSU Group - EX(LDUH [%o1] ASINORMAL, %g2, - and %o2, 0x3) ! Load Group - add %o0, 2, %o0 ! IEU0 - ASI_SETDST_NOBLK ! LSU Group - add %o1, 2, %o1 ! IEU0 Group - EX(STH %g2, [%o0 - 0x2] ASINORMAL, - and %o2, 0x3) ! Store -1: be,pt %xcc, 1f ! CTI - nop ! IEU0 Group - ASI_SETSRC_NOBLK ! LSU Group - EX(LDUB [%o1] ASINORMAL, %g2, - add %g0, 1) ! Load Group - ASI_SETDST_NOBLK ! LSU Group - EX(STB %g2, [%o0] ASINORMAL, - add %g0, 1) ! Store Group + bubble -1: NORMAL_RETL -#endif /* REGS_64BIT */ - -memcpy_noVIS_misaligned: - brz,pt %g2, 2f ! CTI Group - mov 8, %g1 ! IEU0 - sub %g1, %g2, %g2 ! IEU0 Group - sub %o2, %g2, %o2 ! IEU0 Group -1: ASI_SETSRC_NOBLK ! LSU Group - EX(LDUB [%o1] ASINORMAL, %g5, - add %o2, %g2) ! Load Group - add %o1, 1, %o1 ! IEU0 - add %o0, 1, %o0 ! IEU1 - ASI_SETDST_NOBLK ! LSU Group - subcc %g2, 1, %g2 ! IEU1 Group - bne,pt %xcc, 1b ! CTI - EX2(STB %g5, [%o0 - 1] ASINORMAL, - add %o2, %g2, %o2, - add %o2, 1) ! Store -2: -#ifdef __KERNEL__ - VISEntry -#endif - andn %o2, 7, %g5 ! IEU0 Group - and %o2, 7, %o2 ! IEU1 - fmovd %f0, %f2 ! FPU - ASI_SETSRC_NOBLK ! LSU Group - alignaddr %o1, %g0, %g1 ! GRU Group - EXO2(LDDF [%g1] ASINORMAL, %f4) ! Load Group -1: EX(LDDF [%g1 + 0x8] ASINORMAL, %f6, - add %o2, %g5) ! Load Group - add %g1, 0x8, %g1 ! IEU0 Group - subcc %g5, 8, %g5 ! IEU1 - ASI_SETDST_NOBLK ! LSU Group - faligndata %f4, %f6, %f0 ! GRU Group - EX2(STDF %f0, [%o0] ASINORMAL, - add %o2, %g5, %o2, - add %o2, 8) ! Store - add %o1, 8, %o1 ! IEU0 Group - be,pn %xcc, end_cruft ! CTI - add %o0, 8, %o0 ! IEU1 - ASI_SETSRC_NOBLK ! LSU Group - EX(LDDF [%g1 + 0x8] ASINORMAL, %f4, - add %o2, %g5) ! Load Group - add %g1, 8, %g1 ! IEU0 - subcc %g5, 8, %g5 ! IEU1 - ASI_SETDST_NOBLK ! LSU Group - faligndata %f6, %f4, %f0 ! GRU Group - EX2(STDF %f0, [%o0] ASINORMAL, - add %o2, %g5, %o2, - add %o2, 8) ! Store - add %o1, 8, %o1 ! IEU0 - ASI_SETSRC_NOBLK ! LSU Group - bne,pn %xcc, 1b ! CTI Group - add %o0, 8, %o0 ! IEU0 -end_cruft: - brz,pn %o2, fpu_retl ! CTI Group -#ifndef __KERNEL__ - nop ! IEU0 -#else - ASI_SETSRC_NOBLK ! LSU Group -#endif - EXO2(LDUB [%o1] ASINORMAL, %g5) ! LOAD - add %o1, 1, %o1 ! IEU0 - add %o0, 1, %o0 ! IEU1 - ASI_SETDST_NOBLK ! LSU Group - subcc %o2, 1, %o2 ! IEU1 - bne,pt %xcc, vis_slp ! CTI - EX(STB %g5, [%o0 - 1] ASINORMAL, - add %o2, 1) ! Store Group -fpu_retl: - FPU_RETL - -#ifdef __KERNEL__ - .globl __memcpy_end -__memcpy_end: - - .section .fixup - .align 4 -VIScopyfixup_reto2: - mov %o2, %o1 -VIScopyfixup_ret: - /* If this is copy_from_user(), zero out the rest of the - * kernel buffer. - */ - ldub [%g6 + AOFF_task_thread + AOFF_thread_current_ds], %o4 - andcc asi_src, 0x1, %g0 - be,pt %icc, 1f - VISExit - andcc asi_dest, 0x1, %g0 - bne,pn %icc, 1f - nop - save %sp, -160, %sp - mov %i0, %o0 - call __bzero - mov %i1, %o1 - restore -1: mov %o1, %o0 - retl - wr %o4, %g0, %asi -VIScopyfixup1: subcc %g2, 18, %g2 - add %o0, 32, %o0 - bgeu,a,pt %icc, VIScopyfixup1 - sub %g7, 32, %g7 - sub %o0, 32, %o0 - rd %pc, %g5 - add %g2, (18 + 16), %g2 - ldub [%g5 + %g2], %g2 - ba,a,pt %xcc, 2f -.byte 0, 0, 0, 0, 0, 0, 0, 4, 4, 8, 12, 12, 16, 20, 20, 24, 28, 28 - .align 4 -VIScopyfixup2: mov (7 * 16), %g7 -1: subcc %g2, 10, %g2 - bgeu,a,pt %icc, 1b - sub %g7, 16, %g7 - sub %o0, %g7, %o0 - rd %pc, %g5 - add %g2, (10 + 16), %g2 - ldub [%g5 + %g2], %g2 - ba,a,pt %xcc, 4f -.byte 0, 0, 0, 0, 0, 4, 4, 8, 12, 12 - .align 4 -VIScopyfixup3: subcc %g2, 10, %g2 - add %o0, 32, %o0 - bgeu,a,pt %icc, VIScopyfixup3 - sub %g7, 32, %g7 - sub %o0, 32, %o0 - rd %pc, %g5 - add %g2, (10 + 16), %g2 - ldub [%g5 + %g2], %g2 - ba,a,pt %xcc, 2f -.byte 0, 0, 0, 0, 0, 0, 0, 8, 16, 24 - .align 4 -2: and %o2, 0x7f, %o2 - sub %g7, %g2, %g7 - ba,pt %xcc, VIScopyfixup_ret - add %g7, %o2, %o1 -VIScopyfixup4: mov (7 * 16), %g7 -3: subcc %g2, 6, %g2 - bgeu,a,pt %icc, 3b - sub %g7, 16, %g7 - sub %o0, %g7, %o0 - rd %pc, %g5 - add %g2, (6 + 16), %g2 - ldub [%g5 + %g2], %g2 - ba,a,pt %xcc, 4f -.byte 0, 0, 0, 0, 0, 8 - .align 4 -4: and %o2, 0xf, %o2 - sub %g7, %g2, %g7 - ba,pt %xcc, VIScopyfixup_ret - add %g7, %o2, %o1 -VIScopyfixup_vis2: - sub %o2, 0x40, %o2 -VIScopyfixup_vis0: - add %o2, 0x80, %o2 -VIScopyfixup_vis1: - add %g7, %g3, %g7 - ba,pt %xcc, VIScopyfixup_ret - add %o2, %g7, %o1 -VIScopyfixup_vis4: - add %g3, 8, %g3 -VIScopyfixup_vis3: - add %g3, 8, %g3 - ba,pt %xcc, VIScopyfixup_ret - add %o2, %g3, %o1 -#endif - -#ifdef __KERNEL__ - .text - .align 32 - - .globl __memmove - .type __memmove,@function - - .globl memmove - .type memmove,@function - -memmove: -__memmove: cmp %o0, %o1 - blu,pt %xcc, memcpy_private - sub %o0, %o1, %g5 - add %o1, %o2, %g3 - cmp %g3, %o0 - bleu,pt %xcc, memcpy_private - add %o1, %o2, %g5 - add %o0, %o2, %o5 - - sub %g5, 1, %o1 - sub %o5, 1, %o0 -1: ldub [%o1], %g5 - subcc %o2, 1, %o2 - sub %o1, 1, %o1 - stb %g5, [%o0] - bne,pt %icc, 1b - sub %o0, 1, %o0 - - retl - clr %o0 -#endif diff -urN linux-2.4.27/arch/sparc64/lib/copy_in_user.S linux-2.4.28/arch/sparc64/lib/copy_in_user.S --- linux-2.4.27/arch/sparc64/lib/copy_in_user.S 1969-12-31 16:00:00.000000000 -0800 +++ linux-2.4.28/arch/sparc64/lib/copy_in_user.S 2004-11-17 03:54:21.154379638 -0800 @@ -0,0 +1,114 @@ +/* copy_in_user.S: Copy from userspace to userspace. + * + * Copyright (C) 1999, 2000, 2004 David S. Miller (davem@redhat.com) + */ + +#include + +#define XCC xcc + +#define EX(x,y) \ +98: x,y; \ + .section .fixup; \ + .align 4; \ +99: retl; \ + mov 1, %o0; \ + .section __ex_table; \ + .align 4; \ + .word 98b, 99b; \ + .text; \ + .align 4; + + .register %g2,#scratch + .register %g3,#scratch + + .text + .align 32 + + /* Don't try to get too fancy here, just nice and + * simple. This is predominantly used for well aligned + * small copies in the compat layer. It is also used + * to copy register windows around during thread cloning. + */ + + .globl ___copy_in_user +___copy_in_user: /* %o0=dst, %o1=src, %o2=len */ + /* Writing to %asi is _expensive_ so we hardcode it. + * Reading %asi to check for KERNEL_DS is comparatively + * cheap. + */ + rd %asi, %g1 + cmp %g1, ASI_AIUS + bne,pn %icc, memcpy_user_stub + nop + + cmp %o2, 0 + be,pn %XCC, 85f + or %o0, %o1, %o3 + cmp %o2, 16 + bleu,a,pn %XCC, 80f + or %o3, %o2, %o3 + + /* 16 < len <= 64 */ + andcc %o3, 0x7, %g0 + bne,pn %XCC, 90f + sub %o0, %o1, %o3 + + andn %o2, 0x7, %o4 + and %o2, 0x7, %o2 +1: subcc %o4, 0x8, %o4 + EX(ldxa [%o1] %asi, %o5) + EX(stxa %o5, [%o1 + %o3] ASI_AIUS) + bgu,pt %XCC, 1b + add %o1, 0x8, %o1 + andcc %o2, 0x4, %g0 + be,pt %XCC, 1f + nop + sub %o2, 0x4, %o2 + EX(lduwa [%o1] %asi, %o5) + EX(stwa %o5, [%o1 + %o3] ASI_AIUS) + add %o1, 0x4, %o1 +1: cmp %o2, 0 + be,pt %XCC, 85f + nop + ba,pt %xcc, 90f + nop + +80: /* 0 < len <= 16 */ + andcc %o3, 0x3, %g0 + bne,pn %XCC, 90f + sub %o0, %o1, %o3 + +82: + subcc %o2, 4, %o2 + EX(lduwa [%o1] %asi, %g1) + EX(stwa %g1, [%o1 + %o3] ASI_AIUS) + bgu,pt %XCC, 82b + add %o1, 4, %o1 + +85: retl + clr %o0 + + .align 32 +90: + subcc %o2, 1, %o2 + EX(lduba [%o1] %asi, %g1) + EX(stba %g1, [%o1 + %o3] ASI_AIUS) + bgu,pt %XCC, 90b + add %o1, 1, %o1 + retl + clr %o0 + + /* Act like copy_{to,in}_user(), ie. return zero instead + * of original destination pointer. This is invoked when + * copy_{to,in}_user() finds that %asi is kernel space. + */ + .globl memcpy_user_stub +memcpy_user_stub: + save %sp, -192, %sp + mov %i0, %o0 + mov %i1, %o1 + call memcpy + mov %i2, %o2 + ret + restore %g0, %g0, %o0 diff -urN linux-2.4.27/arch/sparc64/lib/memmove.S linux-2.4.28/arch/sparc64/lib/memmove.S --- linux-2.4.27/arch/sparc64/lib/memmove.S 1969-12-31 16:00:00.000000000 -0800 +++ linux-2.4.28/arch/sparc64/lib/memmove.S 2004-11-17 03:54:21.155379680 -0800 @@ -0,0 +1,31 @@ +/* memmove.S: Simple memmove implementation. + * + * Copyright (C) 1997, 2004 David S. Miller (davem@redhat.com) + * Copyright (C) 1996, 1997, 1998, 1999 Jakub Jelinek (jj@ultra.linux.cz) + */ + + .text + .align 32 + .globl memmove +memmove: + mov %o0, %g1 + cmp %o0, %o1 + blu,pt %xcc, memcpy + sub %o0, %o1, %g5 + add %o1, %o2, %g3 + cmp %g3, %o0 + bleu,pt %xcc, memcpy + add %o1, %o2, %g5 + add %o0, %o2, %o5 + + sub %g5, 1, %o1 + sub %o5, 1, %o0 +1: ldub [%o1], %g5 + subcc %o2, 1, %o2 + sub %o1, 1, %o1 + stb %g5, [%o0] + bne,pt %icc, 1b + sub %o0, 1, %o0 + + retl + mov %g1, %o0 diff -urN linux-2.4.27/arch/sparc64/lib/user_fixup.c linux-2.4.28/arch/sparc64/lib/user_fixup.c --- linux-2.4.27/arch/sparc64/lib/user_fixup.c 1969-12-31 16:00:00.000000000 -0800 +++ linux-2.4.28/arch/sparc64/lib/user_fixup.c 2004-11-17 03:54:21.155379680 -0800 @@ -0,0 +1,68 @@ +/* user_fixup.c: Fix up user copy faults. + * + * Copyright (C) 2004 David S. Miller + */ + +#include +#include +#include +#include +#include + +/* Calculating the exact fault address when using + * block loads and stores can be very complicated. + * Instead of trying to be clever and handling all + * of the cases, just fix things up simply here. + */ + +unsigned long copy_from_user_fixup(void *to, const void __user *from, unsigned long size) +{ + char *dst = to; + const char __user *src = from; + + while (size--) { + if (__get_user(*dst, src)) + break; + dst++; + src++; + } + + if (size) + memset(dst, 0, size); + + return size; +} + +unsigned long copy_to_user_fixup(void __user *to, const void *from, unsigned long size) +{ + char __user *dst = to; + const char *src = from; + + while (size--) { + if (__put_user(*src, dst)) + break; + dst++; + src++; + } + + return size; +} + +unsigned long copy_in_user_fixup(void __user *to, void __user *from, unsigned long size) +{ + char __user *dst = to; + char __user *src = from; + + while (size--) { + char tmp; + + if (__get_user(tmp, src)) + break; + if (__put_user(tmp, dst)) + break; + dst++; + src++; + } + + return size; +} diff -urN linux-2.4.27/arch/sparc64/mm/fault.c linux-2.4.28/arch/sparc64/mm/fault.c --- linux-2.4.27/arch/sparc64/mm/fault.c 2004-08-07 16:26:04.000000000 -0700 +++ linux-2.4.28/arch/sparc64/mm/fault.c 2004-11-17 03:54:21.156379721 -0800 @@ -253,7 +253,7 @@ * in that case. */ - if (!(fault_code & FAULT_CODE_WRITE) && + if (!(fault_code & (FAULT_CODE_WRITE|FAULT_CODE_ITLB)) && (insn & 0xc0800000) == 0xc0800000) { if (insn & 0x2000) asi = (regs->tstate >> 24); @@ -404,6 +404,16 @@ */ good_area: si_code = SEGV_ACCERR; + + /* If we took a ITLB miss on a non-executable page, catch + * that here. + */ + if ((fault_code & FAULT_CODE_ITLB) && !(vma->vm_flags & VM_EXEC)) { + BUG_ON(address != regs->tpc); + BUG_ON(regs->tstate & TSTATE_PRIV); + goto bad_area; + } + if (fault_code & FAULT_CODE_WRITE) { if (!(vma->vm_flags & VM_WRITE)) goto bad_area; diff -urN linux-2.4.27/arch/sparc64/mm/init.c linux-2.4.28/arch/sparc64/mm/init.c --- linux-2.4.27/arch/sparc64/mm/init.c 2004-08-07 16:26:04.000000000 -0700 +++ linux-2.4.28/arch/sparc64/mm/init.c 2004-11-17 03:54:21.158379803 -0800 @@ -63,6 +63,7 @@ unsigned long mmu_context_bmap[CTX_BMAP_SLOTS]; /* Initial ramdisk setup */ +extern unsigned long sparc_ramdisk_image64; extern unsigned int sparc_ramdisk_image; extern unsigned int sparc_ramdisk_size; @@ -1251,10 +1252,12 @@ #ifdef CONFIG_BLK_DEV_INITRD /* Now have to check initial ramdisk, so that bootmap does not overwrite it */ - if (sparc_ramdisk_image) { - if (sparc_ramdisk_image >= (unsigned long)&_end - 2 * PAGE_SIZE) - sparc_ramdisk_image -= KERNBASE; - initrd_start = sparc_ramdisk_image + phys_base; + if (sparc_ramdisk_image || sparc_ramdisk_image64) { + unsigned long ramdisk_image = sparc_ramdisk_image ? + sparc_ramdisk_image : sparc_ramdisk_image64; + if (ramdisk_image >= (unsigned long)&_end - 2 * PAGE_SIZE) + ramdisk_image -= KERNBASE; + initrd_start = ramdisk_image + phys_base; initrd_end = initrd_start + sparc_ramdisk_size; if (initrd_end > end_of_phys_memory) { printk(KERN_CRIT "initrd extends beyond end of memory " @@ -1300,6 +1303,10 @@ initrd_start, size); #endif /* Resert the initrd image area. */ +#ifdef CONFIG_DEBUG_BOOTMEM + prom_printf("reserve_bootmem(initrd): base[%llx] size[%lx]\n", + initrd_start, initrd_end); +#endif reserve_bootmem(initrd_start, size); *pages_avail -= PAGE_ALIGN(size) >> PAGE_SHIFT; @@ -1353,7 +1360,7 @@ if ((real_end > ((unsigned long)KERNBASE + 0x400000))) bigkernel = 1; #ifdef CONFIG_BLK_DEV_INITRD - if (sparc_ramdisk_image) + if (sparc_ramdisk_image || sparc_ramdisk_image64) real_end = (PAGE_ALIGN(real_end) + PAGE_ALIGN(sparc_ramdisk_size)); #endif diff -urN linux-2.4.27/arch/x86_64/boot/compressed/misc.c linux-2.4.28/arch/x86_64/boot/compressed/misc.c --- linux-2.4.27/arch/x86_64/boot/compressed/misc.c 2003-06-13 07:51:32.000000000 -0700 +++ linux-2.4.28/arch/x86_64/boot/compressed/misc.c 2004-11-17 03:54:21.158379803 -0800 @@ -96,7 +96,7 @@ static void gzip_mark(void **); static void gzip_release(void **); -static void puts(const char *); +static void putstr(const char *); extern int end; static long free_mem_ptr = (long)&end; @@ -157,7 +157,7 @@ vidmem[i] = ' '; } -static void puts(const char *s) +static void putstr(const char *s) { int x,y,pos; char c; @@ -275,9 +275,9 @@ static void error(char *x) { - puts("\n\n"); - puts(x); - puts("\n\n -- System halted"); + putstr("\n\n"); + putstr(x); + putstr("\n\n -- System halted"); while(1); } @@ -351,9 +351,9 @@ else setup_output_buffer_if_we_run_high(mv); makecrc(); - puts(".\nDecompressing Linux..."); + putstr(".\nDecompressing Linux..."); gunzip(); - puts("done.\nBooting the kernel.\n"); + putstr("done.\nBooting the kernel.\n"); if (high_loaded) close_output_buffer_if_we_run_high(mv); return high_loaded; } diff -urN linux-2.4.27/arch/x86_64/config.in linux-2.4.28/arch/x86_64/config.in --- linux-2.4.27/arch/x86_64/config.in 2004-04-14 06:05:28.000000000 -0700 +++ linux-2.4.28/arch/x86_64/config.in 2004-11-17 03:54:21.159379844 -0800 @@ -117,7 +117,7 @@ define_bool CONFIG_KCORE_ELF y fi #tristate 'Kernel support for a.out binaries' CONFIG_BINFMT_AOUT -tristate 'Kernel support for ELF binaries' CONFIG_BINFMT_ELF +bool 'Kernel support for ELF binaries' CONFIG_BINFMT_ELF tristate 'Kernel support for MISC binaries' CONFIG_BINFMT_MISC bool 'Power Management support' CONFIG_PM diff -urN linux-2.4.27/arch/x86_64/ia32/ia32_ioctl.c linux-2.4.28/arch/x86_64/ia32/ia32_ioctl.c --- linux-2.4.27/arch/x86_64/ia32/ia32_ioctl.c 2004-04-14 06:05:28.000000000 -0700 +++ linux-2.4.28/arch/x86_64/ia32/ia32_ioctl.c 2004-11-17 03:54:21.162379967 -0800 @@ -1171,6 +1171,7 @@ case FDDEFPRM32: case FDGETPRM32: { + u32 name; struct floppy_struct *f; f = karg = kmalloc(sizeof(struct floppy_struct), GFP_KERNEL); @@ -1187,7 +1188,8 @@ err |= __get_user(f->rate, &((struct floppy_struct32 *)arg)->rate); err |= __get_user(f->spec1, &((struct floppy_struct32 *)arg)->spec1); err |= __get_user(f->fmt_gap, &((struct floppy_struct32 *)arg)->fmt_gap); - err |= __get_user((u64)f->name, &((struct floppy_struct32 *)arg)->name); + err |= __get_user(name, &((struct floppy_struct32 *)arg)->name); + f->name = (void*)(u64)name; if (err) { err = -EFAULT; goto out; @@ -2726,13 +2728,15 @@ { struct blkpg_ioctl_arg a; struct blkpg_partition p; + u32 udata; int err; mm_segment_t old_fs = get_fs(); err = get_user(a.op, &arg->op); err |= __get_user(a.flags, &arg->flags); err |= __get_user(a.datalen, &arg->datalen); - err |= __get_user((long)a.data, &arg->data); + err |= __get_user(udata, &arg->data); + a.data = (void*)(u64)udata; if (err) return err; switch (a.op) { case BLKPG_ADD_PARTITION: diff -urN linux-2.4.27/arch/x86_64/ia32/socket32.c linux-2.4.28/arch/x86_64/ia32/socket32.c --- linux-2.4.27/arch/x86_64/ia32/socket32.c 2003-11-28 10:26:19.000000000 -0800 +++ linux-2.4.28/arch/x86_64/ia32/socket32.c 2004-11-17 03:54:21.163380008 -0800 @@ -555,7 +555,7 @@ asmlinkage long sys32_setsockopt(int fd, int level, int optname, char *optval, int optlen) { - if (optname == SO_ATTACH_FILTER) + if (level == SOL_SOCKET && optname == SO_ATTACH_FILTER) return do_set_attach_filter(fd, level, optname, optval, optlen); if (level == SOL_ICMPV6 && optname == ICMPV6_FILTER) diff -urN linux-2.4.27/arch/x86_64/ia32/sys_ia32.c linux-2.4.28/arch/x86_64/ia32/sys_ia32.c --- linux-2.4.27/arch/x86_64/ia32/sys_ia32.c 2004-04-14 06:05:28.000000000 -0700 +++ linux-2.4.28/arch/x86_64/ia32/sys_ia32.c 2004-11-17 03:54:21.165380091 -0800 @@ -384,12 +384,16 @@ return -EINVAL; if (act) { + u32 handler, restorer; + if (verify_area(VERIFY_READ, act, sizeof(*act)) || - __get_user((long)new_ka.sa.sa_handler, &act->sa_handler) || + __get_user(handler, &act->sa_handler) || __get_user(new_ka.sa.sa_flags, &act->sa_flags) || - __get_user((long)new_ka.sa.sa_restorer, &act->sa_restorer)|| + __get_user(restorer, &act->sa_restorer)|| __copy_from_user(&set32, &act->sa_mask, sizeof(sigset32_t))) return -EFAULT; + new_ka.sa.sa_handler = (void*)(u64)handler; + new_ka.sa.sa_restorer = (void*)(u64)restorer; /* FIXME: here we rely on _IA32_NSIG_WORS to be >= than _NSIG_WORDS << 1 */ switch (_NSIG_WORDS) { @@ -441,13 +445,16 @@ if (act) { old_sigset32_t mask; + u32 handler, restorer; if (verify_area(VERIFY_READ, act, sizeof(*act)) || - __get_user((long)new_ka.sa.sa_handler, &act->sa_handler) || + __get_user(handler, &act->sa_handler) || __get_user(new_ka.sa.sa_flags, &act->sa_flags) || - __get_user((long)new_ka.sa.sa_restorer, &act->sa_restorer) || + __get_user(restorer, &act->sa_restorer) || __get_user(mask, &act->sa_mask)) return -EFAULT; + new_ka.sa.sa_handler = (void*)(u64)handler; + new_ka.sa.sa_restorer = (void*)(u64)restorer; siginitset(&new_ka.sa.sa_mask, mask); } @@ -778,7 +785,7 @@ put_user(reclen, &dirent->d_reclen); copy_to_user(dirent->d_name, name, namlen); put_user(0, dirent->d_name + namlen); - ((char *) dirent) += reclen; + dirent = ((void *)dirent) + reclen; buf->current_dir = dirent; buf->count -= reclen; return 0; diff -urN linux-2.4.27/arch/x86_64/kernel/x8664_ksyms.c linux-2.4.28/arch/x86_64/kernel/x8664_ksyms.c --- linux-2.4.27/arch/x86_64/kernel/x8664_ksyms.c 2004-04-14 06:05:28.000000000 -0700 +++ linux-2.4.28/arch/x86_64/kernel/x8664_ksyms.c 2004-11-17 03:54:21.165380091 -0800 @@ -157,7 +157,7 @@ extern __kernel_size_t strlen(const char *); extern int strcmp(const char *,const char *); extern char * strcpy(char *,const char *); -extern char * bcopy(const char * src, char * dest, int count); +extern void bcopy(const void * src, void * dest, size_t count); EXPORT_SYMBOL_NOVERS(memcpy); EXPORT_SYMBOL_NOVERS(__memcpy); diff -urN linux-2.4.27/arch/x86_64/lib/usercopy.c linux-2.4.28/arch/x86_64/lib/usercopy.c --- linux-2.4.27/arch/x86_64/lib/usercopy.c 2004-04-14 06:05:28.000000000 -0700 +++ linux-2.4.28/arch/x86_64/lib/usercopy.c 2004-11-17 03:54:21.166380132 -0800 @@ -88,7 +88,7 @@ " .quad 1b,2b\n" ".previous" : [size8] "=c"(size), [dst] "=&D" (__d0) - : [size1] "r"(size & 7), "[size8]" (size / 8), "[dst] "(addr), + : [size1] "r"(size & 7), "[size8]" (size / 8), "[dst]"(addr), [zero] "r" (0UL), [eight] "r" (8UL)); return size; } diff -urN linux-2.4.27/crypto/Config.in linux-2.4.28/crypto/Config.in --- linux-2.4.27/crypto/Config.in 2004-08-07 16:26:04.000000000 -0700 +++ linux-2.4.28/crypto/Config.in 2004-11-17 03:54:21.166380132 -0800 @@ -58,6 +58,7 @@ fi tristate ' SHA256 digest algorithm' CONFIG_CRYPTO_SHA256 tristate ' SHA384 and SHA512 digest algorithms' CONFIG_CRYPTO_SHA512 + tristate ' Whirlpool digest algorithms' CONFIG_CRYPTO_WP512 if [ "$CONFIG_INET_ESP" = "y" -o \ "$CONFIG_INET_ESP" = "m" -o \ "$CONFIG_INET6_ESP" = "y" -o \ @@ -73,6 +74,8 @@ tristate ' CAST5 (CAST-128) cipher algorithm' CONFIG_CRYPTO_CAST5 tristate ' CAST6 (CAST-256) cipher algorithm' CONFIG_CRYPTO_CAST6 tristate ' TEA and XTEA cipher algorithms' CONFIG_CRYPTO_TEA + tristate ' Khazad cipher algorithm' CONFIG_CRYPTO_KHAZAD + tristate ' Anubis cipher algorithm' CONFIG_CRYPTO_ANUBIS tristate ' ARC4 cipher algorithm' CONFIG_CRYPTO_ARC4 if [ "$CONFIG_INET_IPCOMP" = "y" -o \ "$CONFIG_INET_IPCOMP" = "m" -o \ diff -urN linux-2.4.27/crypto/Makefile linux-2.4.28/crypto/Makefile --- linux-2.4.27/crypto/Makefile 2004-08-07 16:26:04.000000000 -0700 +++ linux-2.4.28/crypto/Makefile 2004-11-17 03:54:21.167380173 -0800 @@ -19,6 +19,7 @@ obj-$(CONFIG_CRYPTO_SHA1) += sha1.o obj-$(CONFIG_CRYPTO_SHA256) += sha256.o obj-$(CONFIG_CRYPTO_SHA512) += sha512.o +obj-$(CONFIG_CRYPTO_WP512) += wp512.o obj-$(CONFIG_CRYPTO_DES) += des.o obj-$(CONFIG_CRYPTO_BLOWFISH) += blowfish.o obj-$(CONFIG_CRYPTO_TWOFISH) += twofish.o @@ -28,6 +29,8 @@ obj-$(CONFIG_CRYPTO_CAST6) += cast6.o obj-$(CONFIG_CRYPTO_ARC4) += arc4.o obj-$(CONFIG_CRYPTO_TEA) += tea.o +obj-$(CONFIG_CRYPTO_KHAZAD) += khazad.o +obj-$(CONFIG_CRYPTO_ANUBIS) += anubis.o obj-$(CONFIG_CRYPTO_DEFLATE) += deflate.o obj-$(CONFIG_CRYPTO_MICHAEL_MIC) += michael_mic.o diff -urN linux-2.4.27/crypto/aes.c linux-2.4.28/crypto/aes.c --- linux-2.4.27/crypto/aes.c 2004-02-18 05:36:31.000000000 -0800 +++ linux-2.4.28/crypto/aes.c 2004-11-17 03:54:21.167380173 -0800 @@ -102,10 +102,10 @@ #define E_KEY ctx->E #define D_KEY ctx->D -static u8 pow_tab[256]; -static u8 log_tab[256]; -static u8 sbx_tab[256]; -static u8 isb_tab[256]; +static u8 pow_tab[256] __initdata; +static u8 log_tab[256] __initdata; +static u8 sbx_tab[256] __initdata; +static u8 isb_tab[256] __initdata; static u32 rco_tab[10]; static u32 ft_tab[4][256]; static u32 it_tab[4][256]; @@ -113,7 +113,7 @@ static u32 fl_tab[4][256]; static u32 il_tab[4][256]; -static inline u8 +static inline u8 __init f_mult (u8 a, u8 b) { u8 aa = log_tab[a], cc = aa + log_tab[b]; @@ -153,14 +153,14 @@ il_tab[2][byte(bi[(n + 2) & 3],2)] ^ \ il_tab[3][byte(bi[(n + 1) & 3],3)] ^ *(k + n) -static void +static void __init gen_tabs (void) { u32 i, t; u8 p, q; /* log and power tables for GF(2**8) finite field with - 0x011b as modular polynomial - the simplest prmitive + 0x011b as modular polynomial - the simplest primitive root is 0x03, used here to generate the tables */ for (i = 0, p = 1; i < 256; ++i) { diff -urN linux-2.4.27/crypto/anubis.c linux-2.4.28/crypto/anubis.c --- linux-2.4.27/crypto/anubis.c 1969-12-31 16:00:00.000000000 -0800 +++ linux-2.4.28/crypto/anubis.c 2004-11-17 03:54:21.169380255 -0800 @@ -0,0 +1,719 @@ +/* + * Cryptographic API. + * + * Anubis Algorithm + * + * The Anubis algorithm was developed by Paulo S. L. M. Barreto and + * Vincent Rijmen. + * + * See + * + * P.S.L.M. Barreto, V. Rijmen, + * ``The Anubis block cipher,'' + * NESSIE submission, 2000. + * + * This software implements the "tweaked" version of Anubis. + * Only the S-box and (consequently) the rounds constants have been + * changed. + * + * The original authors have disclaimed all copyright interest in this + * code and thus put it in the public domain. The subsequent authors + * have put this under the GNU General Public License. + * + * By Aaron Grothe ajgrothe@yahoo.com, October 28, 2004 + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + */ + +#include +#include +#include +#include +#include + +#define ANUBIS_MIN_KEY_SIZE 16 +#define ANUBIS_MAX_KEY_SIZE 40 +#define ANUBIS_BLOCK_SIZE 16 +#define ANUBIS_MAX_N 10 +#define ANUBIS_MAX_ROUNDS (8 + ANUBIS_MAX_N) + +struct anubis_ctx { + int key_len; // in bits + int R; + u32 E[ANUBIS_MAX_ROUNDS + 1][4]; + u32 D[ANUBIS_MAX_ROUNDS + 1][4]; +}; + +static const u32 T0[256] = { + 0xba69d2bbU, 0x54a84de5U, 0x2f5ebce2U, 0x74e8cd25U, + 0x53a651f7U, 0xd3bb6bd0U, 0xd2b96fd6U, 0x4d9a29b3U, + 0x50a05dfdU, 0xac458acfU, 0x8d070e09U, 0xbf63c6a5U, + 0x70e0dd3dU, 0x52a455f1U, 0x9a29527bU, 0x4c982db5U, + 0xeac98f46U, 0xd5b773c4U, 0x97336655U, 0xd1bf63dcU, + 0x3366ccaaU, 0x51a259fbU, 0x5bb671c7U, 0xa651a2f3U, + 0xdea15ffeU, 0x48903dadU, 0xa84d9ad7U, 0x992f5e71U, + 0xdbab4be0U, 0x3264c8acU, 0xb773e695U, 0xfce5d732U, + 0xe3dbab70U, 0x9e214263U, 0x913f7e41U, 0x9b2b567dU, + 0xe2d9af76U, 0xbb6bd6bdU, 0x4182199bU, 0x6edca579U, + 0xa557aef9U, 0xcb8b0b80U, 0x6bd6b167U, 0x95376e59U, + 0xa15fbee1U, 0xf3fbeb10U, 0xb17ffe81U, 0x0204080cU, + 0xcc851792U, 0xc49537a2U, 0x1d3a744eU, 0x14285078U, + 0xc39b2bb0U, 0x63c69157U, 0xdaa94fe6U, 0x5dba69d3U, + 0x5fbe61dfU, 0xdca557f2U, 0x7dfae913U, 0xcd871394U, + 0x7ffee11fU, 0x5ab475c1U, 0x6cd8ad75U, 0x5cb86dd5U, + 0xf7f3fb08U, 0x264c98d4U, 0xffe3db38U, 0xedc79354U, + 0xe8cd874aU, 0x9d274e69U, 0x6fdea17fU, 0x8e010203U, + 0x19326456U, 0xa05dbae7U, 0xf0fde71aU, 0x890f1e11U, + 0x0f1e3c22U, 0x070e1c12U, 0xaf4386c5U, 0xfbebcb20U, + 0x08102030U, 0x152a547eU, 0x0d1a342eU, 0x04081018U, + 0x01020406U, 0x64c88d45U, 0xdfa35bf8U, 0x76ecc529U, + 0x79f2f90bU, 0xdda753f4U, 0x3d7af48eU, 0x162c5874U, + 0x3f7efc82U, 0x376edcb2U, 0x6ddaa973U, 0x3870e090U, + 0xb96fdeb1U, 0x73e6d137U, 0xe9cf834cU, 0x356ad4beU, + 0x55aa49e3U, 0x71e2d93bU, 0x7bf6f107U, 0x8c050a0fU, + 0x72e4d531U, 0x880d1a17U, 0xf6f1ff0eU, 0x2a54a8fcU, + 0x3e7cf884U, 0x5ebc65d9U, 0x274e9cd2U, 0x468c0589U, + 0x0c183028U, 0x65ca8943U, 0x68d0bd6dU, 0x61c2995bU, + 0x03060c0aU, 0xc19f23bcU, 0x57ae41efU, 0xd6b17fceU, + 0xd9af43ecU, 0x58b07dcdU, 0xd8ad47eaU, 0x66cc8549U, + 0xd7b37bc8U, 0x3a74e89cU, 0xc88d078aU, 0x3c78f088U, + 0xfae9cf26U, 0x96316253U, 0xa753a6f5U, 0x982d5a77U, + 0xecc59752U, 0xb86ddab7U, 0xc7933ba8U, 0xae4182c3U, + 0x69d2b96bU, 0x4b9631a7U, 0xab4b96ddU, 0xa94f9ed1U, + 0x67ce814fU, 0x0a14283cU, 0x478e018fU, 0xf2f9ef16U, + 0xb577ee99U, 0x224488ccU, 0xe5d7b364U, 0xeec19f5eU, + 0xbe61c2a3U, 0x2b56acfaU, 0x811f3e21U, 0x1224486cU, + 0x831b362dU, 0x1b366c5aU, 0x0e1c3824U, 0x23468ccaU, + 0xf5f7f304U, 0x458a0983U, 0x214284c6U, 0xce811f9eU, + 0x499239abU, 0x2c58b0e8U, 0xf9efc32cU, 0xe6d1bf6eU, + 0xb671e293U, 0x2850a0f0U, 0x172e5c72U, 0x8219322bU, + 0x1a34685cU, 0x8b0b161dU, 0xfee1df3eU, 0x8a09121bU, + 0x09122436U, 0xc98f038cU, 0x87132635U, 0x4e9c25b9U, + 0xe1dfa37cU, 0x2e5cb8e4U, 0xe4d5b762U, 0xe0dda77aU, + 0xebcb8b40U, 0x903d7a47U, 0xa455aaffU, 0x1e3c7844U, + 0x85172e39U, 0x60c09d5dU, 0x00000000U, 0x254a94deU, + 0xf4f5f702U, 0xf1ffe31cU, 0x94356a5fU, 0x0b162c3aU, + 0xe7d3bb68U, 0x75eac923U, 0xefc39b58U, 0x3468d0b8U, + 0x3162c4a6U, 0xd4b577c2U, 0xd0bd67daU, 0x86112233U, + 0x7efce519U, 0xad478ec9U, 0xfde7d334U, 0x2952a4f6U, + 0x3060c0a0U, 0x3b76ec9aU, 0x9f234665U, 0xf8edc72aU, + 0xc6913faeU, 0x13264c6aU, 0x060c1814U, 0x050a141eU, + 0xc59733a4U, 0x11224466U, 0x77eec12fU, 0x7cf8ed15U, + 0x7af4f501U, 0x78f0fd0dU, 0x366cd8b4U, 0x1c387048U, + 0x3972e496U, 0x59b279cbU, 0x18306050U, 0x56ac45e9U, + 0xb37bf68dU, 0xb07dfa87U, 0x244890d8U, 0x204080c0U, + 0xb279f28bU, 0x9239724bU, 0xa35bb6edU, 0xc09d27baU, + 0x44880d85U, 0x62c49551U, 0x10204060U, 0xb475ea9fU, + 0x84152a3fU, 0x43861197U, 0x933b764dU, 0xc2992fb6U, + 0x4a9435a1U, 0xbd67cea9U, 0x8f030605U, 0x2d5ab4eeU, + 0xbc65caafU, 0x9c254a6fU, 0x6ad4b561U, 0x40801d9dU, + 0xcf831b98U, 0xa259b2ebU, 0x801d3a27U, 0x4f9e21bfU, + 0x1f3e7c42U, 0xca890f86U, 0xaa4992dbU, 0x42841591U, +}; + +static const u32 T1[256] = { + 0x69babbd2U, 0xa854e54dU, 0x5e2fe2bcU, 0xe87425cdU, + 0xa653f751U, 0xbbd3d06bU, 0xb9d2d66fU, 0x9a4db329U, + 0xa050fd5dU, 0x45accf8aU, 0x078d090eU, 0x63bfa5c6U, + 0xe0703dddU, 0xa452f155U, 0x299a7b52U, 0x984cb52dU, + 0xc9ea468fU, 0xb7d5c473U, 0x33975566U, 0xbfd1dc63U, + 0x6633aaccU, 0xa251fb59U, 0xb65bc771U, 0x51a6f3a2U, + 0xa1defe5fU, 0x9048ad3dU, 0x4da8d79aU, 0x2f99715eU, + 0xabdbe04bU, 0x6432acc8U, 0x73b795e6U, 0xe5fc32d7U, + 0xdbe370abU, 0x219e6342U, 0x3f91417eU, 0x2b9b7d56U, + 0xd9e276afU, 0x6bbbbdd6U, 0x82419b19U, 0xdc6e79a5U, + 0x57a5f9aeU, 0x8bcb800bU, 0xd66b67b1U, 0x3795596eU, + 0x5fa1e1beU, 0xfbf310ebU, 0x7fb181feU, 0x04020c08U, + 0x85cc9217U, 0x95c4a237U, 0x3a1d4e74U, 0x28147850U, + 0x9bc3b02bU, 0xc6635791U, 0xa9dae64fU, 0xba5dd369U, + 0xbe5fdf61U, 0xa5dcf257U, 0xfa7d13e9U, 0x87cd9413U, + 0xfe7f1fe1U, 0xb45ac175U, 0xd86c75adU, 0xb85cd56dU, + 0xf3f708fbU, 0x4c26d498U, 0xe3ff38dbU, 0xc7ed5493U, + 0xcde84a87U, 0x279d694eU, 0xde6f7fa1U, 0x018e0302U, + 0x32195664U, 0x5da0e7baU, 0xfdf01ae7U, 0x0f89111eU, + 0x1e0f223cU, 0x0e07121cU, 0x43afc586U, 0xebfb20cbU, + 0x10083020U, 0x2a157e54U, 0x1a0d2e34U, 0x08041810U, + 0x02010604U, 0xc864458dU, 0xa3dff85bU, 0xec7629c5U, + 0xf2790bf9U, 0xa7ddf453U, 0x7a3d8ef4U, 0x2c167458U, + 0x7e3f82fcU, 0x6e37b2dcU, 0xda6d73a9U, 0x703890e0U, + 0x6fb9b1deU, 0xe67337d1U, 0xcfe94c83U, 0x6a35bed4U, + 0xaa55e349U, 0xe2713bd9U, 0xf67b07f1U, 0x058c0f0aU, + 0xe47231d5U, 0x0d88171aU, 0xf1f60effU, 0x542afca8U, + 0x7c3e84f8U, 0xbc5ed965U, 0x4e27d29cU, 0x8c468905U, + 0x180c2830U, 0xca654389U, 0xd0686dbdU, 0xc2615b99U, + 0x06030a0cU, 0x9fc1bc23U, 0xae57ef41U, 0xb1d6ce7fU, + 0xafd9ec43U, 0xb058cd7dU, 0xadd8ea47U, 0xcc664985U, + 0xb3d7c87bU, 0x743a9ce8U, 0x8dc88a07U, 0x783c88f0U, + 0xe9fa26cfU, 0x31965362U, 0x53a7f5a6U, 0x2d98775aU, + 0xc5ec5297U, 0x6db8b7daU, 0x93c7a83bU, 0x41aec382U, + 0xd2696bb9U, 0x964ba731U, 0x4babdd96U, 0x4fa9d19eU, + 0xce674f81U, 0x140a3c28U, 0x8e478f01U, 0xf9f216efU, + 0x77b599eeU, 0x4422cc88U, 0xd7e564b3U, 0xc1ee5e9fU, + 0x61bea3c2U, 0x562bfaacU, 0x1f81213eU, 0x24126c48U, + 0x1b832d36U, 0x361b5a6cU, 0x1c0e2438U, 0x4623ca8cU, + 0xf7f504f3U, 0x8a458309U, 0x4221c684U, 0x81ce9e1fU, + 0x9249ab39U, 0x582ce8b0U, 0xeff92cc3U, 0xd1e66ebfU, + 0x71b693e2U, 0x5028f0a0U, 0x2e17725cU, 0x19822b32U, + 0x341a5c68U, 0x0b8b1d16U, 0xe1fe3edfU, 0x098a1b12U, + 0x12093624U, 0x8fc98c03U, 0x13873526U, 0x9c4eb925U, + 0xdfe17ca3U, 0x5c2ee4b8U, 0xd5e462b7U, 0xdde07aa7U, + 0xcbeb408bU, 0x3d90477aU, 0x55a4ffaaU, 0x3c1e4478U, + 0x1785392eU, 0xc0605d9dU, 0x00000000U, 0x4a25de94U, + 0xf5f402f7U, 0xfff11ce3U, 0x35945f6aU, 0x160b3a2cU, + 0xd3e768bbU, 0xea7523c9U, 0xc3ef589bU, 0x6834b8d0U, + 0x6231a6c4U, 0xb5d4c277U, 0xbdd0da67U, 0x11863322U, + 0xfc7e19e5U, 0x47adc98eU, 0xe7fd34d3U, 0x5229f6a4U, + 0x6030a0c0U, 0x763b9aecU, 0x239f6546U, 0xedf82ac7U, + 0x91c6ae3fU, 0x26136a4cU, 0x0c061418U, 0x0a051e14U, + 0x97c5a433U, 0x22116644U, 0xee772fc1U, 0xf87c15edU, + 0xf47a01f5U, 0xf0780dfdU, 0x6c36b4d8U, 0x381c4870U, + 0x723996e4U, 0xb259cb79U, 0x30185060U, 0xac56e945U, + 0x7bb38df6U, 0x7db087faU, 0x4824d890U, 0x4020c080U, + 0x79b28bf2U, 0x39924b72U, 0x5ba3edb6U, 0x9dc0ba27U, + 0x8844850dU, 0xc4625195U, 0x20106040U, 0x75b49feaU, + 0x15843f2aU, 0x86439711U, 0x3b934d76U, 0x99c2b62fU, + 0x944aa135U, 0x67bda9ceU, 0x038f0506U, 0x5a2deeb4U, + 0x65bcafcaU, 0x259c6f4aU, 0xd46a61b5U, 0x80409d1dU, + 0x83cf981bU, 0x59a2ebb2U, 0x1d80273aU, 0x9e4fbf21U, + 0x3e1f427cU, 0x89ca860fU, 0x49aadb92U, 0x84429115U, +}; + +static const u32 T2[256] = { + 0xd2bbba69U, 0x4de554a8U, 0xbce22f5eU, 0xcd2574e8U, + 0x51f753a6U, 0x6bd0d3bbU, 0x6fd6d2b9U, 0x29b34d9aU, + 0x5dfd50a0U, 0x8acfac45U, 0x0e098d07U, 0xc6a5bf63U, + 0xdd3d70e0U, 0x55f152a4U, 0x527b9a29U, 0x2db54c98U, + 0x8f46eac9U, 0x73c4d5b7U, 0x66559733U, 0x63dcd1bfU, + 0xccaa3366U, 0x59fb51a2U, 0x71c75bb6U, 0xa2f3a651U, + 0x5ffedea1U, 0x3dad4890U, 0x9ad7a84dU, 0x5e71992fU, + 0x4be0dbabU, 0xc8ac3264U, 0xe695b773U, 0xd732fce5U, + 0xab70e3dbU, 0x42639e21U, 0x7e41913fU, 0x567d9b2bU, + 0xaf76e2d9U, 0xd6bdbb6bU, 0x199b4182U, 0xa5796edcU, + 0xaef9a557U, 0x0b80cb8bU, 0xb1676bd6U, 0x6e599537U, + 0xbee1a15fU, 0xeb10f3fbU, 0xfe81b17fU, 0x080c0204U, + 0x1792cc85U, 0x37a2c495U, 0x744e1d3aU, 0x50781428U, + 0x2bb0c39bU, 0x915763c6U, 0x4fe6daa9U, 0x69d35dbaU, + 0x61df5fbeU, 0x57f2dca5U, 0xe9137dfaU, 0x1394cd87U, + 0xe11f7ffeU, 0x75c15ab4U, 0xad756cd8U, 0x6dd55cb8U, + 0xfb08f7f3U, 0x98d4264cU, 0xdb38ffe3U, 0x9354edc7U, + 0x874ae8cdU, 0x4e699d27U, 0xa17f6fdeU, 0x02038e01U, + 0x64561932U, 0xbae7a05dU, 0xe71af0fdU, 0x1e11890fU, + 0x3c220f1eU, 0x1c12070eU, 0x86c5af43U, 0xcb20fbebU, + 0x20300810U, 0x547e152aU, 0x342e0d1aU, 0x10180408U, + 0x04060102U, 0x8d4564c8U, 0x5bf8dfa3U, 0xc52976ecU, + 0xf90b79f2U, 0x53f4dda7U, 0xf48e3d7aU, 0x5874162cU, + 0xfc823f7eU, 0xdcb2376eU, 0xa9736ddaU, 0xe0903870U, + 0xdeb1b96fU, 0xd13773e6U, 0x834ce9cfU, 0xd4be356aU, + 0x49e355aaU, 0xd93b71e2U, 0xf1077bf6U, 0x0a0f8c05U, + 0xd53172e4U, 0x1a17880dU, 0xff0ef6f1U, 0xa8fc2a54U, + 0xf8843e7cU, 0x65d95ebcU, 0x9cd2274eU, 0x0589468cU, + 0x30280c18U, 0x894365caU, 0xbd6d68d0U, 0x995b61c2U, + 0x0c0a0306U, 0x23bcc19fU, 0x41ef57aeU, 0x7fced6b1U, + 0x43ecd9afU, 0x7dcd58b0U, 0x47ead8adU, 0x854966ccU, + 0x7bc8d7b3U, 0xe89c3a74U, 0x078ac88dU, 0xf0883c78U, + 0xcf26fae9U, 0x62539631U, 0xa6f5a753U, 0x5a77982dU, + 0x9752ecc5U, 0xdab7b86dU, 0x3ba8c793U, 0x82c3ae41U, + 0xb96b69d2U, 0x31a74b96U, 0x96ddab4bU, 0x9ed1a94fU, + 0x814f67ceU, 0x283c0a14U, 0x018f478eU, 0xef16f2f9U, + 0xee99b577U, 0x88cc2244U, 0xb364e5d7U, 0x9f5eeec1U, + 0xc2a3be61U, 0xacfa2b56U, 0x3e21811fU, 0x486c1224U, + 0x362d831bU, 0x6c5a1b36U, 0x38240e1cU, 0x8cca2346U, + 0xf304f5f7U, 0x0983458aU, 0x84c62142U, 0x1f9ece81U, + 0x39ab4992U, 0xb0e82c58U, 0xc32cf9efU, 0xbf6ee6d1U, + 0xe293b671U, 0xa0f02850U, 0x5c72172eU, 0x322b8219U, + 0x685c1a34U, 0x161d8b0bU, 0xdf3efee1U, 0x121b8a09U, + 0x24360912U, 0x038cc98fU, 0x26358713U, 0x25b94e9cU, + 0xa37ce1dfU, 0xb8e42e5cU, 0xb762e4d5U, 0xa77ae0ddU, + 0x8b40ebcbU, 0x7a47903dU, 0xaaffa455U, 0x78441e3cU, + 0x2e398517U, 0x9d5d60c0U, 0x00000000U, 0x94de254aU, + 0xf702f4f5U, 0xe31cf1ffU, 0x6a5f9435U, 0x2c3a0b16U, + 0xbb68e7d3U, 0xc92375eaU, 0x9b58efc3U, 0xd0b83468U, + 0xc4a63162U, 0x77c2d4b5U, 0x67dad0bdU, 0x22338611U, + 0xe5197efcU, 0x8ec9ad47U, 0xd334fde7U, 0xa4f62952U, + 0xc0a03060U, 0xec9a3b76U, 0x46659f23U, 0xc72af8edU, + 0x3faec691U, 0x4c6a1326U, 0x1814060cU, 0x141e050aU, + 0x33a4c597U, 0x44661122U, 0xc12f77eeU, 0xed157cf8U, + 0xf5017af4U, 0xfd0d78f0U, 0xd8b4366cU, 0x70481c38U, + 0xe4963972U, 0x79cb59b2U, 0x60501830U, 0x45e956acU, + 0xf68db37bU, 0xfa87b07dU, 0x90d82448U, 0x80c02040U, + 0xf28bb279U, 0x724b9239U, 0xb6eda35bU, 0x27bac09dU, + 0x0d854488U, 0x955162c4U, 0x40601020U, 0xea9fb475U, + 0x2a3f8415U, 0x11974386U, 0x764d933bU, 0x2fb6c299U, + 0x35a14a94U, 0xcea9bd67U, 0x06058f03U, 0xb4ee2d5aU, + 0xcaafbc65U, 0x4a6f9c25U, 0xb5616ad4U, 0x1d9d4080U, + 0x1b98cf83U, 0xb2eba259U, 0x3a27801dU, 0x21bf4f9eU, + 0x7c421f3eU, 0x0f86ca89U, 0x92dbaa49U, 0x15914284U, +}; + +static const u32 T3[256] = { + 0xbbd269baU, 0xe54da854U, 0xe2bc5e2fU, 0x25cde874U, + 0xf751a653U, 0xd06bbbd3U, 0xd66fb9d2U, 0xb3299a4dU, + 0xfd5da050U, 0xcf8a45acU, 0x090e078dU, 0xa5c663bfU, + 0x3ddde070U, 0xf155a452U, 0x7b52299aU, 0xb52d984cU, + 0x468fc9eaU, 0xc473b7d5U, 0x55663397U, 0xdc63bfd1U, + 0xaacc6633U, 0xfb59a251U, 0xc771b65bU, 0xf3a251a6U, + 0xfe5fa1deU, 0xad3d9048U, 0xd79a4da8U, 0x715e2f99U, + 0xe04babdbU, 0xacc86432U, 0x95e673b7U, 0x32d7e5fcU, + 0x70abdbe3U, 0x6342219eU, 0x417e3f91U, 0x7d562b9bU, + 0x76afd9e2U, 0xbdd66bbbU, 0x9b198241U, 0x79a5dc6eU, + 0xf9ae57a5U, 0x800b8bcbU, 0x67b1d66bU, 0x596e3795U, + 0xe1be5fa1U, 0x10ebfbf3U, 0x81fe7fb1U, 0x0c080402U, + 0x921785ccU, 0xa23795c4U, 0x4e743a1dU, 0x78502814U, + 0xb02b9bc3U, 0x5791c663U, 0xe64fa9daU, 0xd369ba5dU, + 0xdf61be5fU, 0xf257a5dcU, 0x13e9fa7dU, 0x941387cdU, + 0x1fe1fe7fU, 0xc175b45aU, 0x75add86cU, 0xd56db85cU, + 0x08fbf3f7U, 0xd4984c26U, 0x38dbe3ffU, 0x5493c7edU, + 0x4a87cde8U, 0x694e279dU, 0x7fa1de6fU, 0x0302018eU, + 0x56643219U, 0xe7ba5da0U, 0x1ae7fdf0U, 0x111e0f89U, + 0x223c1e0fU, 0x121c0e07U, 0xc58643afU, 0x20cbebfbU, + 0x30201008U, 0x7e542a15U, 0x2e341a0dU, 0x18100804U, + 0x06040201U, 0x458dc864U, 0xf85ba3dfU, 0x29c5ec76U, + 0x0bf9f279U, 0xf453a7ddU, 0x8ef47a3dU, 0x74582c16U, + 0x82fc7e3fU, 0xb2dc6e37U, 0x73a9da6dU, 0x90e07038U, + 0xb1de6fb9U, 0x37d1e673U, 0x4c83cfe9U, 0xbed46a35U, + 0xe349aa55U, 0x3bd9e271U, 0x07f1f67bU, 0x0f0a058cU, + 0x31d5e472U, 0x171a0d88U, 0x0efff1f6U, 0xfca8542aU, + 0x84f87c3eU, 0xd965bc5eU, 0xd29c4e27U, 0x89058c46U, + 0x2830180cU, 0x4389ca65U, 0x6dbdd068U, 0x5b99c261U, + 0x0a0c0603U, 0xbc239fc1U, 0xef41ae57U, 0xce7fb1d6U, + 0xec43afd9U, 0xcd7db058U, 0xea47add8U, 0x4985cc66U, + 0xc87bb3d7U, 0x9ce8743aU, 0x8a078dc8U, 0x88f0783cU, + 0x26cfe9faU, 0x53623196U, 0xf5a653a7U, 0x775a2d98U, + 0x5297c5ecU, 0xb7da6db8U, 0xa83b93c7U, 0xc38241aeU, + 0x6bb9d269U, 0xa731964bU, 0xdd964babU, 0xd19e4fa9U, + 0x4f81ce67U, 0x3c28140aU, 0x8f018e47U, 0x16eff9f2U, + 0x99ee77b5U, 0xcc884422U, 0x64b3d7e5U, 0x5e9fc1eeU, + 0xa3c261beU, 0xfaac562bU, 0x213e1f81U, 0x6c482412U, + 0x2d361b83U, 0x5a6c361bU, 0x24381c0eU, 0xca8c4623U, + 0x04f3f7f5U, 0x83098a45U, 0xc6844221U, 0x9e1f81ceU, + 0xab399249U, 0xe8b0582cU, 0x2cc3eff9U, 0x6ebfd1e6U, + 0x93e271b6U, 0xf0a05028U, 0x725c2e17U, 0x2b321982U, + 0x5c68341aU, 0x1d160b8bU, 0x3edfe1feU, 0x1b12098aU, + 0x36241209U, 0x8c038fc9U, 0x35261387U, 0xb9259c4eU, + 0x7ca3dfe1U, 0xe4b85c2eU, 0x62b7d5e4U, 0x7aa7dde0U, + 0x408bcbebU, 0x477a3d90U, 0xffaa55a4U, 0x44783c1eU, + 0x392e1785U, 0x5d9dc060U, 0x00000000U, 0xde944a25U, + 0x02f7f5f4U, 0x1ce3fff1U, 0x5f6a3594U, 0x3a2c160bU, + 0x68bbd3e7U, 0x23c9ea75U, 0x589bc3efU, 0xb8d06834U, + 0xa6c46231U, 0xc277b5d4U, 0xda67bdd0U, 0x33221186U, + 0x19e5fc7eU, 0xc98e47adU, 0x34d3e7fdU, 0xf6a45229U, + 0xa0c06030U, 0x9aec763bU, 0x6546239fU, 0x2ac7edf8U, + 0xae3f91c6U, 0x6a4c2613U, 0x14180c06U, 0x1e140a05U, + 0xa43397c5U, 0x66442211U, 0x2fc1ee77U, 0x15edf87cU, + 0x01f5f47aU, 0x0dfdf078U, 0xb4d86c36U, 0x4870381cU, + 0x96e47239U, 0xcb79b259U, 0x50603018U, 0xe945ac56U, + 0x8df67bb3U, 0x87fa7db0U, 0xd8904824U, 0xc0804020U, + 0x8bf279b2U, 0x4b723992U, 0xedb65ba3U, 0xba279dc0U, + 0x850d8844U, 0x5195c462U, 0x60402010U, 0x9fea75b4U, + 0x3f2a1584U, 0x97118643U, 0x4d763b93U, 0xb62f99c2U, + 0xa135944aU, 0xa9ce67bdU, 0x0506038fU, 0xeeb45a2dU, + 0xafca65bcU, 0x6f4a259cU, 0x61b5d46aU, 0x9d1d8040U, + 0x981b83cfU, 0xebb259a2U, 0x273a1d80U, 0xbf219e4fU, + 0x427c3e1fU, 0x860f89caU, 0xdb9249aaU, 0x91158442U, +}; + +static const u32 T4[256] = { + 0xbabababaU, 0x54545454U, 0x2f2f2f2fU, 0x74747474U, + 0x53535353U, 0xd3d3d3d3U, 0xd2d2d2d2U, 0x4d4d4d4dU, + 0x50505050U, 0xacacacacU, 0x8d8d8d8dU, 0xbfbfbfbfU, + 0x70707070U, 0x52525252U, 0x9a9a9a9aU, 0x4c4c4c4cU, + 0xeaeaeaeaU, 0xd5d5d5d5U, 0x97979797U, 0xd1d1d1d1U, + 0x33333333U, 0x51515151U, 0x5b5b5b5bU, 0xa6a6a6a6U, + 0xdedededeU, 0x48484848U, 0xa8a8a8a8U, 0x99999999U, + 0xdbdbdbdbU, 0x32323232U, 0xb7b7b7b7U, 0xfcfcfcfcU, + 0xe3e3e3e3U, 0x9e9e9e9eU, 0x91919191U, 0x9b9b9b9bU, + 0xe2e2e2e2U, 0xbbbbbbbbU, 0x41414141U, 0x6e6e6e6eU, + 0xa5a5a5a5U, 0xcbcbcbcbU, 0x6b6b6b6bU, 0x95959595U, + 0xa1a1a1a1U, 0xf3f3f3f3U, 0xb1b1b1b1U, 0x02020202U, + 0xccccccccU, 0xc4c4c4c4U, 0x1d1d1d1dU, 0x14141414U, + 0xc3c3c3c3U, 0x63636363U, 0xdadadadaU, 0x5d5d5d5dU, + 0x5f5f5f5fU, 0xdcdcdcdcU, 0x7d7d7d7dU, 0xcdcdcdcdU, + 0x7f7f7f7fU, 0x5a5a5a5aU, 0x6c6c6c6cU, 0x5c5c5c5cU, + 0xf7f7f7f7U, 0x26262626U, 0xffffffffU, 0xededededU, + 0xe8e8e8e8U, 0x9d9d9d9dU, 0x6f6f6f6fU, 0x8e8e8e8eU, + 0x19191919U, 0xa0a0a0a0U, 0xf0f0f0f0U, 0x89898989U, + 0x0f0f0f0fU, 0x07070707U, 0xafafafafU, 0xfbfbfbfbU, + 0x08080808U, 0x15151515U, 0x0d0d0d0dU, 0x04040404U, + 0x01010101U, 0x64646464U, 0xdfdfdfdfU, 0x76767676U, + 0x79797979U, 0xddddddddU, 0x3d3d3d3dU, 0x16161616U, + 0x3f3f3f3fU, 0x37373737U, 0x6d6d6d6dU, 0x38383838U, + 0xb9b9b9b9U, 0x73737373U, 0xe9e9e9e9U, 0x35353535U, + 0x55555555U, 0x71717171U, 0x7b7b7b7bU, 0x8c8c8c8cU, + 0x72727272U, 0x88888888U, 0xf6f6f6f6U, 0x2a2a2a2aU, + 0x3e3e3e3eU, 0x5e5e5e5eU, 0x27272727U, 0x46464646U, + 0x0c0c0c0cU, 0x65656565U, 0x68686868U, 0x61616161U, + 0x03030303U, 0xc1c1c1c1U, 0x57575757U, 0xd6d6d6d6U, + 0xd9d9d9d9U, 0x58585858U, 0xd8d8d8d8U, 0x66666666U, + 0xd7d7d7d7U, 0x3a3a3a3aU, 0xc8c8c8c8U, 0x3c3c3c3cU, + 0xfafafafaU, 0x96969696U, 0xa7a7a7a7U, 0x98989898U, + 0xececececU, 0xb8b8b8b8U, 0xc7c7c7c7U, 0xaeaeaeaeU, + 0x69696969U, 0x4b4b4b4bU, 0xababababU, 0xa9a9a9a9U, + 0x67676767U, 0x0a0a0a0aU, 0x47474747U, 0xf2f2f2f2U, + 0xb5b5b5b5U, 0x22222222U, 0xe5e5e5e5U, 0xeeeeeeeeU, + 0xbebebebeU, 0x2b2b2b2bU, 0x81818181U, 0x12121212U, + 0x83838383U, 0x1b1b1b1bU, 0x0e0e0e0eU, 0x23232323U, + 0xf5f5f5f5U, 0x45454545U, 0x21212121U, 0xcecececeU, + 0x49494949U, 0x2c2c2c2cU, 0xf9f9f9f9U, 0xe6e6e6e6U, + 0xb6b6b6b6U, 0x28282828U, 0x17171717U, 0x82828282U, + 0x1a1a1a1aU, 0x8b8b8b8bU, 0xfefefefeU, 0x8a8a8a8aU, + 0x09090909U, 0xc9c9c9c9U, 0x87878787U, 0x4e4e4e4eU, + 0xe1e1e1e1U, 0x2e2e2e2eU, 0xe4e4e4e4U, 0xe0e0e0e0U, + 0xebebebebU, 0x90909090U, 0xa4a4a4a4U, 0x1e1e1e1eU, + 0x85858585U, 0x60606060U, 0x00000000U, 0x25252525U, + 0xf4f4f4f4U, 0xf1f1f1f1U, 0x94949494U, 0x0b0b0b0bU, + 0xe7e7e7e7U, 0x75757575U, 0xefefefefU, 0x34343434U, + 0x31313131U, 0xd4d4d4d4U, 0xd0d0d0d0U, 0x86868686U, + 0x7e7e7e7eU, 0xadadadadU, 0xfdfdfdfdU, 0x29292929U, + 0x30303030U, 0x3b3b3b3bU, 0x9f9f9f9fU, 0xf8f8f8f8U, + 0xc6c6c6c6U, 0x13131313U, 0x06060606U, 0x05050505U, + 0xc5c5c5c5U, 0x11111111U, 0x77777777U, 0x7c7c7c7cU, + 0x7a7a7a7aU, 0x78787878U, 0x36363636U, 0x1c1c1c1cU, + 0x39393939U, 0x59595959U, 0x18181818U, 0x56565656U, + 0xb3b3b3b3U, 0xb0b0b0b0U, 0x24242424U, 0x20202020U, + 0xb2b2b2b2U, 0x92929292U, 0xa3a3a3a3U, 0xc0c0c0c0U, + 0x44444444U, 0x62626262U, 0x10101010U, 0xb4b4b4b4U, + 0x84848484U, 0x43434343U, 0x93939393U, 0xc2c2c2c2U, + 0x4a4a4a4aU, 0xbdbdbdbdU, 0x8f8f8f8fU, 0x2d2d2d2dU, + 0xbcbcbcbcU, 0x9c9c9c9cU, 0x6a6a6a6aU, 0x40404040U, + 0xcfcfcfcfU, 0xa2a2a2a2U, 0x80808080U, 0x4f4f4f4fU, + 0x1f1f1f1fU, 0xcacacacaU, 0xaaaaaaaaU, 0x42424242U, +}; + +static const u32 T5[256] = { + 0x00000000U, 0x01020608U, 0x02040c10U, 0x03060a18U, + 0x04081820U, 0x050a1e28U, 0x060c1430U, 0x070e1238U, + 0x08103040U, 0x09123648U, 0x0a143c50U, 0x0b163a58U, + 0x0c182860U, 0x0d1a2e68U, 0x0e1c2470U, 0x0f1e2278U, + 0x10206080U, 0x11226688U, 0x12246c90U, 0x13266a98U, + 0x142878a0U, 0x152a7ea8U, 0x162c74b0U, 0x172e72b8U, + 0x183050c0U, 0x193256c8U, 0x1a345cd0U, 0x1b365ad8U, + 0x1c3848e0U, 0x1d3a4ee8U, 0x1e3c44f0U, 0x1f3e42f8U, + 0x2040c01dU, 0x2142c615U, 0x2244cc0dU, 0x2346ca05U, + 0x2448d83dU, 0x254ade35U, 0x264cd42dU, 0x274ed225U, + 0x2850f05dU, 0x2952f655U, 0x2a54fc4dU, 0x2b56fa45U, + 0x2c58e87dU, 0x2d5aee75U, 0x2e5ce46dU, 0x2f5ee265U, + 0x3060a09dU, 0x3162a695U, 0x3264ac8dU, 0x3366aa85U, + 0x3468b8bdU, 0x356abeb5U, 0x366cb4adU, 0x376eb2a5U, + 0x387090ddU, 0x397296d5U, 0x3a749ccdU, 0x3b769ac5U, + 0x3c7888fdU, 0x3d7a8ef5U, 0x3e7c84edU, 0x3f7e82e5U, + 0x40809d3aU, 0x41829b32U, 0x4284912aU, 0x43869722U, + 0x4488851aU, 0x458a8312U, 0x468c890aU, 0x478e8f02U, + 0x4890ad7aU, 0x4992ab72U, 0x4a94a16aU, 0x4b96a762U, + 0x4c98b55aU, 0x4d9ab352U, 0x4e9cb94aU, 0x4f9ebf42U, + 0x50a0fdbaU, 0x51a2fbb2U, 0x52a4f1aaU, 0x53a6f7a2U, + 0x54a8e59aU, 0x55aae392U, 0x56ace98aU, 0x57aeef82U, + 0x58b0cdfaU, 0x59b2cbf2U, 0x5ab4c1eaU, 0x5bb6c7e2U, + 0x5cb8d5daU, 0x5dbad3d2U, 0x5ebcd9caU, 0x5fbedfc2U, + 0x60c05d27U, 0x61c25b2fU, 0x62c45137U, 0x63c6573fU, + 0x64c84507U, 0x65ca430fU, 0x66cc4917U, 0x67ce4f1fU, + 0x68d06d67U, 0x69d26b6fU, 0x6ad46177U, 0x6bd6677fU, + 0x6cd87547U, 0x6dda734fU, 0x6edc7957U, 0x6fde7f5fU, + 0x70e03da7U, 0x71e23bafU, 0x72e431b7U, 0x73e637bfU, + 0x74e82587U, 0x75ea238fU, 0x76ec2997U, 0x77ee2f9fU, + 0x78f00de7U, 0x79f20befU, 0x7af401f7U, 0x7bf607ffU, + 0x7cf815c7U, 0x7dfa13cfU, 0x7efc19d7U, 0x7ffe1fdfU, + 0x801d2774U, 0x811f217cU, 0x82192b64U, 0x831b2d6cU, + 0x84153f54U, 0x8517395cU, 0x86113344U, 0x8713354cU, + 0x880d1734U, 0x890f113cU, 0x8a091b24U, 0x8b0b1d2cU, + 0x8c050f14U, 0x8d07091cU, 0x8e010304U, 0x8f03050cU, + 0x903d47f4U, 0x913f41fcU, 0x92394be4U, 0x933b4decU, + 0x94355fd4U, 0x953759dcU, 0x963153c4U, 0x973355ccU, + 0x982d77b4U, 0x992f71bcU, 0x9a297ba4U, 0x9b2b7dacU, + 0x9c256f94U, 0x9d27699cU, 0x9e216384U, 0x9f23658cU, + 0xa05de769U, 0xa15fe161U, 0xa259eb79U, 0xa35bed71U, + 0xa455ff49U, 0xa557f941U, 0xa651f359U, 0xa753f551U, + 0xa84dd729U, 0xa94fd121U, 0xaa49db39U, 0xab4bdd31U, + 0xac45cf09U, 0xad47c901U, 0xae41c319U, 0xaf43c511U, + 0xb07d87e9U, 0xb17f81e1U, 0xb2798bf9U, 0xb37b8df1U, + 0xb4759fc9U, 0xb57799c1U, 0xb67193d9U, 0xb77395d1U, + 0xb86db7a9U, 0xb96fb1a1U, 0xba69bbb9U, 0xbb6bbdb1U, + 0xbc65af89U, 0xbd67a981U, 0xbe61a399U, 0xbf63a591U, + 0xc09dba4eU, 0xc19fbc46U, 0xc299b65eU, 0xc39bb056U, + 0xc495a26eU, 0xc597a466U, 0xc691ae7eU, 0xc793a876U, + 0xc88d8a0eU, 0xc98f8c06U, 0xca89861eU, 0xcb8b8016U, + 0xcc85922eU, 0xcd879426U, 0xce819e3eU, 0xcf839836U, + 0xd0bddaceU, 0xd1bfdcc6U, 0xd2b9d6deU, 0xd3bbd0d6U, + 0xd4b5c2eeU, 0xd5b7c4e6U, 0xd6b1cefeU, 0xd7b3c8f6U, + 0xd8adea8eU, 0xd9afec86U, 0xdaa9e69eU, 0xdbabe096U, + 0xdca5f2aeU, 0xdda7f4a6U, 0xdea1febeU, 0xdfa3f8b6U, + 0xe0dd7a53U, 0xe1df7c5bU, 0xe2d97643U, 0xe3db704bU, + 0xe4d56273U, 0xe5d7647bU, 0xe6d16e63U, 0xe7d3686bU, + 0xe8cd4a13U, 0xe9cf4c1bU, 0xeac94603U, 0xebcb400bU, + 0xecc55233U, 0xedc7543bU, 0xeec15e23U, 0xefc3582bU, + 0xf0fd1ad3U, 0xf1ff1cdbU, 0xf2f916c3U, 0xf3fb10cbU, + 0xf4f502f3U, 0xf5f704fbU, 0xf6f10ee3U, 0xf7f308ebU, + 0xf8ed2a93U, 0xf9ef2c9bU, 0xfae92683U, 0xfbeb208bU, + 0xfce532b3U, 0xfde734bbU, 0xfee13ea3U, 0xffe338abU, +}; + +static const u32 rc[] = { + 0xba542f74U, 0x53d3d24dU, 0x50ac8dbfU, 0x70529a4cU, + 0xead597d1U, 0x33515ba6U, 0xde48a899U, 0xdb32b7fcU, + 0xe39e919bU, 0xe2bb416eU, 0xa5cb6b95U, 0xa1f3b102U, + 0xccc41d14U, 0xc363da5dU, 0x5fdc7dcdU, 0x7f5a6c5cU, + 0xf726ffedU, 0xe89d6f8eU, 0x19a0f089U, +}; + +static int anubis_setkey(void *ctx_arg, const u8 *in_key, + unsigned int key_len, u32 *flags) +{ + + int N, R, i, pos, r; + u32 kappa[ANUBIS_MAX_N]; + u32 inter[ANUBIS_MAX_N]; + + struct anubis_ctx *ctx = ctx_arg; + + switch (key_len) + { + case 16: case 20: case 24: case 28: + case 32: case 36: case 40: + break; + default: + *flags |= CRYPTO_TFM_RES_BAD_KEY_LEN; + return - EINVAL; + } + + ctx->key_len = key_len * 8; + N = ctx->key_len >> 5; + ctx->R = R = 8 + N; + + /* * map cipher key to initial key state (mu): */ + for (i = 0, pos = 0; i < N; i++, pos += 4) { + kappa[i] = + (in_key[pos ] << 24) ^ + (in_key[pos + 1] << 16) ^ + (in_key[pos + 2] << 8) ^ + (in_key[pos + 3] ); + } + + /* + * generate R + 1 round keys: + */ + for (r = 0; r <= R; r++) { + u32 K0, K1, K2, K3; + /* + * generate r-th round key K^r: + */ + K0 = T4[(kappa[N - 1] >> 24) ]; + K1 = T4[(kappa[N - 1] >> 16) & 0xff]; + K2 = T4[(kappa[N - 1] >> 8) & 0xff]; + K3 = T4[(kappa[N - 1] ) & 0xff]; + for (i = N - 2; i >= 0; i--) { + K0 = T4[(kappa[i] >> 24) ] ^ + (T5[(K0 >> 24) ] & 0xff000000U) ^ + (T5[(K0 >> 16) & 0xff] & 0x00ff0000U) ^ + (T5[(K0 >> 8) & 0xff] & 0x0000ff00U) ^ + (T5[(K0 ) & 0xff] & 0x000000ffU); + K1 = T4[(kappa[i] >> 16) & 0xff] ^ + (T5[(K1 >> 24) ] & 0xff000000U) ^ + (T5[(K1 >> 16) & 0xff] & 0x00ff0000U) ^ + (T5[(K1 >> 8) & 0xff] & 0x0000ff00U) ^ + (T5[(K1 ) & 0xff] & 0x000000ffU); + K2 = T4[(kappa[i] >> 8) & 0xff] ^ + (T5[(K2 >> 24) ] & 0xff000000U) ^ + (T5[(K2 >> 16) & 0xff] & 0x00ff0000U) ^ + (T5[(K2 >> 8) & 0xff] & 0x0000ff00U) ^ + (T5[(K2 ) & 0xff] & 0x000000ffU); + K3 = T4[(kappa[i] ) & 0xff] ^ + (T5[(K3 >> 24) ] & 0xff000000U) ^ + (T5[(K3 >> 16) & 0xff] & 0x00ff0000U) ^ + (T5[(K3 >> 8) & 0xff] & 0x0000ff00U) ^ + (T5[(K3 ) & 0xff] & 0x000000ffU); + } + + ctx->E[r][0] = K0; + ctx->E[r][1] = K1; + ctx->E[r][2] = K2; + ctx->E[r][3] = K3; + + /* + * compute kappa^{r+1} from kappa^r: + */ + if (r == R) { + break; + } + for (i = 0; i < N; i++) { + int j = i; + inter[i] = T0[(kappa[j--] >> 24) ]; + if (j < 0) j = N - 1; + inter[i] ^= T1[(kappa[j--] >> 16) & 0xff]; + if (j < 0) j = N - 1; + inter[i] ^= T2[(kappa[j--] >> 8) & 0xff]; + if (j < 0) j = N - 1; + inter[i] ^= T3[(kappa[j ] ) & 0xff]; + } + kappa[0] = inter[0] ^ rc[r]; + for (i = 1; i < N; i++) { + kappa[i] = inter[i]; + } + } + + /* + * generate inverse key schedule: K'^0 = K^R, K'^R = + * K^0, K'^r = theta(K^{R-r}): + */ + for (i = 0; i < 4; i++) { + ctx->D[0][i] = ctx->E[R][i]; + ctx->D[R][i] = ctx->E[0][i]; + } + for (r = 1; r < R; r++) { + for (i = 0; i < 4; i++) { + u32 v = ctx->E[R - r][i]; + ctx->D[r][i] = + T0[T4[(v >> 24) ] & 0xff] ^ + T1[T4[(v >> 16) & 0xff] & 0xff] ^ + T2[T4[(v >> 8) & 0xff] & 0xff] ^ + T3[T4[(v ) & 0xff] & 0xff]; + } + } + + return 0; +} + +static void anubis_crypt(u32 roundKey[ANUBIS_MAX_ROUNDS + 1][4], + u8 *ciphertext, const u8 *plaintext, const int R) +{ + int i, pos, r; + u32 state[4]; + u32 inter[4]; + + /* + * map plaintext block to cipher state (mu) + * and add initial round key (sigma[K^0]): + */ + for (i = 0, pos = 0; i < 4; i++, pos += 4) { + state[i] = + (plaintext[pos ] << 24) ^ + (plaintext[pos + 1] << 16) ^ + (plaintext[pos + 2] << 8) ^ + (plaintext[pos + 3] ) ^ + roundKey[0][i]; + } + + /* + * R - 1 full rounds: + */ + + for (r = 1; r < R; r++) { + inter[0] = + T0[(state[0] >> 24) ] ^ + T1[(state[1] >> 24) ] ^ + T2[(state[2] >> 24) ] ^ + T3[(state[3] >> 24) ] ^ + roundKey[r][0]; + inter[1] = + T0[(state[0] >> 16) & 0xff] ^ + T1[(state[1] >> 16) & 0xff] ^ + T2[(state[2] >> 16) & 0xff] ^ + T3[(state[3] >> 16) & 0xff] ^ + roundKey[r][1]; + inter[2] = + T0[(state[0] >> 8) & 0xff] ^ + T1[(state[1] >> 8) & 0xff] ^ + T2[(state[2] >> 8) & 0xff] ^ + T3[(state[3] >> 8) & 0xff] ^ + roundKey[r][2]; + inter[3] = + T0[(state[0] ) & 0xff] ^ + T1[(state[1] ) & 0xff] ^ + T2[(state[2] ) & 0xff] ^ + T3[(state[3] ) & 0xff] ^ + roundKey[r][3]; + state[0] = inter[0]; + state[1] = inter[1]; + state[2] = inter[2]; + state[3] = inter[3]; + } + + /* + * last round: + */ + + inter[0] = + (T0[(state[0] >> 24) ] & 0xff000000U) ^ + (T1[(state[1] >> 24) ] & 0x00ff0000U) ^ + (T2[(state[2] >> 24) ] & 0x0000ff00U) ^ + (T3[(state[3] >> 24) ] & 0x000000ffU) ^ + roundKey[R][0]; + inter[1] = + (T0[(state[0] >> 16) & 0xff] & 0xff000000U) ^ + (T1[(state[1] >> 16) & 0xff] & 0x00ff0000U) ^ + (T2[(state[2] >> 16) & 0xff] & 0x0000ff00U) ^ + (T3[(state[3] >> 16) & 0xff] & 0x000000ffU) ^ + roundKey[R][1]; + inter[2] = + (T0[(state[0] >> 8) & 0xff] & 0xff000000U) ^ + (T1[(state[1] >> 8) & 0xff] & 0x00ff0000U) ^ + (T2[(state[2] >> 8) & 0xff] & 0x0000ff00U) ^ + (T3[(state[3] >> 8) & 0xff] & 0x000000ffU) ^ + roundKey[R][2]; + inter[3] = + (T0[(state[0] ) & 0xff] & 0xff000000U) ^ + (T1[(state[1] ) & 0xff] & 0x00ff0000U) ^ + (T2[(state[2] ) & 0xff] & 0x0000ff00U) ^ + (T3[(state[3] ) & 0xff] & 0x000000ffU) ^ + roundKey[R][3]; + + /* + * map cipher state to ciphertext block (mu^{-1}): + */ + + for (i = 0, pos = 0; i < 4; i++, pos += 4) { + u32 w = inter[i]; + ciphertext[pos ] = (u8)(w >> 24); + ciphertext[pos + 1] = (u8)(w >> 16); + ciphertext[pos + 2] = (u8)(w >> 8); + ciphertext[pos + 3] = (u8)(w ); + } +} + +static void anubis_encrypt(void *ctx_arg, u8 *dst, const u8 *src) +{ + struct anubis_ctx *ctx = ctx_arg; + anubis_crypt(ctx->E, dst, src, ctx->R); +} + +static void anubis_decrypt(void *ctx_arg, u8 *dst, const u8 *src) +{ + struct anubis_ctx *ctx = ctx_arg; + anubis_crypt(ctx->D, dst, src, ctx->R); +} + +static struct crypto_alg anubis_alg = { + .cra_name = "anubis", + .cra_flags = CRYPTO_ALG_TYPE_CIPHER, + .cra_blocksize = ANUBIS_BLOCK_SIZE, + .cra_ctxsize = sizeof (struct anubis_ctx), + .cra_module = THIS_MODULE, + .cra_list = LIST_HEAD_INIT(anubis_alg.cra_list), + .cra_u = { .cipher = { + .cia_min_keysize = ANUBIS_MIN_KEY_SIZE, + .cia_max_keysize = ANUBIS_MAX_KEY_SIZE, + .cia_setkey = anubis_setkey, + .cia_encrypt = anubis_encrypt, + .cia_decrypt = anubis_decrypt } } +}; + +static int __init init(void) +{ + int ret = 0; + + ret = crypto_register_alg(&anubis_alg); + return ret; +} + +static void __exit fini(void) +{ + crypto_unregister_alg(&anubis_alg); +} + +module_init(init); +module_exit(fini); + +MODULE_LICENSE("GPL"); +MODULE_DESCRIPTION("Anubis Cryptographic Algorithm"); diff -urN linux-2.4.27/crypto/api.c linux-2.4.28/crypto/api.c --- linux-2.4.27/crypto/api.c 2004-02-18 05:36:31.000000000 -0800 +++ linux-2.4.28/crypto/api.c 2004-11-17 03:54:21.170380296 -0800 @@ -156,8 +156,12 @@ void crypto_free_tfm(struct crypto_tfm *tfm) { + struct crypto_alg *alg = tfm->__crt_alg; + int size = sizeof(*tfm) + alg->cra_ctxsize; + crypto_exit_ops(tfm); - crypto_alg_put(tfm->__crt_alg); + crypto_alg_put(alg); + memset(tfm, 0, size); kfree(tfm); } diff -urN linux-2.4.27/crypto/arc4.c linux-2.4.28/crypto/arc4.c --- linux-2.4.27/crypto/arc4.c 2004-04-14 06:05:28.000000000 -0700 +++ linux-2.4.28/crypto/arc4.c 2004-11-17 03:54:21.170380296 -0800 @@ -3,7 +3,7 @@ * * ARC4 Cipher Algorithm * - * Jon Oberheide + * Jon Oberheide * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by @@ -100,4 +100,4 @@ MODULE_LICENSE("GPL"); MODULE_DESCRIPTION("ARC4 Cipher Algorithm"); -MODULE_AUTHOR("Jon Oberheide "); +MODULE_AUTHOR("Jon Oberheide "); diff -urN linux-2.4.27/crypto/blowfish.c linux-2.4.28/crypto/blowfish.c --- linux-2.4.27/crypto/blowfish.c 2004-02-18 05:36:31.000000000 -0800 +++ linux-2.4.28/crypto/blowfish.c 2004-11-17 03:54:21.171380337 -0800 @@ -3,9 +3,9 @@ * * Blowfish Cipher Algorithm, by Bruce Schneier. * http://www.counterpane.com/blowfish.html - * - * Adapated from Kerneli implementation. - * + * + * Adapted from Kerneli implementation. + * * Copyright (c) Herbert Valerio Riedel * Copyright (c) Kyle McMartin * Copyright (c) 2002 James Morris diff -urN linux-2.4.27/crypto/khazad.c linux-2.4.28/crypto/khazad.c --- linux-2.4.27/crypto/khazad.c 1969-12-31 16:00:00.000000000 -0800 +++ linux-2.4.28/crypto/khazad.c 2004-11-17 03:54:21.174380461 -0800 @@ -0,0 +1,915 @@ +/* + * Cryptographic API. + * + * Khazad Algorithm + * + * The Khazad algorithm was developed by Paulo S. L. M. Barreto and + * Vincent Rijmen. It was a finalist in the NESSIE encryption contest. + * + * The original authors have disclaimed all copyright interest in this + * code and thus put it in the public domain. The subsequent authors + * have put this under the GNU General Public License. + * + * By Aaron Grothe ajgrothe@yahoo.com, August 1, 2004 + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + */ + +#include +#include +#include +#include +#include + +#define KHAZAD_KEY_SIZE 16 +#define KHAZAD_BLOCK_SIZE 8 +#define KHAZAD_ROUNDS 8 + +struct khazad_ctx { + u64 E[KHAZAD_ROUNDS + 1]; + u64 D[KHAZAD_ROUNDS + 1]; +}; + +static const u64 T0[256] = { + 0xbad3d268bbb96a01ULL, 0x54fc4d19e59a66b1ULL, 0x2f71bc93e26514cdULL, + 0x749ccdb925871b51ULL, 0x53f55102f7a257a4ULL, 0xd3686bb8d0d6be03ULL, + 0xd26b6fbdd6deb504ULL, 0x4dd72964b35285feULL, 0x50f05d0dfdba4aadULL, + 0xace98a26cf09e063ULL, 0x8d8a0e83091c9684ULL, 0xbfdcc679a5914d1aULL, + 0x7090ddad3da7374dULL, 0x52f65507f1aa5ca3ULL, 0x9ab352c87ba417e1ULL, + 0x4cd42d61b55a8ef9ULL, 0xea238f65460320acULL, 0xd56273a6c4e68411ULL, + 0x97a466f155cc68c2ULL, 0xd16e63b2dcc6a80dULL, 0x3355ccffaa85d099ULL, + 0x51f35908fbb241aaULL, 0x5bed712ac7e20f9cULL, 0xa6f7a204f359ae55ULL, + 0xde7f5f81febec120ULL, 0x48d83d75ad7aa2e5ULL, 0xa8e59a32d729cc7fULL, + 0x99b65ec771bc0ae8ULL, 0xdb704b90e096e63bULL, 0x3256c8faac8ddb9eULL, + 0xb7c4e65195d11522ULL, 0xfc19d72b32b3aaceULL, 0xe338ab48704b7393ULL, + 0x9ebf42dc63843bfdULL, 0x91ae7eef41fc52d0ULL, 0x9bb056cd7dac1ce6ULL, + 0xe23baf4d76437894ULL, 0xbbd0d66dbdb16106ULL, 0x41c319589b32f1daULL, + 0x6eb2a5cb7957e517ULL, 0xa5f2ae0bf941b35cULL, 0xcb400bc08016564bULL, + 0x6bbdb1da677fc20cULL, 0x95a26efb59dc7eccULL, 0xa1febe1fe1619f40ULL, + 0xf308eb1810cbc3e3ULL, 0xb1cefe4f81e12f30ULL, 0x0206080a0c10160eULL, + 0xcc4917db922e675eULL, 0xc45137f3a26e3f66ULL, 0x1d2774694ee8cf53ULL, + 0x143c504478a09c6cULL, 0xc3582be8b0560e73ULL, 0x63a591f2573f9a34ULL, + 0xda734f95e69eed3cULL, 0x5de76934d3d2358eULL, 0x5fe1613edfc22380ULL, + 0xdc79578bf2aed72eULL, 0x7d87e99413cf486eULL, 0xcd4a13de94266c59ULL, + 0x7f81e19e1fdf5e60ULL, 0x5aee752fc1ea049bULL, 0x6cb4adc17547f319ULL, + 0x5ce46d31d5da3e89ULL, 0xf704fb0c08ebefffULL, 0x266a98bed42d47f2ULL, + 0xff1cdb2438abb7c7ULL, 0xed2a937e543b11b9ULL, 0xe825876f4a1336a2ULL, + 0x9dba4ed3699c26f4ULL, 0x6fb1a1ce7f5fee10ULL, 0x8e8f028c03048b8dULL, + 0x192b647d56c8e34fULL, 0xa0fdba1ae7699447ULL, 0xf00de7171ad3deeaULL, + 0x89861e97113cba98ULL, 0x0f113c332278692dULL, 0x07091c1b12383115ULL, + 0xafec8629c511fd6aULL, 0xfb10cb30208b9bdbULL, 0x0818202830405838ULL, + 0x153f54417ea8976bULL, 0x0d1734392e687f23ULL, 0x040c101418202c1cULL, + 0x0103040506080b07ULL, 0x64ac8de94507ab21ULL, 0xdf7c5b84f8b6ca27ULL, + 0x769ac5b329970d5fULL, 0x798bf9800bef6472ULL, 0xdd7a538ef4a6dc29ULL, + 0x3d47f4c98ef5b2b3ULL, 0x163a584e74b08a62ULL, 0x3f41fcc382e5a4bdULL, + 0x3759dcebb2a5fc85ULL, 0x6db7a9c4734ff81eULL, 0x3848e0d890dd95a8ULL, + 0xb9d6de67b1a17708ULL, 0x7395d1a237bf2a44ULL, 0xe926836a4c1b3da5ULL, + 0x355fd4e1beb5ea8bULL, 0x55ff491ce3926db6ULL, 0x7193d9a83baf3c4aULL, + 0x7b8df18a07ff727cULL, 0x8c890a860f149d83ULL, 0x7296d5a731b72143ULL, + 0x88851a921734b19fULL, 0xf607ff090ee3e4f8ULL, 0x2a7ea882fc4d33d6ULL, + 0x3e42f8c684edafbaULL, 0x5ee2653bd9ca2887ULL, 0x27699cbbd2254cf5ULL, + 0x46ca0543890ac0cfULL, 0x0c14303c28607424ULL, 0x65af89ec430fa026ULL, + 0x68b8bdd56d67df05ULL, 0x61a399f85b2f8c3aULL, 0x03050c0f0a181d09ULL, + 0xc15e23e2bc46187dULL, 0x57f94116ef827bb8ULL, 0xd6677fa9cefe9918ULL, + 0xd976439aec86f035ULL, 0x58e87d25cdfa1295ULL, 0xd875479fea8efb32ULL, + 0x66aa85e34917bd2fULL, 0xd7647bacc8f6921fULL, 0x3a4ee8d29ccd83a6ULL, + 0xc84507cf8a0e4b42ULL, 0x3c44f0cc88fdb9b4ULL, 0xfa13cf35268390dcULL, + 0x96a762f453c463c5ULL, 0xa7f4a601f551a552ULL, 0x98b55ac277b401efULL, + 0xec29977b52331abeULL, 0xb8d5da62b7a97c0fULL, 0xc7543bfca876226fULL, + 0xaeef822cc319f66dULL, 0x69bbb9d06b6fd402ULL, 0x4bdd317aa762bfecULL, + 0xabe0963ddd31d176ULL, 0xa9e69e37d121c778ULL, 0x67a981e64f1fb628ULL, + 0x0a1e28223c504e36ULL, 0x47c901468f02cbc8ULL, 0xf20bef1d16c3c8e4ULL, + 0xb5c2ee5b99c1032cULL, 0x226688aacc0d6beeULL, 0xe532b356647b4981ULL, + 0xee2f9f715e230cb0ULL, 0xbedfc27ca399461dULL, 0x2b7dac87fa4538d1ULL, + 0x819e3ebf217ce2a0ULL, 0x1236485a6c90a67eULL, 0x839836b52d6cf4aeULL, + 0x1b2d6c775ad8f541ULL, 0x0e1238362470622aULL, 0x23658cafca0560e9ULL, + 0xf502f30604fbf9f1ULL, 0x45cf094c8312ddc6ULL, 0x216384a5c61576e7ULL, + 0xce4f1fd19e3e7150ULL, 0x49db3970ab72a9e2ULL, 0x2c74b09ce87d09c4ULL, + 0xf916c33a2c9b8dd5ULL, 0xe637bf596e635488ULL, 0xb6c7e25493d91e25ULL, + 0x2878a088f05d25d8ULL, 0x17395c4b72b88165ULL, 0x829b32b02b64ffa9ULL, + 0x1a2e68725cd0fe46ULL, 0x8b80169d1d2cac96ULL, 0xfe1fdf213ea3bcc0ULL, + 0x8a8312981b24a791ULL, 0x091b242d3648533fULL, 0xc94603ca8c064045ULL, + 0x879426a1354cd8b2ULL, 0x4ed2256bb94a98f7ULL, 0xe13ea3427c5b659dULL, + 0x2e72b896e46d1fcaULL, 0xe431b75362734286ULL, 0xe03da7477a536e9aULL, + 0xeb208b60400b2babULL, 0x90ad7aea47f459d7ULL, 0xa4f1aa0eff49b85bULL, + 0x1e22786644f0d25aULL, 0x85922eab395ccebcULL, 0x60a09dfd5d27873dULL, + 0x0000000000000000ULL, 0x256f94b1de355afbULL, 0xf401f70302f3f2f6ULL, + 0xf10ee3121cdbd5edULL, 0x94a16afe5fd475cbULL, 0x0b1d2c273a584531ULL, + 0xe734bb5c686b5f8fULL, 0x759fc9bc238f1056ULL, 0xef2c9b74582b07b7ULL, + 0x345cd0e4b8bde18cULL, 0x3153c4f5a695c697ULL, 0xd46177a3c2ee8f16ULL, + 0xd06d67b7dacea30aULL, 0x869722a43344d3b5ULL, 0x7e82e59b19d75567ULL, + 0xadea8e23c901eb64ULL, 0xfd1ad32e34bba1c9ULL, 0x297ba48df6552edfULL, + 0x3050c0f0a09dcd90ULL, 0x3b4decd79ac588a1ULL, 0x9fbc46d9658c30faULL, + 0xf815c73f2a9386d2ULL, 0xc6573ff9ae7e2968ULL, 0x13354c5f6a98ad79ULL, + 0x060a181e14303a12ULL, 0x050f14111e28271bULL, 0xc55233f6a4663461ULL, + 0x113344556688bb77ULL, 0x7799c1b62f9f0658ULL, 0x7c84ed9115c74369ULL, + 0x7a8ef58f01f7797bULL, 0x7888fd850de76f75ULL, 0x365ad8eeb4adf782ULL, + 0x1c24706c48e0c454ULL, 0x394be4dd96d59eafULL, 0x59eb7920cbf21992ULL, + 0x1828607850c0e848ULL, 0x56fa4513e98a70bfULL, 0xb3c8f6458df1393eULL, + 0xb0cdfa4a87e92437ULL, 0x246c90b4d83d51fcULL, 0x206080a0c01d7de0ULL, + 0xb2cbf2408bf93239ULL, 0x92ab72e04be44fd9ULL, 0xa3f8b615ed71894eULL, + 0xc05d27e7ba4e137aULL, 0x44cc0d49851ad6c1ULL, 0x62a695f751379133ULL, + 0x103040506080b070ULL, 0xb4c1ea5e9fc9082bULL, 0x84912aae3f54c5bbULL, + 0x43c511529722e7d4ULL, 0x93a876e54dec44deULL, 0xc25b2fedb65e0574ULL, + 0x4ade357fa16ab4ebULL, 0xbddace73a9815b14ULL, 0x8f8c0689050c808aULL, + 0x2d77b499ee7502c3ULL, 0xbcd9ca76af895013ULL, 0x9cb94ad66f942df3ULL, + 0x6abeb5df6177c90bULL, 0x40c01d5d9d3afaddULL, 0xcf4c1bd498367a57ULL, + 0xa2fbb210eb798249ULL, 0x809d3aba2774e9a7ULL, 0x4fd1216ebf4293f0ULL, + 0x1f217c6342f8d95dULL, 0xca430fc5861e5d4cULL, 0xaae39238db39da71ULL, + 0x42c61557912aecd3ULL +}; + +static const u64 T1[256] = { + 0xd3ba68d2b9bb016aULL, 0xfc54194d9ae5b166ULL, 0x712f93bc65e2cd14ULL, + 0x9c74b9cd8725511bULL, 0xf5530251a2f7a457ULL, 0x68d3b86bd6d003beULL, + 0x6bd2bd6fded604b5ULL, 0xd74d642952b3fe85ULL, 0xf0500d5dbafdad4aULL, + 0xe9ac268a09cf63e0ULL, 0x8a8d830e1c098496ULL, 0xdcbf79c691a51a4dULL, + 0x9070addda73d4d37ULL, 0xf6520755aaf1a35cULL, 0xb39ac852a47be117ULL, + 0xd44c612d5ab5f98eULL, 0x23ea658f0346ac20ULL, 0x62d5a673e6c41184ULL, + 0xa497f166cc55c268ULL, 0x6ed1b263c6dc0da8ULL, 0x5533ffcc85aa99d0ULL, + 0xf3510859b2fbaa41ULL, 0xed5b2a71e2c79c0fULL, 0xf7a604a259f355aeULL, + 0x7fde815fbefe20c1ULL, 0xd848753d7aade5a2ULL, 0xe5a8329a29d77fccULL, + 0xb699c75ebc71e80aULL, 0x70db904b96e03be6ULL, 0x5632fac88dac9edbULL, + 0xc4b751e6d1952215ULL, 0x19fc2bd7b332ceaaULL, 0x38e348ab4b709373ULL, + 0xbf9edc428463fd3bULL, 0xae91ef7efc41d052ULL, 0xb09bcd56ac7de61cULL, + 0x3be24daf43769478ULL, 0xd0bb6dd6b1bd0661ULL, 0xc3415819329bdaf1ULL, + 0xb26ecba5577917e5ULL, 0xf2a50bae41f95cb3ULL, 0x40cbc00b16804b56ULL, + 0xbd6bdab17f670cc2ULL, 0xa295fb6edc59cc7eULL, 0xfea11fbe61e1409fULL, + 0x08f318ebcb10e3c3ULL, 0xceb14ffee181302fULL, 0x06020a08100c0e16ULL, + 0x49ccdb172e925e67ULL, 0x51c4f3376ea2663fULL, 0x271d6974e84e53cfULL, + 0x3c144450a0786c9cULL, 0x58c3e82b56b0730eULL, 0xa563f2913f57349aULL, + 0x73da954f9ee63cedULL, 0xe75d3469d2d38e35ULL, 0xe15f3e61c2df8023ULL, + 0x79dc8b57aef22ed7ULL, 0x877d94e9cf136e48ULL, 0x4acdde132694596cULL, + 0x817f9ee1df1f605eULL, 0xee5a2f75eac19b04ULL, 0xb46cc1ad477519f3ULL, + 0xe45c316ddad5893eULL, 0x04f70cfbeb08ffefULL, 0x6a26be982dd4f247ULL, + 0x1cff24dbab38c7b7ULL, 0x2aed7e933b54b911ULL, 0x25e86f87134aa236ULL, + 0xba9dd34e9c69f426ULL, 0xb16fcea15f7f10eeULL, 0x8f8e8c0204038d8bULL, + 0x2b197d64c8564fe3ULL, 0xfda01aba69e74794ULL, 0x0df017e7d31aeadeULL, + 0x8689971e3c1198baULL, 0x110f333c78222d69ULL, 0x09071b1c38121531ULL, + 0xecaf298611c56afdULL, 0x10fb30cb8b20db9bULL, 0x1808282040303858ULL, + 0x3f154154a87e6b97ULL, 0x170d3934682e237fULL, 0x0c04141020181c2cULL, + 0x030105040806070bULL, 0xac64e98d074521abULL, 0x7cdf845bb6f827caULL, + 0x9a76b3c597295f0dULL, 0x8b7980f9ef0b7264ULL, 0x7add8e53a6f429dcULL, + 0x473dc9f4f58eb3b2ULL, 0x3a164e58b074628aULL, 0x413fc3fce582bda4ULL, + 0x5937ebdca5b285fcULL, 0xb76dc4a94f731ef8ULL, 0x4838d8e0dd90a895ULL, + 0xd6b967dea1b10877ULL, 0x9573a2d1bf37442aULL, 0x26e96a831b4ca53dULL, + 0x5f35e1d4b5be8beaULL, 0xff551c4992e3b66dULL, 0x9371a8d9af3b4a3cULL, + 0x8d7b8af1ff077c72ULL, 0x898c860a140f839dULL, 0x9672a7d5b7314321ULL, + 0x8588921a34179fb1ULL, 0x07f609ffe30ef8e4ULL, 0x7e2a82a84dfcd633ULL, + 0x423ec6f8ed84baafULL, 0xe25e3b65cad98728ULL, 0x6927bb9c25d2f54cULL, + 0xca4643050a89cfc0ULL, 0x140c3c3060282474ULL, 0xaf65ec890f4326a0ULL, + 0xb868d5bd676d05dfULL, 0xa361f8992f5b3a8cULL, 0x05030f0c180a091dULL, + 0x5ec1e22346bc7d18ULL, 0xf957164182efb87bULL, 0x67d6a97ffece1899ULL, + 0x76d99a4386ec35f0ULL, 0xe858257dfacd9512ULL, 0x75d89f478eea32fbULL, + 0xaa66e38517492fbdULL, 0x64d7ac7bf6c81f92ULL, 0x4e3ad2e8cd9ca683ULL, + 0x45c8cf070e8a424bULL, 0x443cccf0fd88b4b9ULL, 0x13fa35cf8326dc90ULL, + 0xa796f462c453c563ULL, 0xf4a701a651f552a5ULL, 0xb598c25ab477ef01ULL, + 0x29ec7b973352be1aULL, 0xd5b862daa9b70f7cULL, 0x54c7fc3b76a86f22ULL, + 0xefae2c8219c36df6ULL, 0xbb69d0b96f6b02d4ULL, 0xdd4b7a3162a7ecbfULL, + 0xe0ab3d9631dd76d1ULL, 0xe6a9379e21d178c7ULL, 0xa967e6811f4f28b6ULL, + 0x1e0a2228503c364eULL, 0xc9474601028fc8cbULL, 0x0bf21defc316e4c8ULL, + 0xc2b55beec1992c03ULL, 0x6622aa880dccee6bULL, 0x32e556b37b648149ULL, + 0x2fee719f235eb00cULL, 0xdfbe7cc299a31d46ULL, 0x7d2b87ac45fad138ULL, + 0x9e81bf3e7c21a0e2ULL, 0x36125a48906c7ea6ULL, 0x9883b5366c2daef4ULL, + 0x2d1b776cd85a41f5ULL, 0x120e363870242a62ULL, 0x6523af8c05cae960ULL, + 0x02f506f3fb04f1f9ULL, 0xcf454c091283c6ddULL, 0x6321a58415c6e776ULL, + 0x4fced11f3e9e5071ULL, 0xdb49703972abe2a9ULL, 0x742c9cb07de8c409ULL, + 0x16f93ac39b2cd58dULL, 0x37e659bf636e8854ULL, 0xc7b654e2d993251eULL, + 0x782888a05df0d825ULL, 0x39174b5cb8726581ULL, 0x9b82b032642ba9ffULL, + 0x2e1a7268d05c46feULL, 0x808b9d162c1d96acULL, 0x1ffe21dfa33ec0bcULL, + 0x838a9812241b91a7ULL, 0x1b092d2448363f53ULL, 0x46c9ca03068c4540ULL, + 0x9487a1264c35b2d8ULL, 0xd24e6b254ab9f798ULL, 0x3ee142a35b7c9d65ULL, + 0x722e96b86de4ca1fULL, 0x31e453b773628642ULL, 0x3de047a7537a9a6eULL, + 0x20eb608b0b40ab2bULL, 0xad90ea7af447d759ULL, 0xf1a40eaa49ff5bb8ULL, + 0x221e6678f0445ad2ULL, 0x9285ab2e5c39bcceULL, 0xa060fd9d275d3d87ULL, + 0x0000000000000000ULL, 0x6f25b19435defb5aULL, 0x01f403f7f302f6f2ULL, + 0x0ef112e3db1cedd5ULL, 0xa194fe6ad45fcb75ULL, 0x1d0b272c583a3145ULL, + 0x34e75cbb6b688f5fULL, 0x9f75bcc98f235610ULL, 0x2cef749b2b58b707ULL, + 0x5c34e4d0bdb88ce1ULL, 0x5331f5c495a697c6ULL, 0x61d4a377eec2168fULL, + 0x6dd0b767ceda0aa3ULL, 0x9786a4224433b5d3ULL, 0x827e9be5d7196755ULL, + 0xeaad238e01c964ebULL, 0x1afd2ed3bb34c9a1ULL, 0x7b298da455f6df2eULL, + 0x5030f0c09da090cdULL, 0x4d3bd7ecc59aa188ULL, 0xbc9fd9468c65fa30ULL, + 0x15f83fc7932ad286ULL, 0x57c6f93f7eae6829ULL, 0x35135f4c986a79adULL, + 0x0a061e183014123aULL, 0x0f051114281e1b27ULL, 0x52c5f63366a46134ULL, + 0x33115544886677bbULL, 0x9977b6c19f2f5806ULL, 0x847c91edc7156943ULL, + 0x8e7a8ff5f7017b79ULL, 0x887885fde70d756fULL, 0x5a36eed8adb482f7ULL, + 0x241c6c70e04854c4ULL, 0x4b39dde4d596af9eULL, 0xeb592079f2cb9219ULL, + 0x28187860c05048e8ULL, 0xfa5613458ae9bf70ULL, 0xc8b345f6f18d3e39ULL, + 0xcdb04afae9873724ULL, 0x6c24b4903dd8fc51ULL, 0x6020a0801dc0e07dULL, + 0xcbb240f2f98b3932ULL, 0xab92e072e44bd94fULL, 0xf8a315b671ed4e89ULL, + 0x5dc0e7274eba7a13ULL, 0xcc44490d1a85c1d6ULL, 0xa662f79537513391ULL, + 0x30105040806070b0ULL, 0xc1b45eeac99f2b08ULL, 0x9184ae2a543fbbc5ULL, + 0xc54352112297d4e7ULL, 0xa893e576ec4dde44ULL, 0x5bc2ed2f5eb67405ULL, + 0xde4a7f356aa1ebb4ULL, 0xdabd73ce81a9145bULL, 0x8c8f89060c058a80ULL, + 0x772d99b475eec302ULL, 0xd9bc76ca89af1350ULL, 0xb99cd64a946ff32dULL, + 0xbe6adfb577610bc9ULL, 0xc0405d1d3a9dddfaULL, 0x4ccfd41b3698577aULL, + 0xfba210b279eb4982ULL, 0x9d80ba3a7427a7e9ULL, 0xd14f6e2142bff093ULL, + 0x211f637cf8425dd9ULL, 0x43cac50f1e864c5dULL, 0xe3aa389239db71daULL, + 0xc64257152a91d3ecULL +}; + +static const u64 T2[256] = { + 0xd268bad36a01bbb9ULL, 0x4d1954fc66b1e59aULL, 0xbc932f7114cde265ULL, + 0xcdb9749c1b512587ULL, 0x510253f557a4f7a2ULL, 0x6bb8d368be03d0d6ULL, + 0x6fbdd26bb504d6deULL, 0x29644dd785feb352ULL, 0x5d0d50f04aadfdbaULL, + 0x8a26ace9e063cf09ULL, 0x0e838d8a9684091cULL, 0xc679bfdc4d1aa591ULL, + 0xddad7090374d3da7ULL, 0x550752f65ca3f1aaULL, 0x52c89ab317e17ba4ULL, + 0x2d614cd48ef9b55aULL, 0x8f65ea2320ac4603ULL, 0x73a6d5628411c4e6ULL, + 0x66f197a468c255ccULL, 0x63b2d16ea80ddcc6ULL, 0xccff3355d099aa85ULL, + 0x590851f341aafbb2ULL, 0x712a5bed0f9cc7e2ULL, 0xa204a6f7ae55f359ULL, + 0x5f81de7fc120febeULL, 0x3d7548d8a2e5ad7aULL, 0x9a32a8e5cc7fd729ULL, + 0x5ec799b60ae871bcULL, 0x4b90db70e63be096ULL, 0xc8fa3256db9eac8dULL, + 0xe651b7c4152295d1ULL, 0xd72bfc19aace32b3ULL, 0xab48e3387393704bULL, + 0x42dc9ebf3bfd6384ULL, 0x7eef91ae52d041fcULL, 0x56cd9bb01ce67dacULL, + 0xaf4de23b78947643ULL, 0xd66dbbd06106bdb1ULL, 0x195841c3f1da9b32ULL, + 0xa5cb6eb2e5177957ULL, 0xae0ba5f2b35cf941ULL, 0x0bc0cb40564b8016ULL, + 0xb1da6bbdc20c677fULL, 0x6efb95a27ecc59dcULL, 0xbe1fa1fe9f40e161ULL, + 0xeb18f308c3e310cbULL, 0xfe4fb1ce2f3081e1ULL, 0x080a0206160e0c10ULL, + 0x17dbcc49675e922eULL, 0x37f3c4513f66a26eULL, 0x74691d27cf534ee8ULL, + 0x5044143c9c6c78a0ULL, 0x2be8c3580e73b056ULL, 0x91f263a59a34573fULL, + 0x4f95da73ed3ce69eULL, 0x69345de7358ed3d2ULL, 0x613e5fe12380dfc2ULL, + 0x578bdc79d72ef2aeULL, 0xe9947d87486e13cfULL, 0x13decd4a6c599426ULL, + 0xe19e7f815e601fdfULL, 0x752f5aee049bc1eaULL, 0xadc16cb4f3197547ULL, + 0x6d315ce43e89d5daULL, 0xfb0cf704efff08ebULL, 0x98be266a47f2d42dULL, + 0xdb24ff1cb7c738abULL, 0x937eed2a11b9543bULL, 0x876fe82536a24a13ULL, + 0x4ed39dba26f4699cULL, 0xa1ce6fb1ee107f5fULL, 0x028c8e8f8b8d0304ULL, + 0x647d192be34f56c8ULL, 0xba1aa0fd9447e769ULL, 0xe717f00ddeea1ad3ULL, + 0x1e978986ba98113cULL, 0x3c330f11692d2278ULL, 0x1c1b070931151238ULL, + 0x8629afecfd6ac511ULL, 0xcb30fb109bdb208bULL, 0x2028081858383040ULL, + 0x5441153f976b7ea8ULL, 0x34390d177f232e68ULL, 0x1014040c2c1c1820ULL, + 0x040501030b070608ULL, 0x8de964acab214507ULL, 0x5b84df7cca27f8b6ULL, + 0xc5b3769a0d5f2997ULL, 0xf980798b64720befULL, 0x538edd7adc29f4a6ULL, + 0xf4c93d47b2b38ef5ULL, 0x584e163a8a6274b0ULL, 0xfcc33f41a4bd82e5ULL, + 0xdceb3759fc85b2a5ULL, 0xa9c46db7f81e734fULL, 0xe0d8384895a890ddULL, + 0xde67b9d67708b1a1ULL, 0xd1a273952a4437bfULL, 0x836ae9263da54c1bULL, + 0xd4e1355fea8bbeb5ULL, 0x491c55ff6db6e392ULL, 0xd9a871933c4a3bafULL, + 0xf18a7b8d727c07ffULL, 0x0a868c899d830f14ULL, 0xd5a77296214331b7ULL, + 0x1a928885b19f1734ULL, 0xff09f607e4f80ee3ULL, 0xa8822a7e33d6fc4dULL, + 0xf8c63e42afba84edULL, 0x653b5ee22887d9caULL, 0x9cbb27694cf5d225ULL, + 0x054346cac0cf890aULL, 0x303c0c1474242860ULL, 0x89ec65afa026430fULL, + 0xbdd568b8df056d67ULL, 0x99f861a38c3a5b2fULL, 0x0c0f03051d090a18ULL, + 0x23e2c15e187dbc46ULL, 0x411657f97bb8ef82ULL, 0x7fa9d6679918cefeULL, + 0x439ad976f035ec86ULL, 0x7d2558e81295cdfaULL, 0x479fd875fb32ea8eULL, + 0x85e366aabd2f4917ULL, 0x7bacd764921fc8f6ULL, 0xe8d23a4e83a69ccdULL, + 0x07cfc8454b428a0eULL, 0xf0cc3c44b9b488fdULL, 0xcf35fa1390dc2683ULL, + 0x62f496a763c553c4ULL, 0xa601a7f4a552f551ULL, 0x5ac298b501ef77b4ULL, + 0x977bec291abe5233ULL, 0xda62b8d57c0fb7a9ULL, 0x3bfcc754226fa876ULL, + 0x822caeeff66dc319ULL, 0xb9d069bbd4026b6fULL, 0x317a4bddbfeca762ULL, + 0x963dabe0d176dd31ULL, 0x9e37a9e6c778d121ULL, 0x81e667a9b6284f1fULL, + 0x28220a1e4e363c50ULL, 0x014647c9cbc88f02ULL, 0xef1df20bc8e416c3ULL, + 0xee5bb5c2032c99c1ULL, 0x88aa22666beecc0dULL, 0xb356e5324981647bULL, + 0x9f71ee2f0cb05e23ULL, 0xc27cbedf461da399ULL, 0xac872b7d38d1fa45ULL, + 0x3ebf819ee2a0217cULL, 0x485a1236a67e6c90ULL, 0x36b58398f4ae2d6cULL, + 0x6c771b2df5415ad8ULL, 0x38360e12622a2470ULL, 0x8caf236560e9ca05ULL, + 0xf306f502f9f104fbULL, 0x094c45cfddc68312ULL, 0x84a5216376e7c615ULL, + 0x1fd1ce4f71509e3eULL, 0x397049dba9e2ab72ULL, 0xb09c2c7409c4e87dULL, + 0xc33af9168dd52c9bULL, 0xbf59e63754886e63ULL, 0xe254b6c71e2593d9ULL, + 0xa088287825d8f05dULL, 0x5c4b1739816572b8ULL, 0x32b0829bffa92b64ULL, + 0x68721a2efe465cd0ULL, 0x169d8b80ac961d2cULL, 0xdf21fe1fbcc03ea3ULL, + 0x12988a83a7911b24ULL, 0x242d091b533f3648ULL, 0x03cac94640458c06ULL, + 0x26a18794d8b2354cULL, 0x256b4ed298f7b94aULL, 0xa342e13e659d7c5bULL, + 0xb8962e721fcae46dULL, 0xb753e43142866273ULL, 0xa747e03d6e9a7a53ULL, + 0x8b60eb202bab400bULL, 0x7aea90ad59d747f4ULL, 0xaa0ea4f1b85bff49ULL, + 0x78661e22d25a44f0ULL, 0x2eab8592cebc395cULL, 0x9dfd60a0873d5d27ULL, + 0x0000000000000000ULL, 0x94b1256f5afbde35ULL, 0xf703f401f2f602f3ULL, + 0xe312f10ed5ed1cdbULL, 0x6afe94a175cb5fd4ULL, 0x2c270b1d45313a58ULL, + 0xbb5ce7345f8f686bULL, 0xc9bc759f1056238fULL, 0x9b74ef2c07b7582bULL, + 0xd0e4345ce18cb8bdULL, 0xc4f53153c697a695ULL, 0x77a3d4618f16c2eeULL, + 0x67b7d06da30adaceULL, 0x22a48697d3b53344ULL, 0xe59b7e82556719d7ULL, + 0x8e23adeaeb64c901ULL, 0xd32efd1aa1c934bbULL, 0xa48d297b2edff655ULL, + 0xc0f03050cd90a09dULL, 0xecd73b4d88a19ac5ULL, 0x46d99fbc30fa658cULL, + 0xc73ff81586d22a93ULL, 0x3ff9c6572968ae7eULL, 0x4c5f1335ad796a98ULL, + 0x181e060a3a121430ULL, 0x1411050f271b1e28ULL, 0x33f6c5523461a466ULL, + 0x44551133bb776688ULL, 0xc1b6779906582f9fULL, 0xed917c84436915c7ULL, + 0xf58f7a8e797b01f7ULL, 0xfd8578886f750de7ULL, 0xd8ee365af782b4adULL, + 0x706c1c24c45448e0ULL, 0xe4dd394b9eaf96d5ULL, 0x792059eb1992cbf2ULL, + 0x60781828e84850c0ULL, 0x451356fa70bfe98aULL, 0xf645b3c8393e8df1ULL, + 0xfa4ab0cd243787e9ULL, 0x90b4246c51fcd83dULL, 0x80a020607de0c01dULL, + 0xf240b2cb32398bf9ULL, 0x72e092ab4fd94be4ULL, 0xb615a3f8894eed71ULL, + 0x27e7c05d137aba4eULL, 0x0d4944ccd6c1851aULL, 0x95f762a691335137ULL, + 0x40501030b0706080ULL, 0xea5eb4c1082b9fc9ULL, 0x2aae8491c5bb3f54ULL, + 0x115243c5e7d49722ULL, 0x76e593a844de4decULL, 0x2fedc25b0574b65eULL, + 0x357f4adeb4eba16aULL, 0xce73bdda5b14a981ULL, 0x06898f8c808a050cULL, + 0xb4992d7702c3ee75ULL, 0xca76bcd95013af89ULL, 0x4ad69cb92df36f94ULL, + 0xb5df6abec90b6177ULL, 0x1d5d40c0fadd9d3aULL, 0x1bd4cf4c7a579836ULL, + 0xb210a2fb8249eb79ULL, 0x3aba809de9a72774ULL, 0x216e4fd193f0bf42ULL, + 0x7c631f21d95d42f8ULL, 0x0fc5ca435d4c861eULL, 0x9238aae3da71db39ULL, + 0x155742c6ecd3912aULL +}; + +static const u64 T3[256] = { + 0x68d2d3ba016ab9bbULL, 0x194dfc54b1669ae5ULL, 0x93bc712fcd1465e2ULL, + 0xb9cd9c74511b8725ULL, 0x0251f553a457a2f7ULL, 0xb86b68d303bed6d0ULL, + 0xbd6f6bd204b5ded6ULL, 0x6429d74dfe8552b3ULL, 0x0d5df050ad4abafdULL, + 0x268ae9ac63e009cfULL, 0x830e8a8d84961c09ULL, 0x79c6dcbf1a4d91a5ULL, + 0xaddd90704d37a73dULL, 0x0755f652a35caaf1ULL, 0xc852b39ae117a47bULL, + 0x612dd44cf98e5ab5ULL, 0x658f23eaac200346ULL, 0xa67362d51184e6c4ULL, + 0xf166a497c268cc55ULL, 0xb2636ed10da8c6dcULL, 0xffcc553399d085aaULL, + 0x0859f351aa41b2fbULL, 0x2a71ed5b9c0fe2c7ULL, 0x04a2f7a655ae59f3ULL, + 0x815f7fde20c1befeULL, 0x753dd848e5a27aadULL, 0x329ae5a87fcc29d7ULL, + 0xc75eb699e80abc71ULL, 0x904b70db3be696e0ULL, 0xfac856329edb8dacULL, + 0x51e6c4b72215d195ULL, 0x2bd719fcceaab332ULL, 0x48ab38e393734b70ULL, + 0xdc42bf9efd3b8463ULL, 0xef7eae91d052fc41ULL, 0xcd56b09be61cac7dULL, + 0x4daf3be294784376ULL, 0x6dd6d0bb0661b1bdULL, 0x5819c341daf1329bULL, + 0xcba5b26e17e55779ULL, 0x0baef2a55cb341f9ULL, 0xc00b40cb4b561680ULL, + 0xdab1bd6b0cc27f67ULL, 0xfb6ea295cc7edc59ULL, 0x1fbefea1409f61e1ULL, + 0x18eb08f3e3c3cb10ULL, 0x4ffeceb1302fe181ULL, 0x0a0806020e16100cULL, + 0xdb1749cc5e672e92ULL, 0xf33751c4663f6ea2ULL, 0x6974271d53cfe84eULL, + 0x44503c146c9ca078ULL, 0xe82b58c3730e56b0ULL, 0xf291a563349a3f57ULL, + 0x954f73da3ced9ee6ULL, 0x3469e75d8e35d2d3ULL, 0x3e61e15f8023c2dfULL, + 0x8b5779dc2ed7aef2ULL, 0x94e9877d6e48cf13ULL, 0xde134acd596c2694ULL, + 0x9ee1817f605edf1fULL, 0x2f75ee5a9b04eac1ULL, 0xc1adb46c19f34775ULL, + 0x316de45c893edad5ULL, 0x0cfb04f7ffefeb08ULL, 0xbe986a26f2472dd4ULL, + 0x24db1cffc7b7ab38ULL, 0x7e932aedb9113b54ULL, 0x6f8725e8a236134aULL, + 0xd34eba9df4269c69ULL, 0xcea1b16f10ee5f7fULL, 0x8c028f8e8d8b0403ULL, + 0x7d642b194fe3c856ULL, 0x1abafda0479469e7ULL, 0x17e70df0eaded31aULL, + 0x971e868998ba3c11ULL, 0x333c110f2d697822ULL, 0x1b1c090715313812ULL, + 0x2986ecaf6afd11c5ULL, 0x30cb10fbdb9b8b20ULL, 0x2820180838584030ULL, + 0x41543f156b97a87eULL, 0x3934170d237f682eULL, 0x14100c041c2c2018ULL, + 0x05040301070b0806ULL, 0xe98dac6421ab0745ULL, 0x845b7cdf27cab6f8ULL, + 0xb3c59a765f0d9729ULL, 0x80f98b797264ef0bULL, 0x8e537add29dca6f4ULL, + 0xc9f4473db3b2f58eULL, 0x4e583a16628ab074ULL, 0xc3fc413fbda4e582ULL, + 0xebdc593785fca5b2ULL, 0xc4a9b76d1ef84f73ULL, 0xd8e04838a895dd90ULL, + 0x67ded6b90877a1b1ULL, 0xa2d19573442abf37ULL, 0x6a8326e9a53d1b4cULL, + 0xe1d45f358beab5beULL, 0x1c49ff55b66d92e3ULL, 0xa8d993714a3caf3bULL, + 0x8af18d7b7c72ff07ULL, 0x860a898c839d140fULL, 0xa7d596724321b731ULL, + 0x921a85889fb13417ULL, 0x09ff07f6f8e4e30eULL, 0x82a87e2ad6334dfcULL, + 0xc6f8423ebaafed84ULL, 0x3b65e25e8728cad9ULL, 0xbb9c6927f54c25d2ULL, + 0x4305ca46cfc00a89ULL, 0x3c30140c24746028ULL, 0xec89af6526a00f43ULL, + 0xd5bdb86805df676dULL, 0xf899a3613a8c2f5bULL, 0x0f0c0503091d180aULL, + 0xe2235ec17d1846bcULL, 0x1641f957b87b82efULL, 0xa97f67d61899feceULL, + 0x9a4376d935f086ecULL, 0x257de8589512facdULL, 0x9f4775d832fb8eeaULL, + 0xe385aa662fbd1749ULL, 0xac7b64d71f92f6c8ULL, 0xd2e84e3aa683cd9cULL, + 0xcf0745c8424b0e8aULL, 0xccf0443cb4b9fd88ULL, 0x35cf13fadc908326ULL, + 0xf462a796c563c453ULL, 0x01a6f4a752a551f5ULL, 0xc25ab598ef01b477ULL, + 0x7b9729ecbe1a3352ULL, 0x62dad5b80f7ca9b7ULL, 0xfc3b54c76f2276a8ULL, + 0x2c82efae6df619c3ULL, 0xd0b9bb6902d46f6bULL, 0x7a31dd4becbf62a7ULL, + 0x3d96e0ab76d131ddULL, 0x379ee6a978c721d1ULL, 0xe681a96728b61f4fULL, + 0x22281e0a364e503cULL, 0x4601c947c8cb028fULL, 0x1def0bf2e4c8c316ULL, + 0x5beec2b52c03c199ULL, 0xaa886622ee6b0dccULL, 0x56b332e581497b64ULL, + 0x719f2feeb00c235eULL, 0x7cc2dfbe1d4699a3ULL, 0x87ac7d2bd13845faULL, + 0xbf3e9e81a0e27c21ULL, 0x5a4836127ea6906cULL, 0xb5369883aef46c2dULL, + 0x776c2d1b41f5d85aULL, 0x3638120e2a627024ULL, 0xaf8c6523e96005caULL, + 0x06f302f5f1f9fb04ULL, 0x4c09cf45c6dd1283ULL, 0xa5846321e77615c6ULL, + 0xd11f4fce50713e9eULL, 0x7039db49e2a972abULL, 0x9cb0742cc4097de8ULL, + 0x3ac316f9d58d9b2cULL, 0x59bf37e68854636eULL, 0x54e2c7b6251ed993ULL, + 0x88a07828d8255df0ULL, 0x4b5c39176581b872ULL, 0xb0329b82a9ff642bULL, + 0x72682e1a46fed05cULL, 0x9d16808b96ac2c1dULL, 0x21df1ffec0bca33eULL, + 0x9812838a91a7241bULL, 0x2d241b093f534836ULL, 0xca0346c94540068cULL, + 0xa1269487b2d84c35ULL, 0x6b25d24ef7984ab9ULL, 0x42a33ee19d655b7cULL, + 0x96b8722eca1f6de4ULL, 0x53b731e486427362ULL, 0x47a73de09a6e537aULL, + 0x608b20ebab2b0b40ULL, 0xea7aad90d759f447ULL, 0x0eaaf1a45bb849ffULL, + 0x6678221e5ad2f044ULL, 0xab2e9285bcce5c39ULL, 0xfd9da0603d87275dULL, + 0x0000000000000000ULL, 0xb1946f25fb5a35deULL, 0x03f701f4f6f2f302ULL, + 0x12e30ef1edd5db1cULL, 0xfe6aa194cb75d45fULL, 0x272c1d0b3145583aULL, + 0x5cbb34e78f5f6b68ULL, 0xbcc99f7556108f23ULL, 0x749b2cefb7072b58ULL, + 0xe4d05c348ce1bdb8ULL, 0xf5c4533197c695a6ULL, 0xa37761d4168feec2ULL, + 0xb7676dd00aa3cedaULL, 0xa4229786b5d34433ULL, 0x9be5827e6755d719ULL, + 0x238eeaad64eb01c9ULL, 0x2ed31afdc9a1bb34ULL, 0x8da47b29df2e55f6ULL, + 0xf0c0503090cd9da0ULL, 0xd7ec4d3ba188c59aULL, 0xd946bc9ffa308c65ULL, + 0x3fc715f8d286932aULL, 0xf93f57c668297eaeULL, 0x5f4c351379ad986aULL, + 0x1e180a06123a3014ULL, 0x11140f051b27281eULL, 0xf63352c5613466a4ULL, + 0x5544331177bb8866ULL, 0xb6c1997758069f2fULL, 0x91ed847c6943c715ULL, + 0x8ff58e7a7b79f701ULL, 0x85fd8878756fe70dULL, 0xeed85a3682f7adb4ULL, + 0x6c70241c54c4e048ULL, 0xdde44b39af9ed596ULL, 0x2079eb599219f2cbULL, + 0x7860281848e8c050ULL, 0x1345fa56bf708ae9ULL, 0x45f6c8b33e39f18dULL, + 0x4afacdb03724e987ULL, 0xb4906c24fc513dd8ULL, 0xa0806020e07d1dc0ULL, + 0x40f2cbb23932f98bULL, 0xe072ab92d94fe44bULL, 0x15b6f8a34e8971edULL, + 0xe7275dc07a134ebaULL, 0x490dcc44c1d61a85ULL, 0xf795a66233913751ULL, + 0x5040301070b08060ULL, 0x5eeac1b42b08c99fULL, 0xae2a9184bbc5543fULL, + 0x5211c543d4e72297ULL, 0xe576a893de44ec4dULL, 0xed2f5bc274055eb6ULL, + 0x7f35de4aebb46aa1ULL, 0x73cedabd145b81a9ULL, 0x89068c8f8a800c05ULL, + 0x99b4772dc30275eeULL, 0x76cad9bc135089afULL, 0xd64ab99cf32d946fULL, + 0xdfb5be6a0bc97761ULL, 0x5d1dc040ddfa3a9dULL, 0xd41b4ccf577a3698ULL, + 0x10b2fba2498279ebULL, 0xba3a9d80a7e97427ULL, 0x6e21d14ff09342bfULL, + 0x637c211f5dd9f842ULL, 0xc50f43ca4c5d1e86ULL, 0x3892e3aa71da39dbULL, + 0x5715c642d3ec2a91ULL +}; + +static const u64 T4[256] = { + 0xbbb96a01bad3d268ULL, 0xe59a66b154fc4d19ULL, 0xe26514cd2f71bc93ULL, + 0x25871b51749ccdb9ULL, 0xf7a257a453f55102ULL, 0xd0d6be03d3686bb8ULL, + 0xd6deb504d26b6fbdULL, 0xb35285fe4dd72964ULL, 0xfdba4aad50f05d0dULL, + 0xcf09e063ace98a26ULL, 0x091c96848d8a0e83ULL, 0xa5914d1abfdcc679ULL, + 0x3da7374d7090ddadULL, 0xf1aa5ca352f65507ULL, 0x7ba417e19ab352c8ULL, + 0xb55a8ef94cd42d61ULL, 0x460320acea238f65ULL, 0xc4e68411d56273a6ULL, + 0x55cc68c297a466f1ULL, 0xdcc6a80dd16e63b2ULL, 0xaa85d0993355ccffULL, + 0xfbb241aa51f35908ULL, 0xc7e20f9c5bed712aULL, 0xf359ae55a6f7a204ULL, + 0xfebec120de7f5f81ULL, 0xad7aa2e548d83d75ULL, 0xd729cc7fa8e59a32ULL, + 0x71bc0ae899b65ec7ULL, 0xe096e63bdb704b90ULL, 0xac8ddb9e3256c8faULL, + 0x95d11522b7c4e651ULL, 0x32b3aacefc19d72bULL, 0x704b7393e338ab48ULL, + 0x63843bfd9ebf42dcULL, 0x41fc52d091ae7eefULL, 0x7dac1ce69bb056cdULL, + 0x76437894e23baf4dULL, 0xbdb16106bbd0d66dULL, 0x9b32f1da41c31958ULL, + 0x7957e5176eb2a5cbULL, 0xf941b35ca5f2ae0bULL, 0x8016564bcb400bc0ULL, + 0x677fc20c6bbdb1daULL, 0x59dc7ecc95a26efbULL, 0xe1619f40a1febe1fULL, + 0x10cbc3e3f308eb18ULL, 0x81e12f30b1cefe4fULL, 0x0c10160e0206080aULL, + 0x922e675ecc4917dbULL, 0xa26e3f66c45137f3ULL, 0x4ee8cf531d277469ULL, + 0x78a09c6c143c5044ULL, 0xb0560e73c3582be8ULL, 0x573f9a3463a591f2ULL, + 0xe69eed3cda734f95ULL, 0xd3d2358e5de76934ULL, 0xdfc223805fe1613eULL, + 0xf2aed72edc79578bULL, 0x13cf486e7d87e994ULL, 0x94266c59cd4a13deULL, + 0x1fdf5e607f81e19eULL, 0xc1ea049b5aee752fULL, 0x7547f3196cb4adc1ULL, + 0xd5da3e895ce46d31ULL, 0x08ebeffff704fb0cULL, 0xd42d47f2266a98beULL, + 0x38abb7c7ff1cdb24ULL, 0x543b11b9ed2a937eULL, 0x4a1336a2e825876fULL, + 0x699c26f49dba4ed3ULL, 0x7f5fee106fb1a1ceULL, 0x03048b8d8e8f028cULL, + 0x56c8e34f192b647dULL, 0xe7699447a0fdba1aULL, 0x1ad3deeaf00de717ULL, + 0x113cba9889861e97ULL, 0x2278692d0f113c33ULL, 0x1238311507091c1bULL, + 0xc511fd6aafec8629ULL, 0x208b9bdbfb10cb30ULL, 0x3040583808182028ULL, + 0x7ea8976b153f5441ULL, 0x2e687f230d173439ULL, 0x18202c1c040c1014ULL, + 0x06080b0701030405ULL, 0x4507ab2164ac8de9ULL, 0xf8b6ca27df7c5b84ULL, + 0x29970d5f769ac5b3ULL, 0x0bef6472798bf980ULL, 0xf4a6dc29dd7a538eULL, + 0x8ef5b2b33d47f4c9ULL, 0x74b08a62163a584eULL, 0x82e5a4bd3f41fcc3ULL, + 0xb2a5fc853759dcebULL, 0x734ff81e6db7a9c4ULL, 0x90dd95a83848e0d8ULL, + 0xb1a17708b9d6de67ULL, 0x37bf2a447395d1a2ULL, 0x4c1b3da5e926836aULL, + 0xbeb5ea8b355fd4e1ULL, 0xe3926db655ff491cULL, 0x3baf3c4a7193d9a8ULL, + 0x07ff727c7b8df18aULL, 0x0f149d838c890a86ULL, 0x31b721437296d5a7ULL, + 0x1734b19f88851a92ULL, 0x0ee3e4f8f607ff09ULL, 0xfc4d33d62a7ea882ULL, + 0x84edafba3e42f8c6ULL, 0xd9ca28875ee2653bULL, 0xd2254cf527699cbbULL, + 0x890ac0cf46ca0543ULL, 0x286074240c14303cULL, 0x430fa02665af89ecULL, + 0x6d67df0568b8bdd5ULL, 0x5b2f8c3a61a399f8ULL, 0x0a181d0903050c0fULL, + 0xbc46187dc15e23e2ULL, 0xef827bb857f94116ULL, 0xcefe9918d6677fa9ULL, + 0xec86f035d976439aULL, 0xcdfa129558e87d25ULL, 0xea8efb32d875479fULL, + 0x4917bd2f66aa85e3ULL, 0xc8f6921fd7647bacULL, 0x9ccd83a63a4ee8d2ULL, + 0x8a0e4b42c84507cfULL, 0x88fdb9b43c44f0ccULL, 0x268390dcfa13cf35ULL, + 0x53c463c596a762f4ULL, 0xf551a552a7f4a601ULL, 0x77b401ef98b55ac2ULL, + 0x52331abeec29977bULL, 0xb7a97c0fb8d5da62ULL, 0xa876226fc7543bfcULL, + 0xc319f66daeef822cULL, 0x6b6fd40269bbb9d0ULL, 0xa762bfec4bdd317aULL, + 0xdd31d176abe0963dULL, 0xd121c778a9e69e37ULL, 0x4f1fb62867a981e6ULL, + 0x3c504e360a1e2822ULL, 0x8f02cbc847c90146ULL, 0x16c3c8e4f20bef1dULL, + 0x99c1032cb5c2ee5bULL, 0xcc0d6bee226688aaULL, 0x647b4981e532b356ULL, + 0x5e230cb0ee2f9f71ULL, 0xa399461dbedfc27cULL, 0xfa4538d12b7dac87ULL, + 0x217ce2a0819e3ebfULL, 0x6c90a67e1236485aULL, 0x2d6cf4ae839836b5ULL, + 0x5ad8f5411b2d6c77ULL, 0x2470622a0e123836ULL, 0xca0560e923658cafULL, + 0x04fbf9f1f502f306ULL, 0x8312ddc645cf094cULL, 0xc61576e7216384a5ULL, + 0x9e3e7150ce4f1fd1ULL, 0xab72a9e249db3970ULL, 0xe87d09c42c74b09cULL, + 0x2c9b8dd5f916c33aULL, 0x6e635488e637bf59ULL, 0x93d91e25b6c7e254ULL, + 0xf05d25d82878a088ULL, 0x72b8816517395c4bULL, 0x2b64ffa9829b32b0ULL, + 0x5cd0fe461a2e6872ULL, 0x1d2cac968b80169dULL, 0x3ea3bcc0fe1fdf21ULL, + 0x1b24a7918a831298ULL, 0x3648533f091b242dULL, 0x8c064045c94603caULL, + 0x354cd8b2879426a1ULL, 0xb94a98f74ed2256bULL, 0x7c5b659de13ea342ULL, + 0xe46d1fca2e72b896ULL, 0x62734286e431b753ULL, 0x7a536e9ae03da747ULL, + 0x400b2babeb208b60ULL, 0x47f459d790ad7aeaULL, 0xff49b85ba4f1aa0eULL, + 0x44f0d25a1e227866ULL, 0x395ccebc85922eabULL, 0x5d27873d60a09dfdULL, + 0x0000000000000000ULL, 0xde355afb256f94b1ULL, 0x02f3f2f6f401f703ULL, + 0x1cdbd5edf10ee312ULL, 0x5fd475cb94a16afeULL, 0x3a5845310b1d2c27ULL, + 0x686b5f8fe734bb5cULL, 0x238f1056759fc9bcULL, 0x582b07b7ef2c9b74ULL, + 0xb8bde18c345cd0e4ULL, 0xa695c6973153c4f5ULL, 0xc2ee8f16d46177a3ULL, + 0xdacea30ad06d67b7ULL, 0x3344d3b5869722a4ULL, 0x19d755677e82e59bULL, + 0xc901eb64adea8e23ULL, 0x34bba1c9fd1ad32eULL, 0xf6552edf297ba48dULL, + 0xa09dcd903050c0f0ULL, 0x9ac588a13b4decd7ULL, 0x658c30fa9fbc46d9ULL, + 0x2a9386d2f815c73fULL, 0xae7e2968c6573ff9ULL, 0x6a98ad7913354c5fULL, + 0x14303a12060a181eULL, 0x1e28271b050f1411ULL, 0xa4663461c55233f6ULL, + 0x6688bb7711334455ULL, 0x2f9f06587799c1b6ULL, 0x15c743697c84ed91ULL, + 0x01f7797b7a8ef58fULL, 0x0de76f757888fd85ULL, 0xb4adf782365ad8eeULL, + 0x48e0c4541c24706cULL, 0x96d59eaf394be4ddULL, 0xcbf2199259eb7920ULL, + 0x50c0e84818286078ULL, 0xe98a70bf56fa4513ULL, 0x8df1393eb3c8f645ULL, + 0x87e92437b0cdfa4aULL, 0xd83d51fc246c90b4ULL, 0xc01d7de0206080a0ULL, + 0x8bf93239b2cbf240ULL, 0x4be44fd992ab72e0ULL, 0xed71894ea3f8b615ULL, + 0xba4e137ac05d27e7ULL, 0x851ad6c144cc0d49ULL, 0x5137913362a695f7ULL, + 0x6080b07010304050ULL, 0x9fc9082bb4c1ea5eULL, 0x3f54c5bb84912aaeULL, + 0x9722e7d443c51152ULL, 0x4dec44de93a876e5ULL, 0xb65e0574c25b2fedULL, + 0xa16ab4eb4ade357fULL, 0xa9815b14bddace73ULL, 0x050c808a8f8c0689ULL, + 0xee7502c32d77b499ULL, 0xaf895013bcd9ca76ULL, 0x6f942df39cb94ad6ULL, + 0x6177c90b6abeb5dfULL, 0x9d3afadd40c01d5dULL, 0x98367a57cf4c1bd4ULL, + 0xeb798249a2fbb210ULL, 0x2774e9a7809d3abaULL, 0xbf4293f04fd1216eULL, + 0x42f8d95d1f217c63ULL, 0x861e5d4cca430fc5ULL, 0xdb39da71aae39238ULL, + 0x912aecd342c61557ULL +}; + +static const u64 T5[256] = { + 0xb9bb016ad3ba68d2ULL, 0x9ae5b166fc54194dULL, 0x65e2cd14712f93bcULL, + 0x8725511b9c74b9cdULL, 0xa2f7a457f5530251ULL, 0xd6d003be68d3b86bULL, + 0xded604b56bd2bd6fULL, 0x52b3fe85d74d6429ULL, 0xbafdad4af0500d5dULL, + 0x09cf63e0e9ac268aULL, 0x1c0984968a8d830eULL, 0x91a51a4ddcbf79c6ULL, + 0xa73d4d379070adddULL, 0xaaf1a35cf6520755ULL, 0xa47be117b39ac852ULL, + 0x5ab5f98ed44c612dULL, 0x0346ac2023ea658fULL, 0xe6c4118462d5a673ULL, + 0xcc55c268a497f166ULL, 0xc6dc0da86ed1b263ULL, 0x85aa99d05533ffccULL, + 0xb2fbaa41f3510859ULL, 0xe2c79c0fed5b2a71ULL, 0x59f355aef7a604a2ULL, + 0xbefe20c17fde815fULL, 0x7aade5a2d848753dULL, 0x29d77fcce5a8329aULL, + 0xbc71e80ab699c75eULL, 0x96e03be670db904bULL, 0x8dac9edb5632fac8ULL, + 0xd1952215c4b751e6ULL, 0xb332ceaa19fc2bd7ULL, 0x4b70937338e348abULL, + 0x8463fd3bbf9edc42ULL, 0xfc41d052ae91ef7eULL, 0xac7de61cb09bcd56ULL, + 0x437694783be24dafULL, 0xb1bd0661d0bb6dd6ULL, 0x329bdaf1c3415819ULL, + 0x577917e5b26ecba5ULL, 0x41f95cb3f2a50baeULL, 0x16804b5640cbc00bULL, + 0x7f670cc2bd6bdab1ULL, 0xdc59cc7ea295fb6eULL, 0x61e1409ffea11fbeULL, + 0xcb10e3c308f318ebULL, 0xe181302fceb14ffeULL, 0x100c0e1606020a08ULL, + 0x2e925e6749ccdb17ULL, 0x6ea2663f51c4f337ULL, 0xe84e53cf271d6974ULL, + 0xa0786c9c3c144450ULL, 0x56b0730e58c3e82bULL, 0x3f57349aa563f291ULL, + 0x9ee63ced73da954fULL, 0xd2d38e35e75d3469ULL, 0xc2df8023e15f3e61ULL, + 0xaef22ed779dc8b57ULL, 0xcf136e48877d94e9ULL, 0x2694596c4acdde13ULL, + 0xdf1f605e817f9ee1ULL, 0xeac19b04ee5a2f75ULL, 0x477519f3b46cc1adULL, + 0xdad5893ee45c316dULL, 0xeb08ffef04f70cfbULL, 0x2dd4f2476a26be98ULL, + 0xab38c7b71cff24dbULL, 0x3b54b9112aed7e93ULL, 0x134aa23625e86f87ULL, + 0x9c69f426ba9dd34eULL, 0x5f7f10eeb16fcea1ULL, 0x04038d8b8f8e8c02ULL, + 0xc8564fe32b197d64ULL, 0x69e74794fda01abaULL, 0xd31aeade0df017e7ULL, + 0x3c1198ba8689971eULL, 0x78222d69110f333cULL, 0x3812153109071b1cULL, + 0x11c56afdecaf2986ULL, 0x8b20db9b10fb30cbULL, 0x4030385818082820ULL, + 0xa87e6b973f154154ULL, 0x682e237f170d3934ULL, 0x20181c2c0c041410ULL, + 0x0806070b03010504ULL, 0x074521abac64e98dULL, 0xb6f827ca7cdf845bULL, + 0x97295f0d9a76b3c5ULL, 0xef0b72648b7980f9ULL, 0xa6f429dc7add8e53ULL, + 0xf58eb3b2473dc9f4ULL, 0xb074628a3a164e58ULL, 0xe582bda4413fc3fcULL, + 0xa5b285fc5937ebdcULL, 0x4f731ef8b76dc4a9ULL, 0xdd90a8954838d8e0ULL, + 0xa1b10877d6b967deULL, 0xbf37442a9573a2d1ULL, 0x1b4ca53d26e96a83ULL, + 0xb5be8bea5f35e1d4ULL, 0x92e3b66dff551c49ULL, 0xaf3b4a3c9371a8d9ULL, + 0xff077c728d7b8af1ULL, 0x140f839d898c860aULL, 0xb73143219672a7d5ULL, + 0x34179fb18588921aULL, 0xe30ef8e407f609ffULL, 0x4dfcd6337e2a82a8ULL, + 0xed84baaf423ec6f8ULL, 0xcad98728e25e3b65ULL, 0x25d2f54c6927bb9cULL, + 0x0a89cfc0ca464305ULL, 0x60282474140c3c30ULL, 0x0f4326a0af65ec89ULL, + 0x676d05dfb868d5bdULL, 0x2f5b3a8ca361f899ULL, 0x180a091d05030f0cULL, + 0x46bc7d185ec1e223ULL, 0x82efb87bf9571641ULL, 0xfece189967d6a97fULL, + 0x86ec35f076d99a43ULL, 0xfacd9512e858257dULL, 0x8eea32fb75d89f47ULL, + 0x17492fbdaa66e385ULL, 0xf6c81f9264d7ac7bULL, 0xcd9ca6834e3ad2e8ULL, + 0x0e8a424b45c8cf07ULL, 0xfd88b4b9443cccf0ULL, 0x8326dc9013fa35cfULL, + 0xc453c563a796f462ULL, 0x51f552a5f4a701a6ULL, 0xb477ef01b598c25aULL, + 0x3352be1a29ec7b97ULL, 0xa9b70f7cd5b862daULL, 0x76a86f2254c7fc3bULL, + 0x19c36df6efae2c82ULL, 0x6f6b02d4bb69d0b9ULL, 0x62a7ecbfdd4b7a31ULL, + 0x31dd76d1e0ab3d96ULL, 0x21d178c7e6a9379eULL, 0x1f4f28b6a967e681ULL, + 0x503c364e1e0a2228ULL, 0x028fc8cbc9474601ULL, 0xc316e4c80bf21defULL, + 0xc1992c03c2b55beeULL, 0x0dccee6b6622aa88ULL, 0x7b64814932e556b3ULL, + 0x235eb00c2fee719fULL, 0x99a31d46dfbe7cc2ULL, 0x45fad1387d2b87acULL, + 0x7c21a0e29e81bf3eULL, 0x906c7ea636125a48ULL, 0x6c2daef49883b536ULL, + 0xd85a41f52d1b776cULL, 0x70242a62120e3638ULL, 0x05cae9606523af8cULL, + 0xfb04f1f902f506f3ULL, 0x1283c6ddcf454c09ULL, 0x15c6e7766321a584ULL, + 0x3e9e50714fced11fULL, 0x72abe2a9db497039ULL, 0x7de8c409742c9cb0ULL, + 0x9b2cd58d16f93ac3ULL, 0x636e885437e659bfULL, 0xd993251ec7b654e2ULL, + 0x5df0d825782888a0ULL, 0xb872658139174b5cULL, 0x642ba9ff9b82b032ULL, + 0xd05c46fe2e1a7268ULL, 0x2c1d96ac808b9d16ULL, 0xa33ec0bc1ffe21dfULL, + 0x241b91a7838a9812ULL, 0x48363f531b092d24ULL, 0x068c454046c9ca03ULL, + 0x4c35b2d89487a126ULL, 0x4ab9f798d24e6b25ULL, 0x5b7c9d653ee142a3ULL, + 0x6de4ca1f722e96b8ULL, 0x7362864231e453b7ULL, 0x537a9a6e3de047a7ULL, + 0x0b40ab2b20eb608bULL, 0xf447d759ad90ea7aULL, 0x49ff5bb8f1a40eaaULL, + 0xf0445ad2221e6678ULL, 0x5c39bcce9285ab2eULL, 0x275d3d87a060fd9dULL, + 0x0000000000000000ULL, 0x35defb5a6f25b194ULL, 0xf302f6f201f403f7ULL, + 0xdb1cedd50ef112e3ULL, 0xd45fcb75a194fe6aULL, 0x583a31451d0b272cULL, + 0x6b688f5f34e75cbbULL, 0x8f2356109f75bcc9ULL, 0x2b58b7072cef749bULL, + 0xbdb88ce15c34e4d0ULL, 0x95a697c65331f5c4ULL, 0xeec2168f61d4a377ULL, + 0xceda0aa36dd0b767ULL, 0x4433b5d39786a422ULL, 0xd7196755827e9be5ULL, + 0x01c964ebeaad238eULL, 0xbb34c9a11afd2ed3ULL, 0x55f6df2e7b298da4ULL, + 0x9da090cd5030f0c0ULL, 0xc59aa1884d3bd7ecULL, 0x8c65fa30bc9fd946ULL, + 0x932ad28615f83fc7ULL, 0x7eae682957c6f93fULL, 0x986a79ad35135f4cULL, + 0x3014123a0a061e18ULL, 0x281e1b270f051114ULL, 0x66a4613452c5f633ULL, + 0x886677bb33115544ULL, 0x9f2f58069977b6c1ULL, 0xc7156943847c91edULL, + 0xf7017b798e7a8ff5ULL, 0xe70d756f887885fdULL, 0xadb482f75a36eed8ULL, + 0xe04854c4241c6c70ULL, 0xd596af9e4b39dde4ULL, 0xf2cb9219eb592079ULL, + 0xc05048e828187860ULL, 0x8ae9bf70fa561345ULL, 0xf18d3e39c8b345f6ULL, + 0xe9873724cdb04afaULL, 0x3dd8fc516c24b490ULL, 0x1dc0e07d6020a080ULL, + 0xf98b3932cbb240f2ULL, 0xe44bd94fab92e072ULL, 0x71ed4e89f8a315b6ULL, + 0x4eba7a135dc0e727ULL, 0x1a85c1d6cc44490dULL, 0x37513391a662f795ULL, + 0x806070b030105040ULL, 0xc99f2b08c1b45eeaULL, 0x543fbbc59184ae2aULL, + 0x2297d4e7c5435211ULL, 0xec4dde44a893e576ULL, 0x5eb674055bc2ed2fULL, + 0x6aa1ebb4de4a7f35ULL, 0x81a9145bdabd73ceULL, 0x0c058a808c8f8906ULL, + 0x75eec302772d99b4ULL, 0x89af1350d9bc76caULL, 0x946ff32db99cd64aULL, + 0x77610bc9be6adfb5ULL, 0x3a9dddfac0405d1dULL, 0x3698577a4ccfd41bULL, + 0x79eb4982fba210b2ULL, 0x7427a7e99d80ba3aULL, 0x42bff093d14f6e21ULL, + 0xf8425dd9211f637cULL, 0x1e864c5d43cac50fULL, 0x39db71dae3aa3892ULL, + 0x2a91d3ecc6425715ULL +}; + +static const u64 T6[256] = { + 0x6a01bbb9d268bad3ULL, 0x66b1e59a4d1954fcULL, 0x14cde265bc932f71ULL, + 0x1b512587cdb9749cULL, 0x57a4f7a2510253f5ULL, 0xbe03d0d66bb8d368ULL, + 0xb504d6de6fbdd26bULL, 0x85feb35229644dd7ULL, 0x4aadfdba5d0d50f0ULL, + 0xe063cf098a26ace9ULL, 0x9684091c0e838d8aULL, 0x4d1aa591c679bfdcULL, + 0x374d3da7ddad7090ULL, 0x5ca3f1aa550752f6ULL, 0x17e17ba452c89ab3ULL, + 0x8ef9b55a2d614cd4ULL, 0x20ac46038f65ea23ULL, 0x8411c4e673a6d562ULL, + 0x68c255cc66f197a4ULL, 0xa80ddcc663b2d16eULL, 0xd099aa85ccff3355ULL, + 0x41aafbb2590851f3ULL, 0x0f9cc7e2712a5bedULL, 0xae55f359a204a6f7ULL, + 0xc120febe5f81de7fULL, 0xa2e5ad7a3d7548d8ULL, 0xcc7fd7299a32a8e5ULL, + 0x0ae871bc5ec799b6ULL, 0xe63be0964b90db70ULL, 0xdb9eac8dc8fa3256ULL, + 0x152295d1e651b7c4ULL, 0xaace32b3d72bfc19ULL, 0x7393704bab48e338ULL, + 0x3bfd638442dc9ebfULL, 0x52d041fc7eef91aeULL, 0x1ce67dac56cd9bb0ULL, + 0x78947643af4de23bULL, 0x6106bdb1d66dbbd0ULL, 0xf1da9b32195841c3ULL, + 0xe5177957a5cb6eb2ULL, 0xb35cf941ae0ba5f2ULL, 0x564b80160bc0cb40ULL, + 0xc20c677fb1da6bbdULL, 0x7ecc59dc6efb95a2ULL, 0x9f40e161be1fa1feULL, + 0xc3e310cbeb18f308ULL, 0x2f3081e1fe4fb1ceULL, 0x160e0c10080a0206ULL, + 0x675e922e17dbcc49ULL, 0x3f66a26e37f3c451ULL, 0xcf534ee874691d27ULL, + 0x9c6c78a05044143cULL, 0x0e73b0562be8c358ULL, 0x9a34573f91f263a5ULL, + 0xed3ce69e4f95da73ULL, 0x358ed3d269345de7ULL, 0x2380dfc2613e5fe1ULL, + 0xd72ef2ae578bdc79ULL, 0x486e13cfe9947d87ULL, 0x6c59942613decd4aULL, + 0x5e601fdfe19e7f81ULL, 0x049bc1ea752f5aeeULL, 0xf3197547adc16cb4ULL, + 0x3e89d5da6d315ce4ULL, 0xefff08ebfb0cf704ULL, 0x47f2d42d98be266aULL, + 0xb7c738abdb24ff1cULL, 0x11b9543b937eed2aULL, 0x36a24a13876fe825ULL, + 0x26f4699c4ed39dbaULL, 0xee107f5fa1ce6fb1ULL, 0x8b8d0304028c8e8fULL, + 0xe34f56c8647d192bULL, 0x9447e769ba1aa0fdULL, 0xdeea1ad3e717f00dULL, + 0xba98113c1e978986ULL, 0x692d22783c330f11ULL, 0x311512381c1b0709ULL, + 0xfd6ac5118629afecULL, 0x9bdb208bcb30fb10ULL, 0x5838304020280818ULL, + 0x976b7ea85441153fULL, 0x7f232e6834390d17ULL, 0x2c1c18201014040cULL, + 0x0b07060804050103ULL, 0xab2145078de964acULL, 0xca27f8b65b84df7cULL, + 0x0d5f2997c5b3769aULL, 0x64720beff980798bULL, 0xdc29f4a6538edd7aULL, + 0xb2b38ef5f4c93d47ULL, 0x8a6274b0584e163aULL, 0xa4bd82e5fcc33f41ULL, + 0xfc85b2a5dceb3759ULL, 0xf81e734fa9c46db7ULL, 0x95a890dde0d83848ULL, + 0x7708b1a1de67b9d6ULL, 0x2a4437bfd1a27395ULL, 0x3da54c1b836ae926ULL, + 0xea8bbeb5d4e1355fULL, 0x6db6e392491c55ffULL, 0x3c4a3bafd9a87193ULL, + 0x727c07fff18a7b8dULL, 0x9d830f140a868c89ULL, 0x214331b7d5a77296ULL, + 0xb19f17341a928885ULL, 0xe4f80ee3ff09f607ULL, 0x33d6fc4da8822a7eULL, + 0xafba84edf8c63e42ULL, 0x2887d9ca653b5ee2ULL, 0x4cf5d2259cbb2769ULL, + 0xc0cf890a054346caULL, 0x74242860303c0c14ULL, 0xa026430f89ec65afULL, + 0xdf056d67bdd568b8ULL, 0x8c3a5b2f99f861a3ULL, 0x1d090a180c0f0305ULL, + 0x187dbc4623e2c15eULL, 0x7bb8ef82411657f9ULL, 0x9918cefe7fa9d667ULL, + 0xf035ec86439ad976ULL, 0x1295cdfa7d2558e8ULL, 0xfb32ea8e479fd875ULL, + 0xbd2f491785e366aaULL, 0x921fc8f67bacd764ULL, 0x83a69ccde8d23a4eULL, + 0x4b428a0e07cfc845ULL, 0xb9b488fdf0cc3c44ULL, 0x90dc2683cf35fa13ULL, + 0x63c553c462f496a7ULL, 0xa552f551a601a7f4ULL, 0x01ef77b45ac298b5ULL, + 0x1abe5233977bec29ULL, 0x7c0fb7a9da62b8d5ULL, 0x226fa8763bfcc754ULL, + 0xf66dc319822caeefULL, 0xd4026b6fb9d069bbULL, 0xbfeca762317a4bddULL, + 0xd176dd31963dabe0ULL, 0xc778d1219e37a9e6ULL, 0xb6284f1f81e667a9ULL, + 0x4e363c5028220a1eULL, 0xcbc88f02014647c9ULL, 0xc8e416c3ef1df20bULL, + 0x032c99c1ee5bb5c2ULL, 0x6beecc0d88aa2266ULL, 0x4981647bb356e532ULL, + 0x0cb05e239f71ee2fULL, 0x461da399c27cbedfULL, 0x38d1fa45ac872b7dULL, + 0xe2a0217c3ebf819eULL, 0xa67e6c90485a1236ULL, 0xf4ae2d6c36b58398ULL, + 0xf5415ad86c771b2dULL, 0x622a247038360e12ULL, 0x60e9ca058caf2365ULL, + 0xf9f104fbf306f502ULL, 0xddc68312094c45cfULL, 0x76e7c61584a52163ULL, + 0x71509e3e1fd1ce4fULL, 0xa9e2ab72397049dbULL, 0x09c4e87db09c2c74ULL, + 0x8dd52c9bc33af916ULL, 0x54886e63bf59e637ULL, 0x1e2593d9e254b6c7ULL, + 0x25d8f05da0882878ULL, 0x816572b85c4b1739ULL, 0xffa92b6432b0829bULL, + 0xfe465cd068721a2eULL, 0xac961d2c169d8b80ULL, 0xbcc03ea3df21fe1fULL, + 0xa7911b2412988a83ULL, 0x533f3648242d091bULL, 0x40458c0603cac946ULL, + 0xd8b2354c26a18794ULL, 0x98f7b94a256b4ed2ULL, 0x659d7c5ba342e13eULL, + 0x1fcae46db8962e72ULL, 0x42866273b753e431ULL, 0x6e9a7a53a747e03dULL, + 0x2bab400b8b60eb20ULL, 0x59d747f47aea90adULL, 0xb85bff49aa0ea4f1ULL, + 0xd25a44f078661e22ULL, 0xcebc395c2eab8592ULL, 0x873d5d279dfd60a0ULL, + 0x0000000000000000ULL, 0x5afbde3594b1256fULL, 0xf2f602f3f703f401ULL, + 0xd5ed1cdbe312f10eULL, 0x75cb5fd46afe94a1ULL, 0x45313a582c270b1dULL, + 0x5f8f686bbb5ce734ULL, 0x1056238fc9bc759fULL, 0x07b7582b9b74ef2cULL, + 0xe18cb8bdd0e4345cULL, 0xc697a695c4f53153ULL, 0x8f16c2ee77a3d461ULL, + 0xa30adace67b7d06dULL, 0xd3b5334422a48697ULL, 0x556719d7e59b7e82ULL, + 0xeb64c9018e23adeaULL, 0xa1c934bbd32efd1aULL, 0x2edff655a48d297bULL, + 0xcd90a09dc0f03050ULL, 0x88a19ac5ecd73b4dULL, 0x30fa658c46d99fbcULL, + 0x86d22a93c73ff815ULL, 0x2968ae7e3ff9c657ULL, 0xad796a984c5f1335ULL, + 0x3a121430181e060aULL, 0x271b1e281411050fULL, 0x3461a46633f6c552ULL, + 0xbb77668844551133ULL, 0x06582f9fc1b67799ULL, 0x436915c7ed917c84ULL, + 0x797b01f7f58f7a8eULL, 0x6f750de7fd857888ULL, 0xf782b4add8ee365aULL, + 0xc45448e0706c1c24ULL, 0x9eaf96d5e4dd394bULL, 0x1992cbf2792059ebULL, + 0xe84850c060781828ULL, 0x70bfe98a451356faULL, 0x393e8df1f645b3c8ULL, + 0x243787e9fa4ab0cdULL, 0x51fcd83d90b4246cULL, 0x7de0c01d80a02060ULL, + 0x32398bf9f240b2cbULL, 0x4fd94be472e092abULL, 0x894eed71b615a3f8ULL, + 0x137aba4e27e7c05dULL, 0xd6c1851a0d4944ccULL, 0x9133513795f762a6ULL, + 0xb070608040501030ULL, 0x082b9fc9ea5eb4c1ULL, 0xc5bb3f542aae8491ULL, + 0xe7d49722115243c5ULL, 0x44de4dec76e593a8ULL, 0x0574b65e2fedc25bULL, + 0xb4eba16a357f4adeULL, 0x5b14a981ce73bddaULL, 0x808a050c06898f8cULL, + 0x02c3ee75b4992d77ULL, 0x5013af89ca76bcd9ULL, 0x2df36f944ad69cb9ULL, + 0xc90b6177b5df6abeULL, 0xfadd9d3a1d5d40c0ULL, 0x7a5798361bd4cf4cULL, + 0x8249eb79b210a2fbULL, 0xe9a727743aba809dULL, 0x93f0bf42216e4fd1ULL, + 0xd95d42f87c631f21ULL, 0x5d4c861e0fc5ca43ULL, 0xda71db399238aae3ULL, + 0xecd3912a155742c6ULL +}; + +static const u64 T7[256] = { + 0x016ab9bb68d2d3baULL, 0xb1669ae5194dfc54ULL, 0xcd1465e293bc712fULL, + 0x511b8725b9cd9c74ULL, 0xa457a2f70251f553ULL, 0x03bed6d0b86b68d3ULL, + 0x04b5ded6bd6f6bd2ULL, 0xfe8552b36429d74dULL, 0xad4abafd0d5df050ULL, + 0x63e009cf268ae9acULL, 0x84961c09830e8a8dULL, 0x1a4d91a579c6dcbfULL, + 0x4d37a73daddd9070ULL, 0xa35caaf10755f652ULL, 0xe117a47bc852b39aULL, + 0xf98e5ab5612dd44cULL, 0xac200346658f23eaULL, 0x1184e6c4a67362d5ULL, + 0xc268cc55f166a497ULL, 0x0da8c6dcb2636ed1ULL, 0x99d085aaffcc5533ULL, + 0xaa41b2fb0859f351ULL, 0x9c0fe2c72a71ed5bULL, 0x55ae59f304a2f7a6ULL, + 0x20c1befe815f7fdeULL, 0xe5a27aad753dd848ULL, 0x7fcc29d7329ae5a8ULL, + 0xe80abc71c75eb699ULL, 0x3be696e0904b70dbULL, 0x9edb8dacfac85632ULL, + 0x2215d19551e6c4b7ULL, 0xceaab3322bd719fcULL, 0x93734b7048ab38e3ULL, + 0xfd3b8463dc42bf9eULL, 0xd052fc41ef7eae91ULL, 0xe61cac7dcd56b09bULL, + 0x947843764daf3be2ULL, 0x0661b1bd6dd6d0bbULL, 0xdaf1329b5819c341ULL, + 0x17e55779cba5b26eULL, 0x5cb341f90baef2a5ULL, 0x4b561680c00b40cbULL, + 0x0cc27f67dab1bd6bULL, 0xcc7edc59fb6ea295ULL, 0x409f61e11fbefea1ULL, + 0xe3c3cb1018eb08f3ULL, 0x302fe1814ffeceb1ULL, 0x0e16100c0a080602ULL, + 0x5e672e92db1749ccULL, 0x663f6ea2f33751c4ULL, 0x53cfe84e6974271dULL, + 0x6c9ca07844503c14ULL, 0x730e56b0e82b58c3ULL, 0x349a3f57f291a563ULL, + 0x3ced9ee6954f73daULL, 0x8e35d2d33469e75dULL, 0x8023c2df3e61e15fULL, + 0x2ed7aef28b5779dcULL, 0x6e48cf1394e9877dULL, 0x596c2694de134acdULL, + 0x605edf1f9ee1817fULL, 0x9b04eac12f75ee5aULL, 0x19f34775c1adb46cULL, + 0x893edad5316de45cULL, 0xffefeb080cfb04f7ULL, 0xf2472dd4be986a26ULL, + 0xc7b7ab3824db1cffULL, 0xb9113b547e932aedULL, 0xa236134a6f8725e8ULL, + 0xf4269c69d34eba9dULL, 0x10ee5f7fcea1b16fULL, 0x8d8b04038c028f8eULL, + 0x4fe3c8567d642b19ULL, 0x479469e71abafda0ULL, 0xeaded31a17e70df0ULL, + 0x98ba3c11971e8689ULL, 0x2d697822333c110fULL, 0x153138121b1c0907ULL, + 0x6afd11c52986ecafULL, 0xdb9b8b2030cb10fbULL, 0x3858403028201808ULL, + 0x6b97a87e41543f15ULL, 0x237f682e3934170dULL, 0x1c2c201814100c04ULL, + 0x070b080605040301ULL, 0x21ab0745e98dac64ULL, 0x27cab6f8845b7cdfULL, + 0x5f0d9729b3c59a76ULL, 0x7264ef0b80f98b79ULL, 0x29dca6f48e537addULL, + 0xb3b2f58ec9f4473dULL, 0x628ab0744e583a16ULL, 0xbda4e582c3fc413fULL, + 0x85fca5b2ebdc5937ULL, 0x1ef84f73c4a9b76dULL, 0xa895dd90d8e04838ULL, + 0x0877a1b167ded6b9ULL, 0x442abf37a2d19573ULL, 0xa53d1b4c6a8326e9ULL, + 0x8beab5bee1d45f35ULL, 0xb66d92e31c49ff55ULL, 0x4a3caf3ba8d99371ULL, + 0x7c72ff078af18d7bULL, 0x839d140f860a898cULL, 0x4321b731a7d59672ULL, + 0x9fb13417921a8588ULL, 0xf8e4e30e09ff07f6ULL, 0xd6334dfc82a87e2aULL, + 0xbaafed84c6f8423eULL, 0x8728cad93b65e25eULL, 0xf54c25d2bb9c6927ULL, + 0xcfc00a894305ca46ULL, 0x247460283c30140cULL, 0x26a00f43ec89af65ULL, + 0x05df676dd5bdb868ULL, 0x3a8c2f5bf899a361ULL, 0x091d180a0f0c0503ULL, + 0x7d1846bce2235ec1ULL, 0xb87b82ef1641f957ULL, 0x1899fecea97f67d6ULL, + 0x35f086ec9a4376d9ULL, 0x9512facd257de858ULL, 0x32fb8eea9f4775d8ULL, + 0x2fbd1749e385aa66ULL, 0x1f92f6c8ac7b64d7ULL, 0xa683cd9cd2e84e3aULL, + 0x424b0e8acf0745c8ULL, 0xb4b9fd88ccf0443cULL, 0xdc90832635cf13faULL, + 0xc563c453f462a796ULL, 0x52a551f501a6f4a7ULL, 0xef01b477c25ab598ULL, + 0xbe1a33527b9729ecULL, 0x0f7ca9b762dad5b8ULL, 0x6f2276a8fc3b54c7ULL, + 0x6df619c32c82efaeULL, 0x02d46f6bd0b9bb69ULL, 0xecbf62a77a31dd4bULL, + 0x76d131dd3d96e0abULL, 0x78c721d1379ee6a9ULL, 0x28b61f4fe681a967ULL, + 0x364e503c22281e0aULL, 0xc8cb028f4601c947ULL, 0xe4c8c3161def0bf2ULL, + 0x2c03c1995beec2b5ULL, 0xee6b0dccaa886622ULL, 0x81497b6456b332e5ULL, + 0xb00c235e719f2feeULL, 0x1d4699a37cc2dfbeULL, 0xd13845fa87ac7d2bULL, + 0xa0e27c21bf3e9e81ULL, 0x7ea6906c5a483612ULL, 0xaef46c2db5369883ULL, + 0x41f5d85a776c2d1bULL, 0x2a6270243638120eULL, 0xe96005caaf8c6523ULL, + 0xf1f9fb0406f302f5ULL, 0xc6dd12834c09cf45ULL, 0xe77615c6a5846321ULL, + 0x50713e9ed11f4fceULL, 0xe2a972ab7039db49ULL, 0xc4097de89cb0742cULL, + 0xd58d9b2c3ac316f9ULL, 0x8854636e59bf37e6ULL, 0x251ed99354e2c7b6ULL, + 0xd8255df088a07828ULL, 0x6581b8724b5c3917ULL, 0xa9ff642bb0329b82ULL, + 0x46fed05c72682e1aULL, 0x96ac2c1d9d16808bULL, 0xc0bca33e21df1ffeULL, + 0x91a7241b9812838aULL, 0x3f5348362d241b09ULL, 0x4540068cca0346c9ULL, + 0xb2d84c35a1269487ULL, 0xf7984ab96b25d24eULL, 0x9d655b7c42a33ee1ULL, + 0xca1f6de496b8722eULL, 0x8642736253b731e4ULL, 0x9a6e537a47a73de0ULL, + 0xab2b0b40608b20ebULL, 0xd759f447ea7aad90ULL, 0x5bb849ff0eaaf1a4ULL, + 0x5ad2f0446678221eULL, 0xbcce5c39ab2e9285ULL, 0x3d87275dfd9da060ULL, + 0x0000000000000000ULL, 0xfb5a35deb1946f25ULL, 0xf6f2f30203f701f4ULL, + 0xedd5db1c12e30ef1ULL, 0xcb75d45ffe6aa194ULL, 0x3145583a272c1d0bULL, + 0x8f5f6b685cbb34e7ULL, 0x56108f23bcc99f75ULL, 0xb7072b58749b2cefULL, + 0x8ce1bdb8e4d05c34ULL, 0x97c695a6f5c45331ULL, 0x168feec2a37761d4ULL, + 0x0aa3cedab7676dd0ULL, 0xb5d34433a4229786ULL, 0x6755d7199be5827eULL, + 0x64eb01c9238eeaadULL, 0xc9a1bb342ed31afdULL, 0xdf2e55f68da47b29ULL, + 0x90cd9da0f0c05030ULL, 0xa188c59ad7ec4d3bULL, 0xfa308c65d946bc9fULL, + 0xd286932a3fc715f8ULL, 0x68297eaef93f57c6ULL, 0x79ad986a5f4c3513ULL, + 0x123a30141e180a06ULL, 0x1b27281e11140f05ULL, 0x613466a4f63352c5ULL, + 0x77bb886655443311ULL, 0x58069f2fb6c19977ULL, 0x6943c71591ed847cULL, + 0x7b79f7018ff58e7aULL, 0x756fe70d85fd8878ULL, 0x82f7adb4eed85a36ULL, + 0x54c4e0486c70241cULL, 0xaf9ed596dde44b39ULL, 0x9219f2cb2079eb59ULL, + 0x48e8c05078602818ULL, 0xbf708ae91345fa56ULL, 0x3e39f18d45f6c8b3ULL, + 0x3724e9874afacdb0ULL, 0xfc513dd8b4906c24ULL, 0xe07d1dc0a0806020ULL, + 0x3932f98b40f2cbb2ULL, 0xd94fe44be072ab92ULL, 0x4e8971ed15b6f8a3ULL, + 0x7a134ebae7275dc0ULL, 0xc1d61a85490dcc44ULL, 0x33913751f795a662ULL, + 0x70b0806050403010ULL, 0x2b08c99f5eeac1b4ULL, 0xbbc5543fae2a9184ULL, + 0xd4e722975211c543ULL, 0xde44ec4de576a893ULL, 0x74055eb6ed2f5bc2ULL, + 0xebb46aa17f35de4aULL, 0x145b81a973cedabdULL, 0x8a800c0589068c8fULL, + 0xc30275ee99b4772dULL, 0x135089af76cad9bcULL, 0xf32d946fd64ab99cULL, + 0x0bc97761dfb5be6aULL, 0xddfa3a9d5d1dc040ULL, 0x577a3698d41b4ccfULL, + 0x498279eb10b2fba2ULL, 0xa7e97427ba3a9d80ULL, 0xf09342bf6e21d14fULL, + 0x5dd9f842637c211fULL, 0x4c5d1e86c50f43caULL, 0x71da39db3892e3aaULL, + 0xd3ec2a915715c642ULL +}; + +static const u64 c[KHAZAD_ROUNDS + 1] = { + 0xba542f7453d3d24dULL, 0x50ac8dbf70529a4cULL, 0xead597d133515ba6ULL, + 0xde48a899db32b7fcULL, 0xe39e919be2bb416eULL, 0xa5cb6b95a1f3b102ULL, + 0xccc41d14c363da5dULL, 0x5fdc7dcd7f5a6c5cULL, 0xf726ffede89d6f8eULL +}; + +static int khazad_setkey(void *ctx_arg, const u8 *in_key, + unsigned int key_len, u32 *flags) +{ + + struct khazad_ctx *ctx = ctx_arg; + int r; + const u64 *S = T7; + u64 K2, K1; + + if (key_len != 16) + { + *flags |= CRYPTO_TFM_RES_BAD_KEY_LEN; + return -EINVAL; + } + + K2 = ((u64)in_key[ 0] << 56) ^ + ((u64)in_key[ 1] << 48) ^ + ((u64)in_key[ 2] << 40) ^ + ((u64)in_key[ 3] << 32) ^ + ((u64)in_key[ 4] << 24) ^ + ((u64)in_key[ 5] << 16) ^ + ((u64)in_key[ 6] << 8) ^ + ((u64)in_key[ 7] ); + K1 = ((u64)in_key[ 8] << 56) ^ + ((u64)in_key[ 9] << 48) ^ + ((u64)in_key[10] << 40) ^ + ((u64)in_key[11] << 32) ^ + ((u64)in_key[12] << 24) ^ + ((u64)in_key[13] << 16) ^ + ((u64)in_key[14] << 8) ^ + ((u64)in_key[15] ); + + /* setup the encrypt key */ + for (r = 0; r <= KHAZAD_ROUNDS; r++) { + ctx->E[r] = T0[(int)(K1 >> 56) ] ^ + T1[(int)(K1 >> 48) & 0xff] ^ + T2[(int)(K1 >> 40) & 0xff] ^ + T3[(int)(K1 >> 32) & 0xff] ^ + T4[(int)(K1 >> 24) & 0xff] ^ + T5[(int)(K1 >> 16) & 0xff] ^ + T6[(int)(K1 >> 8) & 0xff] ^ + T7[(int)(K1 ) & 0xff] ^ + c[r] ^ K2; + K2 = K1; + K1 = ctx->E[r]; + } + /* Setup the decrypt key */ + ctx->D[0] = ctx->E[KHAZAD_ROUNDS]; + for (r = 1; r < KHAZAD_ROUNDS; r++) { + K1 = ctx->E[KHAZAD_ROUNDS - r]; + ctx->D[r] = T0[(int)S[(int)(K1 >> 56) ] & 0xff] ^ + T1[(int)S[(int)(K1 >> 48) & 0xff] & 0xff] ^ + T2[(int)S[(int)(K1 >> 40) & 0xff] & 0xff] ^ + T3[(int)S[(int)(K1 >> 32) & 0xff] & 0xff] ^ + T4[(int)S[(int)(K1 >> 24) & 0xff] & 0xff] ^ + T5[(int)S[(int)(K1 >> 16) & 0xff] & 0xff] ^ + T6[(int)S[(int)(K1 >> 8) & 0xff] & 0xff] ^ + T7[(int)S[(int)(K1 ) & 0xff] & 0xff]; + } + ctx->D[KHAZAD_ROUNDS] = ctx->E[0]; + + return 0; + +} + +static void khazad_crypt(const u64 roundKey[KHAZAD_ROUNDS + 1], + u8 *ciphertext, const u8 *plaintext) +{ + + int r; + u64 state; + + state = ((u64)plaintext[0] << 56) ^ + ((u64)plaintext[1] << 48) ^ + ((u64)plaintext[2] << 40) ^ + ((u64)plaintext[3] << 32) ^ + ((u64)plaintext[4] << 24) ^ + ((u64)plaintext[5] << 16) ^ + ((u64)plaintext[6] << 8) ^ + ((u64)plaintext[7] ) ^ + roundKey[0]; + + for (r = 1; r < KHAZAD_ROUNDS; r++) { + state = T0[(int)(state >> 56) ] ^ + T1[(int)(state >> 48) & 0xff] ^ + T2[(int)(state >> 40) & 0xff] ^ + T3[(int)(state >> 32) & 0xff] ^ + T4[(int)(state >> 24) & 0xff] ^ + T5[(int)(state >> 16) & 0xff] ^ + T6[(int)(state >> 8) & 0xff] ^ + T7[(int)(state ) & 0xff] ^ + roundKey[r]; + } + + state = (T0[(int)(state >> 56) ] & 0xff00000000000000ULL) ^ + (T1[(int)(state >> 48) & 0xff] & 0x00ff000000000000ULL) ^ + (T2[(int)(state >> 40) & 0xff] & 0x0000ff0000000000ULL) ^ + (T3[(int)(state >> 32) & 0xff] & 0x000000ff00000000ULL) ^ + (T4[(int)(state >> 24) & 0xff] & 0x00000000ff000000ULL) ^ + (T5[(int)(state >> 16) & 0xff] & 0x0000000000ff0000ULL) ^ + (T6[(int)(state >> 8) & 0xff] & 0x000000000000ff00ULL) ^ + (T7[(int)(state ) & 0xff] & 0x00000000000000ffULL) ^ + roundKey[KHAZAD_ROUNDS]; + + ciphertext[0] = (u8)(state >> 56); + ciphertext[1] = (u8)(state >> 48); + ciphertext[2] = (u8)(state >> 40); + ciphertext[3] = (u8)(state >> 32); + ciphertext[4] = (u8)(state >> 24); + ciphertext[5] = (u8)(state >> 16); + ciphertext[6] = (u8)(state >> 8); + ciphertext[7] = (u8)(state ); + +} + +static void khazad_encrypt(void *ctx_arg, u8 *dst, const u8 *src) +{ + struct khazad_ctx *ctx = ctx_arg; + khazad_crypt(ctx->E, dst, src); +} + +static void khazad_decrypt(void *ctx_arg, u8 *dst, const u8 *src) +{ + struct khazad_ctx *ctx = ctx_arg; + khazad_crypt(ctx->D, dst, src); +} + +static struct crypto_alg khazad_alg = { + .cra_name = "khazad", + .cra_flags = CRYPTO_ALG_TYPE_CIPHER, + .cra_blocksize = KHAZAD_BLOCK_SIZE, + .cra_ctxsize = sizeof (struct khazad_ctx), + .cra_module = THIS_MODULE, + .cra_list = LIST_HEAD_INIT(khazad_alg.cra_list), + .cra_u = { .cipher = { + .cia_min_keysize = KHAZAD_KEY_SIZE, + .cia_max_keysize = KHAZAD_KEY_SIZE, + .cia_setkey = khazad_setkey, + .cia_encrypt = khazad_encrypt, + .cia_decrypt = khazad_decrypt } } +}; + +static int __init init(void) +{ + int ret = 0; + + ret = crypto_register_alg(&khazad_alg); + return ret; +} + +static void __exit fini(void) +{ + crypto_unregister_alg(&khazad_alg); +} + + +module_init(init); +module_exit(fini); + +MODULE_LICENSE("GPL"); +MODULE_DESCRIPTION("Khazad Cryptographic Algorithm"); diff -urN linux-2.4.27/crypto/scatterwalk.c linux-2.4.28/crypto/scatterwalk.c --- linux-2.4.27/crypto/scatterwalk.c 2004-04-14 06:05:28.000000000 -0700 +++ linux-2.4.28/crypto/scatterwalk.c 2004-11-17 03:54:21.174380461 -0800 @@ -70,7 +70,7 @@ { /* walk->data may be pointing the first byte of the next page; however, we know we transfered at least one byte. So, - walk->data - 1 will be a virutual address in the mapped page. */ + walk->data - 1 will be a virtual address in the mapped page. */ if (out) flush_dcache_page(walk->page); diff -urN linux-2.4.27/crypto/serpent.c linux-2.4.28/crypto/serpent.c --- linux-2.4.27/crypto/serpent.c 2004-02-18 05:36:31.000000000 -0800 +++ linux-2.4.28/crypto/serpent.c 2004-11-17 03:54:21.175380502 -0800 @@ -4,6 +4,10 @@ * Serpent Cipher Algorithm. * * Copyright (C) 2002 Dag Arne Osvik + * 2003 Herbert Valerio Riedel + * + * Added tnepres support: Ruben Jesus Garcia Hernandez , 18.10.2004 + * Based on code by hvr * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by @@ -212,7 +216,8 @@ u32 expkey[SERPENT_EXPKEY_WORDS]; }; -static int setkey(void *ctx, const u8 *key, unsigned int keylen, u32 *flags) + +static int serpent_setkey(void *ctx, const u8 *key, unsigned int keylen, u32 *flags) { u32 *k = ((struct serpent_ctx *)ctx)->expkey; u8 *k8 = (u8 *)k; @@ -362,7 +367,7 @@ return 0; } -static void encrypt(void *ctx, u8 *dst, const u8 *src) +static void serpent_encrypt(void *ctx, u8 *dst, const u8 *src) { const u32 *k = ((struct serpent_ctx *)ctx)->expkey, @@ -420,7 +425,7 @@ d[3] = cpu_to_le32(r3); } -static void decrypt(void *ctx, u8 *dst, const u8 *src) +static void serpent_decrypt(void *ctx, u8 *dst, const u8 *src) { const u32 *k = ((struct serpent_ctx *)ctx)->expkey, @@ -483,18 +488,101 @@ .cra_u = { .cipher = { .cia_min_keysize = SERPENT_MIN_KEY_SIZE, .cia_max_keysize = SERPENT_MAX_KEY_SIZE, - .cia_setkey = setkey, - .cia_encrypt = encrypt, - .cia_decrypt = decrypt } } + .cia_setkey = serpent_setkey, + .cia_encrypt = serpent_encrypt, + .cia_decrypt = serpent_decrypt } } +}; + +static int tnepres_setkey(void *ctx, const u8 *key, unsigned int keylen, u32 *flags) +{ + u8 rev_key[SERPENT_MAX_KEY_SIZE]; + int i; + + if ((keylen < SERPENT_MIN_KEY_SIZE) + || (keylen > SERPENT_MAX_KEY_SIZE)) { + *flags |= CRYPTO_TFM_RES_BAD_KEY_LEN; + return -EINVAL; + } + + for (i = 0; i < keylen; ++i) + rev_key[keylen - i - 1] = key[i]; + + return serpent_setkey(ctx, rev_key, keylen, flags); +} + +static void tnepres_encrypt(void *ctx, u8 *dst, const u8 *src) +{ + const u32 * const s = (const u32 * const)src; + u32 * const d = (u32 * const)dst; + + u32 rs[4], rd[4]; + + rs[0] = swab32(s[3]); + rs[1] = swab32(s[2]); + rs[2] = swab32(s[1]); + rs[3] = swab32(s[0]); + + serpent_encrypt(ctx, (u8 *)rd, (u8 *)rs); + + d[0] = swab32(rd[3]); + d[1] = swab32(rd[2]); + d[2] = swab32(rd[1]); + d[3] = swab32(rd[0]); +} + +static void tnepres_decrypt(void *ctx, u8 *dst, const u8 *src) +{ + const u32 * const s = (const u32 * const)src; + u32 * const d = (u32 * const)dst; + + u32 rs[4], rd[4]; + + rs[0] = swab32(s[3]); + rs[1] = swab32(s[2]); + rs[2] = swab32(s[1]); + rs[3] = swab32(s[0]); + + serpent_decrypt(ctx, (u8 *)rd, (u8 *)rs); + + d[0] = swab32(rd[3]); + d[1] = swab32(rd[2]); + d[2] = swab32(rd[1]); + d[3] = swab32(rd[0]); +} + +static struct crypto_alg tnepres_alg = { + .cra_name = "tnepres", + .cra_flags = CRYPTO_ALG_TYPE_CIPHER, + .cra_blocksize = SERPENT_BLOCK_SIZE, + .cra_ctxsize = sizeof(struct serpent_ctx), + .cra_module = THIS_MODULE, + .cra_list = LIST_HEAD_INIT(serpent_alg.cra_list), + .cra_u = { .cipher = { + .cia_min_keysize = SERPENT_MIN_KEY_SIZE, + .cia_max_keysize = SERPENT_MAX_KEY_SIZE, + .cia_setkey = tnepres_setkey, + .cia_encrypt = tnepres_encrypt, + .cia_decrypt = tnepres_decrypt } } }; static int __init init(void) { - return crypto_register_alg(&serpent_alg); + int ret = crypto_register_alg(&serpent_alg); + + if (ret) + return ret; + + ret = crypto_register_alg(&tnepres_alg); + + if (ret) + crypto_unregister_alg(&serpent_alg); + + return ret; } static void __exit fini(void) { + crypto_unregister_alg(&tnepres_alg); crypto_unregister_alg(&serpent_alg); } @@ -502,5 +590,5 @@ module_exit(fini); MODULE_LICENSE("GPL"); -MODULE_DESCRIPTION("Serpent Cipher Algorithm"); +MODULE_DESCRIPTION("Serpent and tnepres (kerneli compatible serpent reversed) Cipher Algorithm"); MODULE_AUTHOR("Dag Arne Osvik "); diff -urN linux-2.4.27/crypto/tcrypt.c linux-2.4.28/crypto/tcrypt.c --- linux-2.4.27/crypto/tcrypt.c 2004-08-07 16:26:04.000000000 -0700 +++ linux-2.4.28/crypto/tcrypt.c 2004-11-17 03:54:21.176380543 -0800 @@ -63,7 +63,8 @@ static char *check[] = { "des", "md5", "des3_ede", "rot13", "sha1", "sha256", "blowfish", "twofish", "serpent", "sha384", "sha512", "md4", "aes", "cast6", - "arc4", "michael_mic", "deflate", "tea", "xtea", NULL + "arc4", "michael_mic", "deflate", "tea", "xtea", "khazad", + "wp512", "wp384", "wp256", "tnepres", "anubis", NULL }; static void @@ -550,6 +551,10 @@ test_cipher ("serpent", MODE_ECB, ENCRYPT, serpent_enc_tv_template, SERPENT_ENC_TEST_VECTORS); test_cipher ("serpent", MODE_ECB, DECRYPT, serpent_dec_tv_template, SERPENT_DEC_TEST_VECTORS); + //TNEPRES + test_cipher ("tnepres", MODE_ECB, ENCRYPT, tnepres_enc_tv_template, TNEPRES_ENC_TEST_VECTORS); + test_cipher ("tnepres", MODE_ECB, DECRYPT, tnepres_dec_tv_template, TNEPRES_DEC_TEST_VECTORS); + //AES test_cipher ("aes", MODE_ECB, ENCRYPT, aes_enc_tv_template, AES_ENC_TEST_VECTORS); test_cipher ("aes", MODE_ECB, DECRYPT, aes_dec_tv_template, AES_DEC_TEST_VECTORS); @@ -575,8 +580,21 @@ test_cipher ("xtea", MODE_ECB, ENCRYPT, xtea_enc_tv_template, XTEA_ENC_TEST_VECTORS); test_cipher ("xtea", MODE_ECB, DECRYPT, xtea_dec_tv_template, XTEA_DEC_TEST_VECTORS); + //KHAZAD + test_cipher ("khazad", MODE_ECB, ENCRYPT, khazad_enc_tv_template, KHAZAD_ENC_TEST_VECTORS); + test_cipher ("khazad", MODE_ECB, DECRYPT, khazad_dec_tv_template, KHAZAD_DEC_TEST_VECTORS); + + //ANUBIS + test_cipher ("anubis", MODE_ECB, ENCRYPT, anubis_enc_tv_template, ANUBIS_ENC_TEST_VECTORS); + test_cipher ("anubis", MODE_ECB, DECRYPT, anubis_dec_tv_template, ANUBIS_DEC_TEST_VECTORS); + test_cipher ("anubis", MODE_CBC, ENCRYPT, anubis_cbc_enc_tv_template, ANUBIS_CBC_ENC_TEST_VECTORS); + test_cipher ("anubis", MODE_CBC, DECRYPT, anubis_cbc_dec_tv_template, ANUBIS_CBC_ENC_TEST_VECTORS); + test_hash("sha384", sha384_tv_template, SHA384_TEST_VECTORS); test_hash("sha512", sha512_tv_template, SHA512_TEST_VECTORS); + test_hash("wp512", wp512_tv_template, WP512_TEST_VECTORS); + test_hash("wp384", wp384_tv_template, WP384_TEST_VECTORS); + test_hash("wp256", wp256_tv_template, WP256_TEST_VECTORS); test_deflate(); #ifdef CONFIG_CRYPTO_HMAC test_hmac("md5", hmac_md5_tv_template, HMAC_MD5_TEST_VECTORS); @@ -630,6 +648,8 @@ break; case 9: + test_cipher ("serpent", MODE_ECB, ENCRYPT, serpent_enc_tv_template, SERPENT_ENC_TEST_VECTORS); + test_cipher ("serpent", MODE_ECB, DECRYPT, serpent_dec_tv_template, SERPENT_DEC_TEST_VECTORS); break; case 10: @@ -678,6 +698,33 @@ test_cipher ("xtea", MODE_ECB, DECRYPT, xtea_dec_tv_template, XTEA_DEC_TEST_VECTORS); break; + case 21: + test_cipher ("khazad", MODE_ECB, ENCRYPT, khazad_enc_tv_template, KHAZAD_ENC_TEST_VECTORS); + test_cipher ("khazad", MODE_ECB, DECRYPT, khazad_dec_tv_template, KHAZAD_DEC_TEST_VECTORS); + case 22: + test_hash("wp512", wp512_tv_template, WP512_TEST_VECTORS); + break; + + case 23: + test_hash("wp384", wp384_tv_template, WP384_TEST_VECTORS); + break; + + case 24: + test_hash("wp256", wp256_tv_template, WP256_TEST_VECTORS); + break; + + case 25: + test_cipher ("tnepres", MODE_ECB, ENCRYPT, tnepres_enc_tv_template, TNEPRES_ENC_TEST_VECTORS); + test_cipher ("tnepres", MODE_ECB, DECRYPT, tnepres_dec_tv_template, TNEPRES_DEC_TEST_VECTORS); + break; + + case 26: + test_cipher ("anubis", MODE_ECB, ENCRYPT, anubis_enc_tv_template, ANUBIS_ENC_TEST_VECTORS); + test_cipher ("anubis", MODE_ECB, DECRYPT, anubis_dec_tv_template, ANUBIS_DEC_TEST_VECTORS); + test_cipher ("anubis", MODE_CBC, ENCRYPT, anubis_cbc_enc_tv_template, ANUBIS_CBC_ENC_TEST_VECTORS); + test_cipher ("anubis", MODE_CBC, DECRYPT, anubis_cbc_dec_tv_template, ANUBIS_CBC_ENC_TEST_VECTORS); + break; + #ifdef CONFIG_CRYPTO_HMAC case 100: test_hmac("md5", hmac_md5_tv_template, HMAC_MD5_TEST_VECTORS); diff -urN linux-2.4.27/crypto/tcrypt.h linux-2.4.28/crypto/tcrypt.h --- linux-2.4.27/crypto/tcrypt.h 2004-08-07 16:26:04.000000000 -0700 +++ linux-2.4.28/crypto/tcrypt.h 2004-11-17 03:54:21.179380666 -0800 @@ -551,6 +551,257 @@ #endif /* CONFIG_CRYPTO_HMAC */ /* + * WHIRLPOOL test vectors from Whirlpool package + * by Vincent Rijmen and Paulo S. L. M. Barreto as part of the NESSIE + * submission + */ +#define WP512_TEST_VECTORS 8 + +struct hash_testvec wp512_tv_template[] = { + { + .plaintext = "", + .psize = 0, + .digest = { 0x19, 0xFA, 0x61, 0xD7, 0x55, 0x22, 0xA4, 0x66, + 0x9B, 0x44, 0xE3, 0x9C, 0x1D, 0x2E, 0x17, 0x26, + 0xC5, 0x30, 0x23, 0x21, 0x30, 0xD4, 0x07, 0xF8, + 0x9A, 0xFE, 0xE0, 0x96, 0x49, 0x97, 0xF7, 0xA7, + 0x3E, 0x83, 0xBE, 0x69, 0x8B, 0x28, 0x8F, 0xEB, + 0xCF, 0x88, 0xE3, 0xE0, 0x3C, 0x4F, 0x07, 0x57, + 0xEA, 0x89, 0x64, 0xE5, 0x9B, 0x63, 0xD9, 0x37, + 0x08, 0xB1, 0x38, 0xCC, 0x42, 0xA6, 0x6E, 0xB3 }, + + + }, { + .plaintext = "a", + .psize = 1, + .digest = { 0x8A, 0xCA, 0x26, 0x02, 0x79, 0x2A, 0xEC, 0x6F, + 0x11, 0xA6, 0x72, 0x06, 0x53, 0x1F, 0xB7, 0xD7, + 0xF0, 0xDF, 0xF5, 0x94, 0x13, 0x14, 0x5E, 0x69, + 0x73, 0xC4, 0x50, 0x01, 0xD0, 0x08, 0x7B, 0x42, + 0xD1, 0x1B, 0xC6, 0x45, 0x41, 0x3A, 0xEF, 0xF6, + 0x3A, 0x42, 0x39, 0x1A, 0x39, 0x14, 0x5A, 0x59, + 0x1A, 0x92, 0x20, 0x0D, 0x56, 0x01, 0x95, 0xE5, + 0x3B, 0x47, 0x85, 0x84, 0xFD, 0xAE, 0x23, 0x1A }, + }, { + .plaintext = "abc", + .psize = 3, + .digest = { 0x4E, 0x24, 0x48, 0xA4, 0xC6, 0xF4, 0x86, 0xBB, + 0x16, 0xB6, 0x56, 0x2C, 0x73, 0xB4, 0x02, 0x0B, + 0xF3, 0x04, 0x3E, 0x3A, 0x73, 0x1B, 0xCE, 0x72, + 0x1A, 0xE1, 0xB3, 0x03, 0xD9, 0x7E, 0x6D, 0x4C, + 0x71, 0x81, 0xEE, 0xBD, 0xB6, 0xC5, 0x7E, 0x27, + 0x7D, 0x0E, 0x34, 0x95, 0x71, 0x14, 0xCB, 0xD6, + 0xC7, 0x97, 0xFC, 0x9D, 0x95, 0xD8, 0xB5, 0x82, + 0xD2, 0x25, 0x29, 0x20, 0x76, 0xD4, 0xEE, 0xF5 }, + }, { + .plaintext = "message digest", + .psize = 14, + .digest = { 0x37, 0x8C, 0x84, 0xA4, 0x12, 0x6E, 0x2D, 0xC6, + 0xE5, 0x6D, 0xCC, 0x74, 0x58, 0x37, 0x7A, 0xAC, + 0x83, 0x8D, 0x00, 0x03, 0x22, 0x30, 0xF5, 0x3C, + 0xE1, 0xF5, 0x70, 0x0C, 0x0F, 0xFB, 0x4D, 0x3B, + 0x84, 0x21, 0x55, 0x76, 0x59, 0xEF, 0x55, 0xC1, + 0x06, 0xB4, 0xB5, 0x2A, 0xC5, 0xA4, 0xAA, 0xA6, + 0x92, 0xED, 0x92, 0x00, 0x52, 0x83, 0x8F, 0x33, + 0x62, 0xE8, 0x6D, 0xBD, 0x37, 0xA8, 0x90, 0x3E }, + }, { + .plaintext = "abcdefghijklmnopqrstuvwxyz", + .psize = 26, + .digest = { 0xF1, 0xD7, 0x54, 0x66, 0x26, 0x36, 0xFF, 0xE9, + 0x2C, 0x82, 0xEB, 0xB9, 0x21, 0x2A, 0x48, 0x4A, + 0x8D, 0x38, 0x63, 0x1E, 0xAD, 0x42, 0x38, 0xF5, + 0x44, 0x2E, 0xE1, 0x3B, 0x80, 0x54, 0xE4, 0x1B, + 0x08, 0xBF, 0x2A, 0x92, 0x51, 0xC3, 0x0B, 0x6A, + 0x0B, 0x8A, 0xAE, 0x86, 0x17, 0x7A, 0xB4, 0xA6, + 0xF6, 0x8F, 0x67, 0x3E, 0x72, 0x07, 0x86, 0x5D, + 0x5D, 0x98, 0x19, 0xA3, 0xDB, 0xA4, 0xEB, 0x3B }, + }, { + .plaintext = "ABCDEFGHIJKLMNOPQRSTUVWXYZ" + "abcdefghijklmnopqrstuvwxyz0123456789", + .psize = 62, + .digest = { 0xDC, 0x37, 0xE0, 0x08, 0xCF, 0x9E, 0xE6, 0x9B, + 0xF1, 0x1F, 0x00, 0xED, 0x9A, 0xBA, 0x26, 0x90, + 0x1D, 0xD7, 0xC2, 0x8C, 0xDE, 0xC0, 0x66, 0xCC, + 0x6A, 0xF4, 0x2E, 0x40, 0xF8, 0x2F, 0x3A, 0x1E, + 0x08, 0xEB, 0xA2, 0x66, 0x29, 0x12, 0x9D, 0x8F, + 0xB7, 0xCB, 0x57, 0x21, 0x1B, 0x92, 0x81, 0xA6, + 0x55, 0x17, 0xCC, 0x87, 0x9D, 0x7B, 0x96, 0x21, + 0x42, 0xC6, 0x5F, 0x5A, 0x7A, 0xF0, 0x14, 0x67 }, + }, { + .plaintext = "1234567890123456789012345678901234567890" + "1234567890123456789012345678901234567890", + .psize = 80, + .digest = { 0x46, 0x6E, 0xF1, 0x8B, 0xAB, 0xB0, 0x15, 0x4D, + 0x25, 0xB9, 0xD3, 0x8A, 0x64, 0x14, 0xF5, 0xC0, + 0x87, 0x84, 0x37, 0x2B, 0xCC, 0xB2, 0x04, 0xD6, + 0x54, 0x9C, 0x4A, 0xFA, 0xDB, 0x60, 0x14, 0x29, + 0x4D, 0x5B, 0xD8, 0xDF, 0x2A, 0x6C, 0x44, 0xE5, + 0x38, 0xCD, 0x04, 0x7B, 0x26, 0x81, 0xA5, 0x1A, + 0x2C, 0x60, 0x48, 0x1E, 0x88, 0xC5, 0xA2, 0x0B, + 0x2C, 0x2A, 0x80, 0xCF, 0x3A, 0x9A, 0x08, 0x3B }, + }, { + .plaintext = "abcdbcdecdefdefgefghfghighijhijk", + .psize = 32, + .digest = { 0x2A, 0x98, 0x7E, 0xA4, 0x0F, 0x91, 0x70, 0x61, + 0xF5, 0xD6, 0xF0, 0xA0, 0xE4, 0x64, 0x4F, 0x48, + 0x8A, 0x7A, 0x5A, 0x52, 0xDE, 0xEE, 0x65, 0x62, + 0x07, 0xC5, 0x62, 0xF9, 0x88, 0xE9, 0x5C, 0x69, + 0x16, 0xBD, 0xC8, 0x03, 0x1B, 0xC5, 0xBE, 0x1B, + 0x7B, 0x94, 0x76, 0x39, 0xFE, 0x05, 0x0B, 0x56, + 0x93, 0x9B, 0xAA, 0xA0, 0xAD, 0xFF, 0x9A, 0xE6, + 0x74, 0x5B, 0x7B, 0x18, 0x1C, 0x3B, 0xE3, 0xFD }, + }, +}; + +#define WP384_TEST_VECTORS 8 + +struct hash_testvec wp384_tv_template[] = { + { + .plaintext = "", + .psize = 0, + .digest = { 0x19, 0xFA, 0x61, 0xD7, 0x55, 0x22, 0xA4, 0x66, + 0x9B, 0x44, 0xE3, 0x9C, 0x1D, 0x2E, 0x17, 0x26, + 0xC5, 0x30, 0x23, 0x21, 0x30, 0xD4, 0x07, 0xF8, + 0x9A, 0xFE, 0xE0, 0x96, 0x49, 0x97, 0xF7, 0xA7, + 0x3E, 0x83, 0xBE, 0x69, 0x8B, 0x28, 0x8F, 0xEB, + 0xCF, 0x88, 0xE3, 0xE0, 0x3C, 0x4F, 0x07, 0x57 }, + + + }, { + .plaintext = "a", + .psize = 1, + .digest = { 0x8A, 0xCA, 0x26, 0x02, 0x79, 0x2A, 0xEC, 0x6F, + 0x11, 0xA6, 0x72, 0x06, 0x53, 0x1F, 0xB7, 0xD7, + 0xF0, 0xDF, 0xF5, 0x94, 0x13, 0x14, 0x5E, 0x69, + 0x73, 0xC4, 0x50, 0x01, 0xD0, 0x08, 0x7B, 0x42, + 0xD1, 0x1B, 0xC6, 0x45, 0x41, 0x3A, 0xEF, 0xF6, + 0x3A, 0x42, 0x39, 0x1A, 0x39, 0x14, 0x5A, 0x59 }, + }, { + .plaintext = "abc", + .psize = 3, + .digest = { 0x4E, 0x24, 0x48, 0xA4, 0xC6, 0xF4, 0x86, 0xBB, + 0x16, 0xB6, 0x56, 0x2C, 0x73, 0xB4, 0x02, 0x0B, + 0xF3, 0x04, 0x3E, 0x3A, 0x73, 0x1B, 0xCE, 0x72, + 0x1A, 0xE1, 0xB3, 0x03, 0xD9, 0x7E, 0x6D, 0x4C, + 0x71, 0x81, 0xEE, 0xBD, 0xB6, 0xC5, 0x7E, 0x27, + 0x7D, 0x0E, 0x34, 0x95, 0x71, 0x14, 0xCB, 0xD6 }, + }, { + .plaintext = "message digest", + .psize = 14, + .digest = { 0x37, 0x8C, 0x84, 0xA4, 0x12, 0x6E, 0x2D, 0xC6, + 0xE5, 0x6D, 0xCC, 0x74, 0x58, 0x37, 0x7A, 0xAC, + 0x83, 0x8D, 0x00, 0x03, 0x22, 0x30, 0xF5, 0x3C, + 0xE1, 0xF5, 0x70, 0x0C, 0x0F, 0xFB, 0x4D, 0x3B, + 0x84, 0x21, 0x55, 0x76, 0x59, 0xEF, 0x55, 0xC1, + 0x06, 0xB4, 0xB5, 0x2A, 0xC5, 0xA4, 0xAA, 0xA6 }, + }, { + .plaintext = "abcdefghijklmnopqrstuvwxyz", + .psize = 26, + .digest = { 0xF1, 0xD7, 0x54, 0x66, 0x26, 0x36, 0xFF, 0xE9, + 0x2C, 0x82, 0xEB, 0xB9, 0x21, 0x2A, 0x48, 0x4A, + 0x8D, 0x38, 0x63, 0x1E, 0xAD, 0x42, 0x38, 0xF5, + 0x44, 0x2E, 0xE1, 0x3B, 0x80, 0x54, 0xE4, 0x1B, + 0x08, 0xBF, 0x2A, 0x92, 0x51, 0xC3, 0x0B, 0x6A, + 0x0B, 0x8A, 0xAE, 0x86, 0x17, 0x7A, 0xB4, 0xA6 }, + }, { + .plaintext = "ABCDEFGHIJKLMNOPQRSTUVWXYZ" + "abcdefghijklmnopqrstuvwxyz0123456789", + .psize = 62, + .digest = { 0xDC, 0x37, 0xE0, 0x08, 0xCF, 0x9E, 0xE6, 0x9B, + 0xF1, 0x1F, 0x00, 0xED, 0x9A, 0xBA, 0x26, 0x90, + 0x1D, 0xD7, 0xC2, 0x8C, 0xDE, 0xC0, 0x66, 0xCC, + 0x6A, 0xF4, 0x2E, 0x40, 0xF8, 0x2F, 0x3A, 0x1E, + 0x08, 0xEB, 0xA2, 0x66, 0x29, 0x12, 0x9D, 0x8F, + 0xB7, 0xCB, 0x57, 0x21, 0x1B, 0x92, 0x81, 0xA6 }, + }, { + .plaintext = "1234567890123456789012345678901234567890" + "1234567890123456789012345678901234567890", + .psize = 80, + .digest = { 0x46, 0x6E, 0xF1, 0x8B, 0xAB, 0xB0, 0x15, 0x4D, + 0x25, 0xB9, 0xD3, 0x8A, 0x64, 0x14, 0xF5, 0xC0, + 0x87, 0x84, 0x37, 0x2B, 0xCC, 0xB2, 0x04, 0xD6, + 0x54, 0x9C, 0x4A, 0xFA, 0xDB, 0x60, 0x14, 0x29, + 0x4D, 0x5B, 0xD8, 0xDF, 0x2A, 0x6C, 0x44, 0xE5, + 0x38, 0xCD, 0x04, 0x7B, 0x26, 0x81, 0xA5, 0x1A }, + }, { + .plaintext = "abcdbcdecdefdefgefghfghighijhijk", + .psize = 32, + .digest = { 0x2A, 0x98, 0x7E, 0xA4, 0x0F, 0x91, 0x70, 0x61, + 0xF5, 0xD6, 0xF0, 0xA0, 0xE4, 0x64, 0x4F, 0x48, + 0x8A, 0x7A, 0x5A, 0x52, 0xDE, 0xEE, 0x65, 0x62, + 0x07, 0xC5, 0x62, 0xF9, 0x88, 0xE9, 0x5C, 0x69, + 0x16, 0xBD, 0xC8, 0x03, 0x1B, 0xC5, 0xBE, 0x1B, + 0x7B, 0x94, 0x76, 0x39, 0xFE, 0x05, 0x0B, 0x56 }, + }, +}; + +#define WP256_TEST_VECTORS 8 + +struct hash_testvec wp256_tv_template[] = { + { + .plaintext = "", + .psize = 0, + .digest = { 0x19, 0xFA, 0x61, 0xD7, 0x55, 0x22, 0xA4, 0x66, + 0x9B, 0x44, 0xE3, 0x9C, 0x1D, 0x2E, 0x17, 0x26, + 0xC5, 0x30, 0x23, 0x21, 0x30, 0xD4, 0x07, 0xF8, + 0x9A, 0xFE, 0xE0, 0x96, 0x49, 0x97, 0xF7, 0xA7 }, + + + }, { + .plaintext = "a", + .psize = 1, + .digest = { 0x8A, 0xCA, 0x26, 0x02, 0x79, 0x2A, 0xEC, 0x6F, + 0x11, 0xA6, 0x72, 0x06, 0x53, 0x1F, 0xB7, 0xD7, + 0xF0, 0xDF, 0xF5, 0x94, 0x13, 0x14, 0x5E, 0x69, + 0x73, 0xC4, 0x50, 0x01, 0xD0, 0x08, 0x7B, 0x42 }, + }, { + .plaintext = "abc", + .psize = 3, + .digest = { 0x4E, 0x24, 0x48, 0xA4, 0xC6, 0xF4, 0x86, 0xBB, + 0x16, 0xB6, 0x56, 0x2C, 0x73, 0xB4, 0x02, 0x0B, + 0xF3, 0x04, 0x3E, 0x3A, 0x73, 0x1B, 0xCE, 0x72, + 0x1A, 0xE1, 0xB3, 0x03, 0xD9, 0x7E, 0x6D, 0x4C }, + }, { + .plaintext = "message digest", + .psize = 14, + .digest = { 0x37, 0x8C, 0x84, 0xA4, 0x12, 0x6E, 0x2D, 0xC6, + 0xE5, 0x6D, 0xCC, 0x74, 0x58, 0x37, 0x7A, 0xAC, + 0x83, 0x8D, 0x00, 0x03, 0x22, 0x30, 0xF5, 0x3C, + 0xE1, 0xF5, 0x70, 0x0C, 0x0F, 0xFB, 0x4D, 0x3B }, + }, { + .plaintext = "abcdefghijklmnopqrstuvwxyz", + .psize = 26, + .digest = { 0xF1, 0xD7, 0x54, 0x66, 0x26, 0x36, 0xFF, 0xE9, + 0x2C, 0x82, 0xEB, 0xB9, 0x21, 0x2A, 0x48, 0x4A, + 0x8D, 0x38, 0x63, 0x1E, 0xAD, 0x42, 0x38, 0xF5, + 0x44, 0x2E, 0xE1, 0x3B, 0x80, 0x54, 0xE4, 0x1B }, + }, { + .plaintext = "ABCDEFGHIJKLMNOPQRSTUVWXYZ" + "abcdefghijklmnopqrstuvwxyz0123456789", + .psize = 62, + .digest = { 0xDC, 0x37, 0xE0, 0x08, 0xCF, 0x9E, 0xE6, 0x9B, + 0xF1, 0x1F, 0x00, 0xED, 0x9A, 0xBA, 0x26, 0x90, + 0x1D, 0xD7, 0xC2, 0x8C, 0xDE, 0xC0, 0x66, 0xCC, + 0x6A, 0xF4, 0x2E, 0x40, 0xF8, 0x2F, 0x3A, 0x1E }, + }, { + .plaintext = "1234567890123456789012345678901234567890" + "1234567890123456789012345678901234567890", + .psize = 80, + .digest = { 0x46, 0x6E, 0xF1, 0x8B, 0xAB, 0xB0, 0x15, 0x4D, + 0x25, 0xB9, 0xD3, 0x8A, 0x64, 0x14, 0xF5, 0xC0, + 0x87, 0x84, 0x37, 0x2B, 0xCC, 0xB2, 0x04, 0xD6, + 0x54, 0x9C, 0x4A, 0xFA, 0xDB, 0x60, 0x14, 0x29 }, + }, { + .plaintext = "abcdbcdecdefdefgefghfghighijhijk", + .psize = 32, + .digest = { 0x2A, 0x98, 0x7E, 0xA4, 0x0F, 0x91, 0x70, 0x61, + 0xF5, 0xD6, 0xF0, 0xA0, 0xE4, 0x64, 0x4F, 0x48, + 0x8A, 0x7A, 0x5A, 0x52, 0xDE, 0xEE, 0x65, 0x62, + 0x07, 0xC5, 0x62, 0xF9, 0x88, 0xE9, 0x5C, 0x69 }, + }, +}; + +/* * DES test vectors. */ #define DES_ENC_TEST_VECTORS 10 @@ -1186,11 +1437,14 @@ /* * Serpent test vectors. These are backwards because Serpent writes - * octect sequences in right-to-left mode. + * octet sequences in right-to-left mode. */ #define SERPENT_ENC_TEST_VECTORS 4 #define SERPENT_DEC_TEST_VECTORS 4 +#define TNEPRES_ENC_TEST_VECTORS 4 +#define TNEPRES_DEC_TEST_VECTORS 4 + struct cipher_testvec serpent_enc_tv_template[] = { { @@ -1233,6 +1487,57 @@ }, }; +struct cipher_testvec tnepres_enc_tv_template[] = +{ + { /* KeySize=128, PT=0, I=1 */ + .input = { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 }, + .key = { 0x80, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 }, + .klen = 16, + .ilen = 16, + .result = { 0x49, 0xaf, 0xbf, 0xad, 0x9d, 0x5a, 0x34, 0x05, + 0x2c, 0xd8, 0xff, 0xa5, 0x98, 0x6b, 0xd2, 0xdd }, + .rlen = 16, + }, { /* KeySize=192, PT=0, I=1 */ + .key = { 0x80, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 }, + .klen = 24, + .input = { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 }, + .ilen = 16, + .result = { 0xe7, 0x8e, 0x54, 0x02, 0xc7, 0x19, 0x55, 0x68, + 0xac, 0x36, 0x78, 0xf7, 0xa3, 0xf6, 0x0c, 0x66 }, + .rlen = 16, + }, { /* KeySize=256, PT=0, I=1 */ + .key = { 0x80, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 }, + .klen = 32, + .input = { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 }, + .ilen = 16, + .result = { 0xab, 0xed, 0x96, 0xe7, 0x66, 0xbf, 0x28, 0xcb, + 0xc0, 0xeb, 0xd2, 0x1a, 0x82, 0xef, 0x08, 0x19 }, + .rlen = 16, + }, { /* KeySize=256, I=257 */ + .key = { 0x1f, 0x1e, 0x1d, 0x1c, 0x1b, 0x1a, 0x19, 0x18, + 0x17, 0x16, 0x15, 0x14, 0x13, 0x12, 0x11, 0x10, + 0x0f, 0x0e, 0x0d, 0x0c, 0x0b, 0x0a, 0x09, 0x08, + 0x07, 0x06, 0x05, 0x04, 0x03, 0x02, 0x01, 0x00 }, + .klen = 32, + .input = { 0x0f, 0x0e, 0x0d, 0x0c, 0x0b, 0x0a, 0x09, 0x08, + 0x07, 0x06, 0x05, 0x04, 0x03, 0x02, 0x01, 0x00 }, + .ilen = 16, + .result = { 0x5c, 0xe7, 0x1c, 0x70, 0xd2, 0x88, 0x2e, 0x5b, + 0xb8, 0x32, 0xe4, 0x33, 0xf8, 0x9f, 0x26, 0xde }, + .rlen = 16, + }, +}; + + struct cipher_testvec serpent_dec_tv_template[] = { { @@ -1275,6 +1580,49 @@ }, }; +struct cipher_testvec tnepres_dec_tv_template[] = +{ + { + .input = { 0x41, 0xcc, 0x6b, 0x31, 0x59, 0x31, 0x45, 0x97, + 0x6d, 0x6f, 0xbb, 0x38, 0x4b, 0x37, 0x21, 0x28 }, + .ilen = 16, + .result = { 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, + 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f }, + .rlen = 16, + }, { + .key = { 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, + 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f }, + .klen = 16, + .input = { 0xea, 0xf4, 0xd7, 0xfc, 0xd8, 0x01, 0x34, 0x47, + 0x81, 0x45, 0x0b, 0xfa, 0x0c, 0xd6, 0xad, 0x6e }, + .ilen = 16, + .result = { 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, + 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f }, + .rlen = 16, + }, { + .key = { 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, + 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f, + 0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17, + 0x18, 0x19, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f }, + .klen = 32, + .input = { 0x64, 0xa9, 0x1a, 0x37, 0xed, 0x9f, 0xe7, 0x49, + 0xa8, 0x4e, 0x76, 0xd6, 0xf5, 0x0d, 0x78, 0xee }, + .ilen = 16, + .result = { 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, + 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f }, + .rlen = 16, + }, { /* KeySize=128, I=121 */ + .key = { [15] = 0x80 }, + .klen = 16, + .input = { 0x3d, 0xda, 0xbf, 0xc0, 0x06, 0xda, 0xab, 0x06, + 0x46, 0x2a, 0xf4, 0xef, 0x81, 0x54, 0x4e, 0x26 }, + .ilen = 16, + .result = { [0 ... 15] = 0x00 }, + .rlen = 16, + }, +}; + + /* Cast6 test vectors from RFC 2612 */ #define CAST6_ENC_TEST_VECTORS 3 #define CAST6_DEC_TEST_VECTORS 3 @@ -1818,7 +2166,308 @@ } }; +/* + * KHAZAD test vectors. + */ +#define KHAZAD_ENC_TEST_VECTORS 5 +#define KHAZAD_DEC_TEST_VECTORS 5 + +struct cipher_testvec khazad_enc_tv_template[] = { + { + .key = { 0x80, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 }, + .klen = 16, + .input = { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 }, + .ilen = 8, + .result = { 0x49, 0xa4, 0xce, 0x32, 0xac, 0x19, 0x0e, 0x3f }, + .rlen = 8, + }, { + .key = { 0x38, 0x38, 0x38, 0x38, 0x38, 0x38, 0x38, 0x38, + 0x38, 0x38, 0x38, 0x38, 0x38, 0x38, 0x38, 0x38 }, + .klen = 16, + .input = { 0x38, 0x38, 0x38, 0x38, 0x38, 0x38, 0x38, 0x38 }, + .ilen = 8, + .result = { 0x7e, 0x82, 0x12, 0xa1, 0Xd9, 0X5b, 0Xe4, 0Xf9 }, + .rlen = 8, + }, { + .key = { 0Xa2, 0Xa2, 0Xa2, 0Xa2, 0Xa2, 0Xa2, 0Xa2, 0Xa2, + 0Xa2, 0Xa2, 0Xa2, 0Xa2, 0Xa2, 0Xa2, 0Xa2, 0Xa2 }, + .klen = 16, + .input = { 0Xa2, 0Xa2, 0Xa2, 0Xa2, 0Xa2, 0Xa2, 0Xa2, 0Xa2 }, + .ilen = 8, + .result = { 0Xaa, 0Xbe, 0Xc1, 0X95, 0Xc5, 0X94, 0X1a, 0X9c }, + .rlen = 8, + }, { + .key = { 0X2f, 0X2f, 0X2f, 0X2f, 0X2f, 0X2f, 0X2f, 0X2f, + 0X2f, 0X2f, 0X2f, 0X2f, 0X2f, 0X2f, 0X2f, 0X2f }, + .klen = 16, + .input = { 0X2f, 0X2f, 0X2f, 0X2f, 0X2f, 0X2f, 0X2f, 0X2f }, + .ilen = 8, + .result = { 0X04, 0X74, 0Xf5, 0X70, 0X50, 0X16, 0Xd3, 0Xb8 }, + .rlen = 8, + }, { + .key = { 0X2f, 0X2f, 0X2f, 0X2f, 0X2f, 0X2f, 0X2f, 0X2f, + 0X2f, 0X2f, 0X2f, 0X2f, 0X2f, 0X2f, 0X2f, 0X2f }, + .klen = 16, + .input = { 0X2f, 0X2f, 0X2f, 0X2f, 0X2f, 0X2f, 0X2f, 0X2f , + 0X2f, 0X2f, 0X2f, 0X2f, 0X2f, 0X2f, 0X2f, 0X2f }, + .ilen = 16, + .result = { 0X04, 0X74, 0Xf5, 0X70, 0X50, 0X16, 0Xd3, 0Xb8 , + 0X04, 0X74, 0Xf5, 0X70, 0X50, 0X16, 0Xd3, 0Xb8 }, + .rlen = 16, + }, +}; + +struct cipher_testvec khazad_dec_tv_template[] = { + { + .key = { 0x80, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 }, + .klen = 16, + .input = { 0X49, 0Xa4, 0Xce, 0X32, 0Xac, 0X19, 0X0e, 0X3f }, + .ilen = 8, + .result = { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 }, + .rlen = 8, + }, { + .key = { 0x38, 0x38, 0x38, 0x38, 0x38, 0x38, 0x38, 0x38, + 0x38, 0x38, 0x38, 0x38, 0x38, 0x38, 0x38, 0x38 }, + .klen = 16, + .input = { 0X7e, 0X82, 0X12, 0Xa1, 0Xd9, 0X5b, 0Xe4, 0Xf9 }, + .ilen = 8, + .result = { 0x38, 0x38, 0x38, 0x38, 0x38, 0x38, 0x38, 0x38 }, + .rlen = 8, + }, { + .key = { 0Xa2, 0Xa2, 0Xa2, 0Xa2, 0Xa2, 0Xa2, 0Xa2, 0Xa2, + 0Xa2, 0Xa2, 0Xa2, 0Xa2, 0Xa2, 0Xa2, 0Xa2, 0Xa2 }, + .klen = 16, + .input = { 0Xaa, 0Xbe, 0Xc1, 0X95, 0Xc5, 0X94, 0X1a, 0X9c }, + .ilen = 8, + .result = { 0Xa2, 0Xa2, 0Xa2, 0Xa2, 0Xa2, 0Xa2, 0Xa2, 0Xa2 }, + .rlen = 8, + }, { + .key = { 0x2f, 0X2f, 0X2f, 0X2f, 0X2f, 0X2f, 0X2f, 0X2f, + 0X2f, 0X2f, 0X2f, 0X2f, 0X2f, 0X2f, 0X2f, 0X2f }, + .klen = 16, + .input = { 0X04, 0X74, 0Xf5, 0X70, 0X50, 0X16, 0Xd3, 0Xb8 }, + .ilen = 8, + .result = { 0X2f, 0X2f, 0X2f, 0X2f, 0X2f, 0X2f, 0X2f, 0X2f }, + .rlen = 8, + }, { + .key = { 0X2f, 0X2f, 0X2f, 0X2f, 0X2f, 0X2f, 0X2f, 0X2f, + 0X2f, 0X2f, 0X2f, 0X2f, 0X2f, 0X2f, 0X2f, 0X2f }, + .klen = 16, + .input = { 0X04, 0X74, 0Xf5, 0X70, 0X50, 0X16, 0Xd3, 0Xb8 , + 0X04, 0X74, 0Xf5, 0X70, 0X50, 0X16, 0Xd3, 0Xb8 }, + .ilen = 16, + .result = { 0X2f, 0X2f, 0X2f, 0X2f, 0X2f, 0X2f, 0X2f, 0X2f , + 0X2f, 0X2f, 0X2f, 0X2f, 0X2f, 0X2f, 0X2f, 0X2f }, + .rlen = 16, + }, +}; + +/* + * Anubis test vectors. + */ + +#define ANUBIS_ENC_TEST_VECTORS 5 +#define ANUBIS_DEC_TEST_VECTORS 5 +#define ANUBIS_CBC_ENC_TEST_VECTORS 2 +#define ANUBIS_CBC_DEC_TEST_VECTORS 2 + +struct cipher_testvec anubis_enc_tv_template[] = { + { + .key = { 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, + 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, 0xfe }, + .klen = 16, + .input = { 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, + 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, 0xfe }, + .ilen = 16, + .result = { 0x6d, 0xc5, 0xda, 0xa2, 0x26, 0x7d, 0x62, 0x6f, + 0x08, 0xb7, 0x52, 0x8e, 0x6e, 0x6e, 0x86, 0x90 }, + .rlen = 16, + }, { + + .key = { 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, + 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, + 0x03, 0x03, 0x03, 0x03 }, + .klen = 20, + .input = { 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, + 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03 }, + .ilen = 16, + .result = { 0xdb, 0xf1, 0x42, 0xf4, 0xd1, 0x8a, 0xc7, 0x49, + 0x87, 0x41, 0x6f, 0x82, 0x0a, 0x98, 0x64, 0xae }, + .rlen = 16, + }, { + .key = { 0x24, 0x24, 0x24, 0x24, 0x24, 0x24, 0x24, 0x24, + 0x24, 0x24, 0x24, 0x24, 0x24, 0x24, 0x24, 0x24, + 0x24, 0x24, 0x24, 0x24, 0x24, 0x24, 0x24, 0x24, + 0x24, 0x24, 0x24, 0x24 }, + .klen = 28, + .input = { 0x24, 0x24, 0x24, 0x24, 0x24, 0x24, 0x24, 0x24, + 0x24, 0x24, 0x24, 0x24, 0x24, 0x24, 0x24, 0x24 }, + .ilen = 16, + .result = { 0xfd, 0x1b, 0x4a, 0xe3, 0xbf, 0xf0, 0xad, 0x3d, + 0x06, 0xd3, 0x61, 0x27, 0xfd, 0x13, 0x9e, 0xde }, + .rlen = 16, + }, { + .key = { 0x25, 0x25, 0x25, 0x25, 0x25, 0x25, 0x25, 0x25, + 0x25, 0x25, 0x25, 0x25, 0x25, 0x25, 0x25, 0x25, + 0x25, 0x25, 0x25, 0x25, 0x25, 0x25, 0x25, 0x25, + 0x25, 0x25, 0x25, 0x25, 0x25, 0x25, 0x25, 0x25 }, + .klen = 32, + .input = { 0x25, 0x25, 0x25, 0x25, 0x25, 0x25, 0x25, 0x25, + 0x25, 0x25, 0x25, 0x25, 0x25, 0x25, 0x25, 0x25 }, + .ilen = 16, + .result = { 0x1a, 0x91, 0xfb, 0x2b, 0xb7, 0x78, 0x6b, 0xc4, + 0x17, 0xd9, 0xff, 0x40, 0x3b, 0x0e, 0xe5, 0xfe }, + .rlen = 16, + }, { + .key = { 0x35, 0x35, 0x35, 0x35, 0x35, 0x35, 0x35, 0x35, + 0x35, 0x35, 0x35, 0x35, 0x35, 0x35, 0x35, 0x35, + 0x35, 0x35, 0x35, 0x35, 0x35, 0x35, 0x35, 0x35, + 0x35, 0x35, 0x35, 0x35, 0x35, 0x35, 0x35, 0x35, + 0x35, 0x35, 0x35, 0x35, 0x35, 0x35, 0x35, 0x35 }, + .klen = 40, + .input = { 0x35, 0x35, 0x35, 0x35, 0x35, 0x35, 0x35, 0x35, + 0x35, 0x35, 0x35, 0x35, 0x35, 0x35, 0x35, 0x35 }, + .ilen = 16, + .result = { 0xa5, 0x2c, 0x85, 0x6f, 0x9c, 0xba, 0xa0, 0x97, + 0x9e, 0xc6, 0x84, 0x0f, 0x17, 0x21, 0x07, 0xee }, + .rlen = 16, + }, +}; + +struct cipher_testvec anubis_dec_tv_template[] = { + { + .key = { 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, + 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, 0xfe }, + .klen = 16, + .input = { 0x6d, 0xc5, 0xda, 0xa2, 0x26, 0x7d, 0x62, 0x6f, + 0x08, 0xb7, 0x52, 0x8e, 0x6e, 0x6e, 0x86, 0x90 }, + .ilen = 16, + .result = { 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, + 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, 0xfe }, + .rlen = 16, + }, { + + .key = { 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, + 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, + 0x03, 0x03, 0x03, 0x03 }, + .klen = 20, + .input = { 0xdb, 0xf1, 0x42, 0xf4, 0xd1, 0x8a, 0xc7, 0x49, + 0x87, 0x41, 0x6f, 0x82, 0x0a, 0x98, 0x64, 0xae }, + .ilen = 16, + .result = { 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, + 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03 }, + .rlen = 16, + }, { + .key = { 0x24, 0x24, 0x24, 0x24, 0x24, 0x24, 0x24, 0x24, + 0x24, 0x24, 0x24, 0x24, 0x24, 0x24, 0x24, 0x24, + 0x24, 0x24, 0x24, 0x24, 0x24, 0x24, 0x24, 0x24, + 0x24, 0x24, 0x24, 0x24 }, + .klen = 28, + .input = { 0xfd, 0x1b, 0x4a, 0xe3, 0xbf, 0xf0, 0xad, 0x3d, + 0x06, 0xd3, 0x61, 0x27, 0xfd, 0x13, 0x9e, 0xde }, + .ilen = 16, + .result = { 0x24, 0x24, 0x24, 0x24, 0x24, 0x24, 0x24, 0x24, + 0x24, 0x24, 0x24, 0x24, 0x24, 0x24, 0x24, 0x24 }, + .rlen = 16, + }, { + .key = { 0x25, 0x25, 0x25, 0x25, 0x25, 0x25, 0x25, 0x25, + 0x25, 0x25, 0x25, 0x25, 0x25, 0x25, 0x25, 0x25, + 0x25, 0x25, 0x25, 0x25, 0x25, 0x25, 0x25, 0x25, + 0x25, 0x25, 0x25, 0x25, 0x25, 0x25, 0x25, 0x25 }, + .klen = 32, + .input = { 0x1a, 0x91, 0xfb, 0x2b, 0xb7, 0x78, 0x6b, 0xc4, + 0x17, 0xd9, 0xff, 0x40, 0x3b, 0x0e, 0xe5, 0xfe }, + .ilen = 16, + .result = { 0x25, 0x25, 0x25, 0x25, 0x25, 0x25, 0x25, 0x25, + 0x25, 0x25, 0x25, 0x25, 0x25, 0x25, 0x25, 0x25 }, + .rlen = 16, + }, { + .key = { 0x35, 0x35, 0x35, 0x35, 0x35, 0x35, 0x35, 0x35, + 0x35, 0x35, 0x35, 0x35, 0x35, 0x35, 0x35, 0x35, + 0x35, 0x35, 0x35, 0x35, 0x35, 0x35, 0x35, 0x35, + 0x35, 0x35, 0x35, 0x35, 0x35, 0x35, 0x35, 0x35, + 0x35, 0x35, 0x35, 0x35, 0x35, 0x35, 0x35, 0x35 }, + .input = { 0xa5, 0x2c, 0x85, 0x6f, 0x9c, 0xba, 0xa0, 0x97, + 0x9e, 0xc6, 0x84, 0x0f, 0x17, 0x21, 0x07, 0xee }, + .klen = 40, + .ilen = 16, + .result = { 0x35, 0x35, 0x35, 0x35, 0x35, 0x35, 0x35, 0x35, + 0x35, 0x35, 0x35, 0x35, 0x35, 0x35, 0x35, 0x35 }, + .rlen = 16, + }, +}; + +struct cipher_testvec anubis_cbc_enc_tv_template[] = { + { + .key = { 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, + 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, 0xfe }, + .klen = 16, + .input = { 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, + 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, + 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, + 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, 0xfe }, + .ilen = 32, + .result = { 0x6d, 0xc5, 0xda, 0xa2, 0x26, 0x7d, 0x62, 0x6f, + 0x08, 0xb7, 0x52, 0x8e, 0x6e, 0x6e, 0x86, 0x90, + 0x86, 0xd8, 0xb5, 0x6f, 0x98, 0x5e, 0x8a, 0x66, + 0x4f, 0x1f, 0x78, 0xa1, 0xbb, 0x37, 0xf1, 0xbe }, + .rlen = 32, + }, { + .key = { 0x35, 0x35, 0x35, 0x35, 0x35, 0x35, 0x35, 0x35, + 0x35, 0x35, 0x35, 0x35, 0x35, 0x35, 0x35, 0x35, + 0x35, 0x35, 0x35, 0x35, 0x35, 0x35, 0x35, 0x35, + 0x35, 0x35, 0x35, 0x35, 0x35, 0x35, 0x35, 0x35, + 0x35, 0x35, 0x35, 0x35, 0x35, 0x35, 0x35, 0x35 }, + .klen = 40, + .input = { 0x35, 0x35, 0x35, 0x35, 0x35, 0x35, 0x35, 0x35, + 0x35, 0x35, 0x35, 0x35, 0x35, 0x35, 0x35, 0x35, + 0x35, 0x35, 0x35, 0x35, 0x35, 0x35, 0x35, 0x35, + 0x35, 0x35, 0x35, 0x35, 0x35, 0x35, 0x35, 0x35 }, + .ilen = 32, + .result = { 0xa5, 0x2c, 0x85, 0x6f, 0x9c, 0xba, 0xa0, 0x97, + 0x9e, 0xc6, 0x84, 0x0f, 0x17, 0x21, 0x07, 0xee, + 0xa2, 0xbc, 0x06, 0x98, 0xc6, 0x4b, 0xda, 0x75, + 0x2e, 0xaa, 0xbe, 0x58, 0xce, 0x01, 0x5b, 0xc7 }, + .rlen = 32, + }, +}; +struct cipher_testvec anubis_cbc_dec_tv_template[] = { + { + .key = { 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, + 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, 0xfe }, + .klen = 16, + .input = { 0x6d, 0xc5, 0xda, 0xa2, 0x26, 0x7d, 0x62, 0x6f, + 0x08, 0xb7, 0x52, 0x8e, 0x6e, 0x6e, 0x86, 0x90, + 0x86, 0xd8, 0xb5, 0x6f, 0x98, 0x5e, 0x8a, 0x66, + 0x4f, 0x1f, 0x78, 0xa1, 0xbb, 0x37, 0xf1, 0xbe }, + .ilen = 32, + .result = { 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, + 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, + 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, + 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, 0xfe }, + .rlen = 32, + }, { + .key = { 0x35, 0x35, 0x35, 0x35, 0x35, 0x35, 0x35, 0x35, + 0x35, 0x35, 0x35, 0x35, 0x35, 0x35, 0x35, 0x35, + 0x35, 0x35, 0x35, 0x35, 0x35, 0x35, 0x35, 0x35, + 0x35, 0x35, 0x35, 0x35, 0x35, 0x35, 0x35, 0x35, + 0x35, 0x35, 0x35, 0x35, 0x35, 0x35, 0x35, 0x35 }, + .klen = 40, + .input = { 0xa5, 0x2c, 0x85, 0x6f, 0x9c, 0xba, 0xa0, 0x97, + 0x9e, 0xc6, 0x84, 0x0f, 0x17, 0x21, 0x07, 0xee, + 0xa2, 0xbc, 0x06, 0x98, 0xc6, 0x4b, 0xda, 0x75, + 0x2e, 0xaa, 0xbe, 0x58, 0xce, 0x01, 0x5b, 0xc7 }, + .ilen = 32, + .result = { 0x35, 0x35, 0x35, 0x35, 0x35, 0x35, 0x35, 0x35, + 0x35, 0x35, 0x35, 0x35, 0x35, 0x35, 0x35, 0x35, + 0x35, 0x35, 0x35, 0x35, 0x35, 0x35, 0x35, 0x35, + 0x35, 0x35, 0x35, 0x35, 0x35, 0x35, 0x35, 0x35 }, + .rlen = 32, + }, +}; /* * Compression stuff. diff -urN linux-2.4.27/crypto/tea.c linux-2.4.28/crypto/tea.c --- linux-2.4.27/crypto/tea.c 2004-08-07 16:26:04.000000000 -0700 +++ linux-2.4.28/crypto/tea.c 2004-11-17 03:54:21.180380707 -0800 @@ -154,7 +154,7 @@ while (sum != limit) { y += (z << 4 ^ z >> 5) + (z ^ sum) + ctx->KEY[sum&3]; - sum += TEA_DELTA; + sum += XTEA_DELTA; z += (y << 4 ^ y >> 5) + (y ^ sum) + ctx->KEY[sum>>11 &3]; } diff -urN linux-2.4.27/crypto/twofish.c linux-2.4.28/crypto/twofish.c --- linux-2.4.27/crypto/twofish.c 2004-08-07 16:26:04.000000000 -0700 +++ linux-2.4.28/crypto/twofish.c 2004-11-17 03:54:21.181380749 -0800 @@ -1,7 +1,7 @@ /* * Twofish for CryptoAPI * - * Originaly Twofish for GPG + * Originally Twofish for GPG * By Matthew Skala , July 26, 1998 * 256-bit key length added March 20, 1999 * Some modifications to reduce the text size by Werner Koch, April, 1998 @@ -514,7 +514,7 @@ * preprocessed through q0 and q1 respectively; for longer keys they are the * output of previous stages. j is the index of the first key byte to use. * CALC_K computes a pair of subkeys for 128-bit Twofish, by calling CALC_K_2 - * twice, doing the Psuedo-Hadamard Transform, and doing the necessary + * twice, doing the Pseudo-Hadamard Transform, and doing the necessary * rotations. Its parameters are: a, the array to write the results into, * j, the index of the first output entry, k and l, the preprocessed indices * for index 2i, and m and n, the preprocessed indices for index 2i+1. diff -urN linux-2.4.27/crypto/wp512.c linux-2.4.28/crypto/wp512.c --- linux-2.4.27/crypto/wp512.c 1969-12-31 16:00:00.000000000 -0800 +++ linux-2.4.28/crypto/wp512.c 2004-11-17 03:54:21.184380872 -0800 @@ -0,0 +1,1205 @@ +/* + * Cryptographic API. + * + * Whirlpool hashing Algorithm + * + * The Whirlpool algorithm was developed by Paulo S. L. M. Barreto and + * Vincent Rijmen. It has been selected as one of cryptographic + * primitives by the NESSIE project http://www.cryptonessie.org/ + * + * The original authors have disclaimed all copyright interest in this + * code and thus put it in the public domain. The subsequent authors + * have put this under the GNU General Public License. + * + * By Aaron Grothe ajgrothe@yahoo.com, August 23, 2004 + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + */ +#include +#include +#include +#include +#include + +#define WP512_DIGEST_SIZE 64 +#define WP384_DIGEST_SIZE 48 +#define WP256_DIGEST_SIZE 32 + +#define WP512_BLOCK_SIZE 64 +#define WP512_LENGTHBYTES 32 + +#define WHIRLPOOL_ROUNDS 10 + +struct wp512_ctx { + u8 bitLength[WP512_LENGTHBYTES]; + u8 buffer[WP512_BLOCK_SIZE]; + int bufferBits; + int bufferPos; + u64 hash[WP512_DIGEST_SIZE/8]; +}; + +/* + * Though Whirlpool is endianness-neutral, the encryption tables are listed + * in BIG-ENDIAN format, which is adopted throughout this implementation + * (but little-endian notation would be equally suitable if consistently + * employed). + */ + +static const u64 C0[256] = { + 0x18186018c07830d8ULL, 0x23238c2305af4626ULL, 0xc6c63fc67ef991b8ULL, + 0xe8e887e8136fcdfbULL, 0x878726874ca113cbULL, 0xb8b8dab8a9626d11ULL, + 0x0101040108050209ULL, 0x4f4f214f426e9e0dULL, 0x3636d836adee6c9bULL, + 0xa6a6a2a6590451ffULL, 0xd2d26fd2debdb90cULL, 0xf5f5f3f5fb06f70eULL, + 0x7979f979ef80f296ULL, 0x6f6fa16f5fcede30ULL, 0x91917e91fcef3f6dULL, + 0x52525552aa07a4f8ULL, 0x60609d6027fdc047ULL, 0xbcbccabc89766535ULL, + 0x9b9b569baccd2b37ULL, 0x8e8e028e048c018aULL, 0xa3a3b6a371155bd2ULL, + 0x0c0c300c603c186cULL, 0x7b7bf17bff8af684ULL, 0x3535d435b5e16a80ULL, + 0x1d1d741de8693af5ULL, 0xe0e0a7e05347ddb3ULL, 0xd7d77bd7f6acb321ULL, + 0xc2c22fc25eed999cULL, 0x2e2eb82e6d965c43ULL, 0x4b4b314b627a9629ULL, + 0xfefedffea321e15dULL, 0x575741578216aed5ULL, 0x15155415a8412abdULL, + 0x7777c1779fb6eee8ULL, 0x3737dc37a5eb6e92ULL, 0xe5e5b3e57b56d79eULL, + 0x9f9f469f8cd92313ULL, 0xf0f0e7f0d317fd23ULL, 0x4a4a354a6a7f9420ULL, + 0xdada4fda9e95a944ULL, 0x58587d58fa25b0a2ULL, 0xc9c903c906ca8fcfULL, + 0x2929a429558d527cULL, 0x0a0a280a5022145aULL, 0xb1b1feb1e14f7f50ULL, + 0xa0a0baa0691a5dc9ULL, 0x6b6bb16b7fdad614ULL, 0x85852e855cab17d9ULL, + 0xbdbdcebd8173673cULL, 0x5d5d695dd234ba8fULL, 0x1010401080502090ULL, + 0xf4f4f7f4f303f507ULL, 0xcbcb0bcb16c08bddULL, 0x3e3ef83eedc67cd3ULL, + 0x0505140528110a2dULL, 0x676781671fe6ce78ULL, 0xe4e4b7e47353d597ULL, + 0x27279c2725bb4e02ULL, 0x4141194132588273ULL, 0x8b8b168b2c9d0ba7ULL, + 0xa7a7a6a7510153f6ULL, 0x7d7de97dcf94fab2ULL, 0x95956e95dcfb3749ULL, + 0xd8d847d88e9fad56ULL, 0xfbfbcbfb8b30eb70ULL, 0xeeee9fee2371c1cdULL, + 0x7c7ced7cc791f8bbULL, 0x6666856617e3cc71ULL, 0xdddd53dda68ea77bULL, + 0x17175c17b84b2eafULL, 0x4747014702468e45ULL, 0x9e9e429e84dc211aULL, + 0xcaca0fca1ec589d4ULL, 0x2d2db42d75995a58ULL, 0xbfbfc6bf9179632eULL, + 0x07071c07381b0e3fULL, 0xadad8ead012347acULL, 0x5a5a755aea2fb4b0ULL, + 0x838336836cb51befULL, 0x3333cc3385ff66b6ULL, 0x636391633ff2c65cULL, + 0x02020802100a0412ULL, 0xaaaa92aa39384993ULL, 0x7171d971afa8e2deULL, + 0xc8c807c80ecf8dc6ULL, 0x19196419c87d32d1ULL, 0x494939497270923bULL, + 0xd9d943d9869aaf5fULL, 0xf2f2eff2c31df931ULL, 0xe3e3abe34b48dba8ULL, + 0x5b5b715be22ab6b9ULL, 0x88881a8834920dbcULL, 0x9a9a529aa4c8293eULL, + 0x262698262dbe4c0bULL, 0x3232c8328dfa64bfULL, 0xb0b0fab0e94a7d59ULL, + 0xe9e983e91b6acff2ULL, 0x0f0f3c0f78331e77ULL, 0xd5d573d5e6a6b733ULL, + 0x80803a8074ba1df4ULL, 0xbebec2be997c6127ULL, 0xcdcd13cd26de87ebULL, + 0x3434d034bde46889ULL, 0x48483d487a759032ULL, 0xffffdbffab24e354ULL, + 0x7a7af57af78ff48dULL, 0x90907a90f4ea3d64ULL, 0x5f5f615fc23ebe9dULL, + 0x202080201da0403dULL, 0x6868bd6867d5d00fULL, 0x1a1a681ad07234caULL, + 0xaeae82ae192c41b7ULL, 0xb4b4eab4c95e757dULL, 0x54544d549a19a8ceULL, + 0x93937693ece53b7fULL, 0x222288220daa442fULL, 0x64648d6407e9c863ULL, + 0xf1f1e3f1db12ff2aULL, 0x7373d173bfa2e6ccULL, 0x12124812905a2482ULL, + 0x40401d403a5d807aULL, 0x0808200840281048ULL, 0xc3c32bc356e89b95ULL, + 0xecec97ec337bc5dfULL, 0xdbdb4bdb9690ab4dULL, 0xa1a1bea1611f5fc0ULL, + 0x8d8d0e8d1c830791ULL, 0x3d3df43df5c97ac8ULL, 0x97976697ccf1335bULL, + 0x0000000000000000ULL, 0xcfcf1bcf36d483f9ULL, 0x2b2bac2b4587566eULL, + 0x7676c57697b3ece1ULL, 0x8282328264b019e6ULL, 0xd6d67fd6fea9b128ULL, + 0x1b1b6c1bd87736c3ULL, 0xb5b5eeb5c15b7774ULL, 0xafaf86af112943beULL, + 0x6a6ab56a77dfd41dULL, 0x50505d50ba0da0eaULL, 0x45450945124c8a57ULL, + 0xf3f3ebf3cb18fb38ULL, 0x3030c0309df060adULL, 0xefef9bef2b74c3c4ULL, + 0x3f3ffc3fe5c37edaULL, 0x55554955921caac7ULL, 0xa2a2b2a2791059dbULL, + 0xeaea8fea0365c9e9ULL, 0x656589650fecca6aULL, 0xbabad2bab9686903ULL, + 0x2f2fbc2f65935e4aULL, 0xc0c027c04ee79d8eULL, 0xdede5fdebe81a160ULL, + 0x1c1c701ce06c38fcULL, 0xfdfdd3fdbb2ee746ULL, 0x4d4d294d52649a1fULL, + 0x92927292e4e03976ULL, 0x7575c9758fbceafaULL, 0x06061806301e0c36ULL, + 0x8a8a128a249809aeULL, 0xb2b2f2b2f940794bULL, 0xe6e6bfe66359d185ULL, + 0x0e0e380e70361c7eULL, 0x1f1f7c1ff8633ee7ULL, 0x6262956237f7c455ULL, + 0xd4d477d4eea3b53aULL, 0xa8a89aa829324d81ULL, 0x96966296c4f43152ULL, + 0xf9f9c3f99b3aef62ULL, 0xc5c533c566f697a3ULL, 0x2525942535b14a10ULL, + 0x59597959f220b2abULL, 0x84842a8454ae15d0ULL, 0x7272d572b7a7e4c5ULL, + 0x3939e439d5dd72ecULL, 0x4c4c2d4c5a619816ULL, 0x5e5e655eca3bbc94ULL, + 0x7878fd78e785f09fULL, 0x3838e038ddd870e5ULL, 0x8c8c0a8c14860598ULL, + 0xd1d163d1c6b2bf17ULL, 0xa5a5aea5410b57e4ULL, 0xe2e2afe2434dd9a1ULL, + 0x616199612ff8c24eULL, 0xb3b3f6b3f1457b42ULL, 0x2121842115a54234ULL, + 0x9c9c4a9c94d62508ULL, 0x1e1e781ef0663ceeULL, 0x4343114322528661ULL, + 0xc7c73bc776fc93b1ULL, 0xfcfcd7fcb32be54fULL, 0x0404100420140824ULL, + 0x51515951b208a2e3ULL, 0x99995e99bcc72f25ULL, 0x6d6da96d4fc4da22ULL, + 0x0d0d340d68391a65ULL, 0xfafacffa8335e979ULL, 0xdfdf5bdfb684a369ULL, + 0x7e7ee57ed79bfca9ULL, 0x242490243db44819ULL, 0x3b3bec3bc5d776feULL, + 0xabab96ab313d4b9aULL, 0xcece1fce3ed181f0ULL, 0x1111441188552299ULL, + 0x8f8f068f0c890383ULL, 0x4e4e254e4a6b9c04ULL, 0xb7b7e6b7d1517366ULL, + 0xebeb8beb0b60cbe0ULL, 0x3c3cf03cfdcc78c1ULL, 0x81813e817cbf1ffdULL, + 0x94946a94d4fe3540ULL, 0xf7f7fbf7eb0cf31cULL, 0xb9b9deb9a1676f18ULL, + 0x13134c13985f268bULL, 0x2c2cb02c7d9c5851ULL, 0xd3d36bd3d6b8bb05ULL, + 0xe7e7bbe76b5cd38cULL, 0x6e6ea56e57cbdc39ULL, 0xc4c437c46ef395aaULL, + 0x03030c03180f061bULL, 0x565645568a13acdcULL, 0x44440d441a49885eULL, + 0x7f7fe17fdf9efea0ULL, 0xa9a99ea921374f88ULL, 0x2a2aa82a4d825467ULL, + 0xbbbbd6bbb16d6b0aULL, 0xc1c123c146e29f87ULL, 0x53535153a202a6f1ULL, + 0xdcdc57dcae8ba572ULL, 0x0b0b2c0b58271653ULL, 0x9d9d4e9d9cd32701ULL, + 0x6c6cad6c47c1d82bULL, 0x3131c43195f562a4ULL, 0x7474cd7487b9e8f3ULL, + 0xf6f6fff6e309f115ULL, 0x464605460a438c4cULL, 0xacac8aac092645a5ULL, + 0x89891e893c970fb5ULL, 0x14145014a04428b4ULL, 0xe1e1a3e15b42dfbaULL, + 0x16165816b04e2ca6ULL, 0x3a3ae83acdd274f7ULL, 0x6969b9696fd0d206ULL, + 0x09092409482d1241ULL, 0x7070dd70a7ade0d7ULL, 0xb6b6e2b6d954716fULL, + 0xd0d067d0ceb7bd1eULL, 0xeded93ed3b7ec7d6ULL, 0xcccc17cc2edb85e2ULL, + 0x424215422a578468ULL, 0x98985a98b4c22d2cULL, 0xa4a4aaa4490e55edULL, + 0x2828a0285d885075ULL, 0x5c5c6d5cda31b886ULL, 0xf8f8c7f8933fed6bULL, + 0x8686228644a411c2ULL, +}; + +static const u64 C1[256] = { + 0xd818186018c07830ULL, 0x2623238c2305af46ULL, 0xb8c6c63fc67ef991ULL, + 0xfbe8e887e8136fcdULL, 0xcb878726874ca113ULL, 0x11b8b8dab8a9626dULL, + 0x0901010401080502ULL, 0x0d4f4f214f426e9eULL, 0x9b3636d836adee6cULL, + 0xffa6a6a2a6590451ULL, 0x0cd2d26fd2debdb9ULL, 0x0ef5f5f3f5fb06f7ULL, + 0x967979f979ef80f2ULL, 0x306f6fa16f5fcedeULL, 0x6d91917e91fcef3fULL, + 0xf852525552aa07a4ULL, 0x4760609d6027fdc0ULL, 0x35bcbccabc897665ULL, + 0x379b9b569baccd2bULL, 0x8a8e8e028e048c01ULL, 0xd2a3a3b6a371155bULL, + 0x6c0c0c300c603c18ULL, 0x847b7bf17bff8af6ULL, 0x803535d435b5e16aULL, + 0xf51d1d741de8693aULL, 0xb3e0e0a7e05347ddULL, 0x21d7d77bd7f6acb3ULL, + 0x9cc2c22fc25eed99ULL, 0x432e2eb82e6d965cULL, 0x294b4b314b627a96ULL, + 0x5dfefedffea321e1ULL, 0xd5575741578216aeULL, 0xbd15155415a8412aULL, + 0xe87777c1779fb6eeULL, 0x923737dc37a5eb6eULL, 0x9ee5e5b3e57b56d7ULL, + 0x139f9f469f8cd923ULL, 0x23f0f0e7f0d317fdULL, 0x204a4a354a6a7f94ULL, + 0x44dada4fda9e95a9ULL, 0xa258587d58fa25b0ULL, 0xcfc9c903c906ca8fULL, + 0x7c2929a429558d52ULL, 0x5a0a0a280a502214ULL, 0x50b1b1feb1e14f7fULL, + 0xc9a0a0baa0691a5dULL, 0x146b6bb16b7fdad6ULL, 0xd985852e855cab17ULL, + 0x3cbdbdcebd817367ULL, 0x8f5d5d695dd234baULL, 0x9010104010805020ULL, + 0x07f4f4f7f4f303f5ULL, 0xddcbcb0bcb16c08bULL, 0xd33e3ef83eedc67cULL, + 0x2d0505140528110aULL, 0x78676781671fe6ceULL, 0x97e4e4b7e47353d5ULL, + 0x0227279c2725bb4eULL, 0x7341411941325882ULL, 0xa78b8b168b2c9d0bULL, + 0xf6a7a7a6a7510153ULL, 0xb27d7de97dcf94faULL, 0x4995956e95dcfb37ULL, + 0x56d8d847d88e9fadULL, 0x70fbfbcbfb8b30ebULL, 0xcdeeee9fee2371c1ULL, + 0xbb7c7ced7cc791f8ULL, 0x716666856617e3ccULL, 0x7bdddd53dda68ea7ULL, + 0xaf17175c17b84b2eULL, 0x454747014702468eULL, 0x1a9e9e429e84dc21ULL, + 0xd4caca0fca1ec589ULL, 0x582d2db42d75995aULL, 0x2ebfbfc6bf917963ULL, + 0x3f07071c07381b0eULL, 0xacadad8ead012347ULL, 0xb05a5a755aea2fb4ULL, + 0xef838336836cb51bULL, 0xb63333cc3385ff66ULL, 0x5c636391633ff2c6ULL, + 0x1202020802100a04ULL, 0x93aaaa92aa393849ULL, 0xde7171d971afa8e2ULL, + 0xc6c8c807c80ecf8dULL, 0xd119196419c87d32ULL, 0x3b49493949727092ULL, + 0x5fd9d943d9869aafULL, 0x31f2f2eff2c31df9ULL, 0xa8e3e3abe34b48dbULL, + 0xb95b5b715be22ab6ULL, 0xbc88881a8834920dULL, 0x3e9a9a529aa4c829ULL, + 0x0b262698262dbe4cULL, 0xbf3232c8328dfa64ULL, 0x59b0b0fab0e94a7dULL, + 0xf2e9e983e91b6acfULL, 0x770f0f3c0f78331eULL, 0x33d5d573d5e6a6b7ULL, + 0xf480803a8074ba1dULL, 0x27bebec2be997c61ULL, 0xebcdcd13cd26de87ULL, + 0x893434d034bde468ULL, 0x3248483d487a7590ULL, 0x54ffffdbffab24e3ULL, + 0x8d7a7af57af78ff4ULL, 0x6490907a90f4ea3dULL, 0x9d5f5f615fc23ebeULL, + 0x3d202080201da040ULL, 0x0f6868bd6867d5d0ULL, 0xca1a1a681ad07234ULL, + 0xb7aeae82ae192c41ULL, 0x7db4b4eab4c95e75ULL, 0xce54544d549a19a8ULL, + 0x7f93937693ece53bULL, 0x2f222288220daa44ULL, 0x6364648d6407e9c8ULL, + 0x2af1f1e3f1db12ffULL, 0xcc7373d173bfa2e6ULL, 0x8212124812905a24ULL, + 0x7a40401d403a5d80ULL, 0x4808082008402810ULL, 0x95c3c32bc356e89bULL, + 0xdfecec97ec337bc5ULL, 0x4ddbdb4bdb9690abULL, 0xc0a1a1bea1611f5fULL, + 0x918d8d0e8d1c8307ULL, 0xc83d3df43df5c97aULL, 0x5b97976697ccf133ULL, + 0x0000000000000000ULL, 0xf9cfcf1bcf36d483ULL, 0x6e2b2bac2b458756ULL, + 0xe17676c57697b3ecULL, 0xe68282328264b019ULL, 0x28d6d67fd6fea9b1ULL, + 0xc31b1b6c1bd87736ULL, 0x74b5b5eeb5c15b77ULL, 0xbeafaf86af112943ULL, + 0x1d6a6ab56a77dfd4ULL, 0xea50505d50ba0da0ULL, 0x5745450945124c8aULL, + 0x38f3f3ebf3cb18fbULL, 0xad3030c0309df060ULL, 0xc4efef9bef2b74c3ULL, + 0xda3f3ffc3fe5c37eULL, 0xc755554955921caaULL, 0xdba2a2b2a2791059ULL, + 0xe9eaea8fea0365c9ULL, 0x6a656589650feccaULL, 0x03babad2bab96869ULL, + 0x4a2f2fbc2f65935eULL, 0x8ec0c027c04ee79dULL, 0x60dede5fdebe81a1ULL, + 0xfc1c1c701ce06c38ULL, 0x46fdfdd3fdbb2ee7ULL, 0x1f4d4d294d52649aULL, + 0x7692927292e4e039ULL, 0xfa7575c9758fbceaULL, 0x3606061806301e0cULL, + 0xae8a8a128a249809ULL, 0x4bb2b2f2b2f94079ULL, 0x85e6e6bfe66359d1ULL, + 0x7e0e0e380e70361cULL, 0xe71f1f7c1ff8633eULL, 0x556262956237f7c4ULL, + 0x3ad4d477d4eea3b5ULL, 0x81a8a89aa829324dULL, 0x5296966296c4f431ULL, + 0x62f9f9c3f99b3aefULL, 0xa3c5c533c566f697ULL, 0x102525942535b14aULL, + 0xab59597959f220b2ULL, 0xd084842a8454ae15ULL, 0xc57272d572b7a7e4ULL, + 0xec3939e439d5dd72ULL, 0x164c4c2d4c5a6198ULL, 0x945e5e655eca3bbcULL, + 0x9f7878fd78e785f0ULL, 0xe53838e038ddd870ULL, 0x988c8c0a8c148605ULL, + 0x17d1d163d1c6b2bfULL, 0xe4a5a5aea5410b57ULL, 0xa1e2e2afe2434dd9ULL, + 0x4e616199612ff8c2ULL, 0x42b3b3f6b3f1457bULL, 0x342121842115a542ULL, + 0x089c9c4a9c94d625ULL, 0xee1e1e781ef0663cULL, 0x6143431143225286ULL, + 0xb1c7c73bc776fc93ULL, 0x4ffcfcd7fcb32be5ULL, 0x2404041004201408ULL, + 0xe351515951b208a2ULL, 0x2599995e99bcc72fULL, 0x226d6da96d4fc4daULL, + 0x650d0d340d68391aULL, 0x79fafacffa8335e9ULL, 0x69dfdf5bdfb684a3ULL, + 0xa97e7ee57ed79bfcULL, 0x19242490243db448ULL, 0xfe3b3bec3bc5d776ULL, + 0x9aabab96ab313d4bULL, 0xf0cece1fce3ed181ULL, 0x9911114411885522ULL, + 0x838f8f068f0c8903ULL, 0x044e4e254e4a6b9cULL, 0x66b7b7e6b7d15173ULL, + 0xe0ebeb8beb0b60cbULL, 0xc13c3cf03cfdcc78ULL, 0xfd81813e817cbf1fULL, + 0x4094946a94d4fe35ULL, 0x1cf7f7fbf7eb0cf3ULL, 0x18b9b9deb9a1676fULL, + 0x8b13134c13985f26ULL, 0x512c2cb02c7d9c58ULL, 0x05d3d36bd3d6b8bbULL, + 0x8ce7e7bbe76b5cd3ULL, 0x396e6ea56e57cbdcULL, 0xaac4c437c46ef395ULL, + 0x1b03030c03180f06ULL, 0xdc565645568a13acULL, 0x5e44440d441a4988ULL, + 0xa07f7fe17fdf9efeULL, 0x88a9a99ea921374fULL, 0x672a2aa82a4d8254ULL, + 0x0abbbbd6bbb16d6bULL, 0x87c1c123c146e29fULL, 0xf153535153a202a6ULL, + 0x72dcdc57dcae8ba5ULL, 0x530b0b2c0b582716ULL, 0x019d9d4e9d9cd327ULL, + 0x2b6c6cad6c47c1d8ULL, 0xa43131c43195f562ULL, 0xf37474cd7487b9e8ULL, + 0x15f6f6fff6e309f1ULL, 0x4c464605460a438cULL, 0xa5acac8aac092645ULL, + 0xb589891e893c970fULL, 0xb414145014a04428ULL, 0xbae1e1a3e15b42dfULL, + 0xa616165816b04e2cULL, 0xf73a3ae83acdd274ULL, 0x066969b9696fd0d2ULL, + 0x4109092409482d12ULL, 0xd77070dd70a7ade0ULL, 0x6fb6b6e2b6d95471ULL, + 0x1ed0d067d0ceb7bdULL, 0xd6eded93ed3b7ec7ULL, 0xe2cccc17cc2edb85ULL, + 0x68424215422a5784ULL, 0x2c98985a98b4c22dULL, 0xeda4a4aaa4490e55ULL, + 0x752828a0285d8850ULL, 0x865c5c6d5cda31b8ULL, 0x6bf8f8c7f8933fedULL, + 0xc28686228644a411ULL, +}; + +static const u64 C2[256] = { + 0x30d818186018c078ULL, 0x462623238c2305afULL, 0x91b8c6c63fc67ef9ULL, + 0xcdfbe8e887e8136fULL, 0x13cb878726874ca1ULL, 0x6d11b8b8dab8a962ULL, + 0x0209010104010805ULL, 0x9e0d4f4f214f426eULL, 0x6c9b3636d836adeeULL, + 0x51ffa6a6a2a65904ULL, 0xb90cd2d26fd2debdULL, 0xf70ef5f5f3f5fb06ULL, + 0xf2967979f979ef80ULL, 0xde306f6fa16f5fceULL, 0x3f6d91917e91fcefULL, + 0xa4f852525552aa07ULL, 0xc04760609d6027fdULL, 0x6535bcbccabc8976ULL, + 0x2b379b9b569baccdULL, 0x018a8e8e028e048cULL, 0x5bd2a3a3b6a37115ULL, + 0x186c0c0c300c603cULL, 0xf6847b7bf17bff8aULL, 0x6a803535d435b5e1ULL, + 0x3af51d1d741de869ULL, 0xddb3e0e0a7e05347ULL, 0xb321d7d77bd7f6acULL, + 0x999cc2c22fc25eedULL, 0x5c432e2eb82e6d96ULL, 0x96294b4b314b627aULL, + 0xe15dfefedffea321ULL, 0xaed5575741578216ULL, 0x2abd15155415a841ULL, + 0xeee87777c1779fb6ULL, 0x6e923737dc37a5ebULL, 0xd79ee5e5b3e57b56ULL, + 0x23139f9f469f8cd9ULL, 0xfd23f0f0e7f0d317ULL, 0x94204a4a354a6a7fULL, + 0xa944dada4fda9e95ULL, 0xb0a258587d58fa25ULL, 0x8fcfc9c903c906caULL, + 0x527c2929a429558dULL, 0x145a0a0a280a5022ULL, 0x7f50b1b1feb1e14fULL, + 0x5dc9a0a0baa0691aULL, 0xd6146b6bb16b7fdaULL, 0x17d985852e855cabULL, + 0x673cbdbdcebd8173ULL, 0xba8f5d5d695dd234ULL, 0x2090101040108050ULL, + 0xf507f4f4f7f4f303ULL, 0x8bddcbcb0bcb16c0ULL, 0x7cd33e3ef83eedc6ULL, + 0x0a2d050514052811ULL, 0xce78676781671fe6ULL, 0xd597e4e4b7e47353ULL, + 0x4e0227279c2725bbULL, 0x8273414119413258ULL, 0x0ba78b8b168b2c9dULL, + 0x53f6a7a7a6a75101ULL, 0xfab27d7de97dcf94ULL, 0x374995956e95dcfbULL, + 0xad56d8d847d88e9fULL, 0xeb70fbfbcbfb8b30ULL, 0xc1cdeeee9fee2371ULL, + 0xf8bb7c7ced7cc791ULL, 0xcc716666856617e3ULL, 0xa77bdddd53dda68eULL, + 0x2eaf17175c17b84bULL, 0x8e45474701470246ULL, 0x211a9e9e429e84dcULL, + 0x89d4caca0fca1ec5ULL, 0x5a582d2db42d7599ULL, 0x632ebfbfc6bf9179ULL, + 0x0e3f07071c07381bULL, 0x47acadad8ead0123ULL, 0xb4b05a5a755aea2fULL, + 0x1bef838336836cb5ULL, 0x66b63333cc3385ffULL, 0xc65c636391633ff2ULL, + 0x041202020802100aULL, 0x4993aaaa92aa3938ULL, 0xe2de7171d971afa8ULL, + 0x8dc6c8c807c80ecfULL, 0x32d119196419c87dULL, 0x923b494939497270ULL, + 0xaf5fd9d943d9869aULL, 0xf931f2f2eff2c31dULL, 0xdba8e3e3abe34b48ULL, + 0xb6b95b5b715be22aULL, 0x0dbc88881a883492ULL, 0x293e9a9a529aa4c8ULL, + 0x4c0b262698262dbeULL, 0x64bf3232c8328dfaULL, 0x7d59b0b0fab0e94aULL, + 0xcff2e9e983e91b6aULL, 0x1e770f0f3c0f7833ULL, 0xb733d5d573d5e6a6ULL, + 0x1df480803a8074baULL, 0x6127bebec2be997cULL, 0x87ebcdcd13cd26deULL, + 0x68893434d034bde4ULL, 0x903248483d487a75ULL, 0xe354ffffdbffab24ULL, + 0xf48d7a7af57af78fULL, 0x3d6490907a90f4eaULL, 0xbe9d5f5f615fc23eULL, + 0x403d202080201da0ULL, 0xd00f6868bd6867d5ULL, 0x34ca1a1a681ad072ULL, + 0x41b7aeae82ae192cULL, 0x757db4b4eab4c95eULL, 0xa8ce54544d549a19ULL, + 0x3b7f93937693ece5ULL, 0x442f222288220daaULL, 0xc86364648d6407e9ULL, + 0xff2af1f1e3f1db12ULL, 0xe6cc7373d173bfa2ULL, 0x248212124812905aULL, + 0x807a40401d403a5dULL, 0x1048080820084028ULL, 0x9b95c3c32bc356e8ULL, + 0xc5dfecec97ec337bULL, 0xab4ddbdb4bdb9690ULL, 0x5fc0a1a1bea1611fULL, + 0x07918d8d0e8d1c83ULL, 0x7ac83d3df43df5c9ULL, 0x335b97976697ccf1ULL, + 0x0000000000000000ULL, 0x83f9cfcf1bcf36d4ULL, 0x566e2b2bac2b4587ULL, + 0xece17676c57697b3ULL, 0x19e68282328264b0ULL, 0xb128d6d67fd6fea9ULL, + 0x36c31b1b6c1bd877ULL, 0x7774b5b5eeb5c15bULL, 0x43beafaf86af1129ULL, + 0xd41d6a6ab56a77dfULL, 0xa0ea50505d50ba0dULL, 0x8a5745450945124cULL, + 0xfb38f3f3ebf3cb18ULL, 0x60ad3030c0309df0ULL, 0xc3c4efef9bef2b74ULL, + 0x7eda3f3ffc3fe5c3ULL, 0xaac755554955921cULL, 0x59dba2a2b2a27910ULL, + 0xc9e9eaea8fea0365ULL, 0xca6a656589650fecULL, 0x6903babad2bab968ULL, + 0x5e4a2f2fbc2f6593ULL, 0x9d8ec0c027c04ee7ULL, 0xa160dede5fdebe81ULL, + 0x38fc1c1c701ce06cULL, 0xe746fdfdd3fdbb2eULL, 0x9a1f4d4d294d5264ULL, + 0x397692927292e4e0ULL, 0xeafa7575c9758fbcULL, 0x0c3606061806301eULL, + 0x09ae8a8a128a2498ULL, 0x794bb2b2f2b2f940ULL, 0xd185e6e6bfe66359ULL, + 0x1c7e0e0e380e7036ULL, 0x3ee71f1f7c1ff863ULL, 0xc4556262956237f7ULL, + 0xb53ad4d477d4eea3ULL, 0x4d81a8a89aa82932ULL, 0x315296966296c4f4ULL, + 0xef62f9f9c3f99b3aULL, 0x97a3c5c533c566f6ULL, 0x4a102525942535b1ULL, + 0xb2ab59597959f220ULL, 0x15d084842a8454aeULL, 0xe4c57272d572b7a7ULL, + 0x72ec3939e439d5ddULL, 0x98164c4c2d4c5a61ULL, 0xbc945e5e655eca3bULL, + 0xf09f7878fd78e785ULL, 0x70e53838e038ddd8ULL, 0x05988c8c0a8c1486ULL, + 0xbf17d1d163d1c6b2ULL, 0x57e4a5a5aea5410bULL, 0xd9a1e2e2afe2434dULL, + 0xc24e616199612ff8ULL, 0x7b42b3b3f6b3f145ULL, 0x42342121842115a5ULL, + 0x25089c9c4a9c94d6ULL, 0x3cee1e1e781ef066ULL, 0x8661434311432252ULL, + 0x93b1c7c73bc776fcULL, 0xe54ffcfcd7fcb32bULL, 0x0824040410042014ULL, + 0xa2e351515951b208ULL, 0x2f2599995e99bcc7ULL, 0xda226d6da96d4fc4ULL, + 0x1a650d0d340d6839ULL, 0xe979fafacffa8335ULL, 0xa369dfdf5bdfb684ULL, + 0xfca97e7ee57ed79bULL, 0x4819242490243db4ULL, 0x76fe3b3bec3bc5d7ULL, + 0x4b9aabab96ab313dULL, 0x81f0cece1fce3ed1ULL, 0x2299111144118855ULL, + 0x03838f8f068f0c89ULL, 0x9c044e4e254e4a6bULL, 0x7366b7b7e6b7d151ULL, + 0xcbe0ebeb8beb0b60ULL, 0x78c13c3cf03cfdccULL, 0x1ffd81813e817cbfULL, + 0x354094946a94d4feULL, 0xf31cf7f7fbf7eb0cULL, 0x6f18b9b9deb9a167ULL, + 0x268b13134c13985fULL, 0x58512c2cb02c7d9cULL, 0xbb05d3d36bd3d6b8ULL, + 0xd38ce7e7bbe76b5cULL, 0xdc396e6ea56e57cbULL, 0x95aac4c437c46ef3ULL, + 0x061b03030c03180fULL, 0xacdc565645568a13ULL, 0x885e44440d441a49ULL, + 0xfea07f7fe17fdf9eULL, 0x4f88a9a99ea92137ULL, 0x54672a2aa82a4d82ULL, + 0x6b0abbbbd6bbb16dULL, 0x9f87c1c123c146e2ULL, 0xa6f153535153a202ULL, + 0xa572dcdc57dcae8bULL, 0x16530b0b2c0b5827ULL, 0x27019d9d4e9d9cd3ULL, + 0xd82b6c6cad6c47c1ULL, 0x62a43131c43195f5ULL, 0xe8f37474cd7487b9ULL, + 0xf115f6f6fff6e309ULL, 0x8c4c464605460a43ULL, 0x45a5acac8aac0926ULL, + 0x0fb589891e893c97ULL, 0x28b414145014a044ULL, 0xdfbae1e1a3e15b42ULL, + 0x2ca616165816b04eULL, 0x74f73a3ae83acdd2ULL, 0xd2066969b9696fd0ULL, + 0x124109092409482dULL, 0xe0d77070dd70a7adULL, 0x716fb6b6e2b6d954ULL, + 0xbd1ed0d067d0ceb7ULL, 0xc7d6eded93ed3b7eULL, 0x85e2cccc17cc2edbULL, + 0x8468424215422a57ULL, 0x2d2c98985a98b4c2ULL, 0x55eda4a4aaa4490eULL, + 0x50752828a0285d88ULL, 0xb8865c5c6d5cda31ULL, 0xed6bf8f8c7f8933fULL, + 0x11c28686228644a4ULL, +}; + +static const u64 C3[256] = { + 0x7830d818186018c0ULL, 0xaf462623238c2305ULL, 0xf991b8c6c63fc67eULL, + 0x6fcdfbe8e887e813ULL, 0xa113cb878726874cULL, 0x626d11b8b8dab8a9ULL, + 0x0502090101040108ULL, 0x6e9e0d4f4f214f42ULL, 0xee6c9b3636d836adULL, + 0x0451ffa6a6a2a659ULL, 0xbdb90cd2d26fd2deULL, 0x06f70ef5f5f3f5fbULL, + 0x80f2967979f979efULL, 0xcede306f6fa16f5fULL, 0xef3f6d91917e91fcULL, + 0x07a4f852525552aaULL, 0xfdc04760609d6027ULL, 0x766535bcbccabc89ULL, + 0xcd2b379b9b569bacULL, 0x8c018a8e8e028e04ULL, 0x155bd2a3a3b6a371ULL, + 0x3c186c0c0c300c60ULL, 0x8af6847b7bf17bffULL, 0xe16a803535d435b5ULL, + 0x693af51d1d741de8ULL, 0x47ddb3e0e0a7e053ULL, 0xacb321d7d77bd7f6ULL, + 0xed999cc2c22fc25eULL, 0x965c432e2eb82e6dULL, 0x7a96294b4b314b62ULL, + 0x21e15dfefedffea3ULL, 0x16aed55757415782ULL, 0x412abd15155415a8ULL, + 0xb6eee87777c1779fULL, 0xeb6e923737dc37a5ULL, 0x56d79ee5e5b3e57bULL, + 0xd923139f9f469f8cULL, 0x17fd23f0f0e7f0d3ULL, 0x7f94204a4a354a6aULL, + 0x95a944dada4fda9eULL, 0x25b0a258587d58faULL, 0xca8fcfc9c903c906ULL, + 0x8d527c2929a42955ULL, 0x22145a0a0a280a50ULL, 0x4f7f50b1b1feb1e1ULL, + 0x1a5dc9a0a0baa069ULL, 0xdad6146b6bb16b7fULL, 0xab17d985852e855cULL, + 0x73673cbdbdcebd81ULL, 0x34ba8f5d5d695dd2ULL, 0x5020901010401080ULL, + 0x03f507f4f4f7f4f3ULL, 0xc08bddcbcb0bcb16ULL, 0xc67cd33e3ef83eedULL, + 0x110a2d0505140528ULL, 0xe6ce78676781671fULL, 0x53d597e4e4b7e473ULL, + 0xbb4e0227279c2725ULL, 0x5882734141194132ULL, 0x9d0ba78b8b168b2cULL, + 0x0153f6a7a7a6a751ULL, 0x94fab27d7de97dcfULL, 0xfb374995956e95dcULL, + 0x9fad56d8d847d88eULL, 0x30eb70fbfbcbfb8bULL, 0x71c1cdeeee9fee23ULL, + 0x91f8bb7c7ced7cc7ULL, 0xe3cc716666856617ULL, 0x8ea77bdddd53dda6ULL, + 0x4b2eaf17175c17b8ULL, 0x468e454747014702ULL, 0xdc211a9e9e429e84ULL, + 0xc589d4caca0fca1eULL, 0x995a582d2db42d75ULL, 0x79632ebfbfc6bf91ULL, + 0x1b0e3f07071c0738ULL, 0x2347acadad8ead01ULL, 0x2fb4b05a5a755aeaULL, + 0xb51bef838336836cULL, 0xff66b63333cc3385ULL, 0xf2c65c636391633fULL, + 0x0a04120202080210ULL, 0x384993aaaa92aa39ULL, 0xa8e2de7171d971afULL, + 0xcf8dc6c8c807c80eULL, 0x7d32d119196419c8ULL, 0x70923b4949394972ULL, + 0x9aaf5fd9d943d986ULL, 0x1df931f2f2eff2c3ULL, 0x48dba8e3e3abe34bULL, + 0x2ab6b95b5b715be2ULL, 0x920dbc88881a8834ULL, 0xc8293e9a9a529aa4ULL, + 0xbe4c0b262698262dULL, 0xfa64bf3232c8328dULL, 0x4a7d59b0b0fab0e9ULL, + 0x6acff2e9e983e91bULL, 0x331e770f0f3c0f78ULL, 0xa6b733d5d573d5e6ULL, + 0xba1df480803a8074ULL, 0x7c6127bebec2be99ULL, 0xde87ebcdcd13cd26ULL, + 0xe468893434d034bdULL, 0x75903248483d487aULL, 0x24e354ffffdbffabULL, + 0x8ff48d7a7af57af7ULL, 0xea3d6490907a90f4ULL, 0x3ebe9d5f5f615fc2ULL, + 0xa0403d202080201dULL, 0xd5d00f6868bd6867ULL, 0x7234ca1a1a681ad0ULL, + 0x2c41b7aeae82ae19ULL, 0x5e757db4b4eab4c9ULL, 0x19a8ce54544d549aULL, + 0xe53b7f93937693ecULL, 0xaa442f222288220dULL, 0xe9c86364648d6407ULL, + 0x12ff2af1f1e3f1dbULL, 0xa2e6cc7373d173bfULL, 0x5a24821212481290ULL, + 0x5d807a40401d403aULL, 0x2810480808200840ULL, 0xe89b95c3c32bc356ULL, + 0x7bc5dfecec97ec33ULL, 0x90ab4ddbdb4bdb96ULL, 0x1f5fc0a1a1bea161ULL, + 0x8307918d8d0e8d1cULL, 0xc97ac83d3df43df5ULL, 0xf1335b97976697ccULL, + 0x0000000000000000ULL, 0xd483f9cfcf1bcf36ULL, 0x87566e2b2bac2b45ULL, + 0xb3ece17676c57697ULL, 0xb019e68282328264ULL, 0xa9b128d6d67fd6feULL, + 0x7736c31b1b6c1bd8ULL, 0x5b7774b5b5eeb5c1ULL, 0x2943beafaf86af11ULL, + 0xdfd41d6a6ab56a77ULL, 0x0da0ea50505d50baULL, 0x4c8a574545094512ULL, + 0x18fb38f3f3ebf3cbULL, 0xf060ad3030c0309dULL, 0x74c3c4efef9bef2bULL, + 0xc37eda3f3ffc3fe5ULL, 0x1caac75555495592ULL, 0x1059dba2a2b2a279ULL, + 0x65c9e9eaea8fea03ULL, 0xecca6a656589650fULL, 0x686903babad2bab9ULL, + 0x935e4a2f2fbc2f65ULL, 0xe79d8ec0c027c04eULL, 0x81a160dede5fdebeULL, + 0x6c38fc1c1c701ce0ULL, 0x2ee746fdfdd3fdbbULL, 0x649a1f4d4d294d52ULL, + 0xe0397692927292e4ULL, 0xbceafa7575c9758fULL, 0x1e0c360606180630ULL, + 0x9809ae8a8a128a24ULL, 0x40794bb2b2f2b2f9ULL, 0x59d185e6e6bfe663ULL, + 0x361c7e0e0e380e70ULL, 0x633ee71f1f7c1ff8ULL, 0xf7c4556262956237ULL, + 0xa3b53ad4d477d4eeULL, 0x324d81a8a89aa829ULL, 0xf4315296966296c4ULL, + 0x3aef62f9f9c3f99bULL, 0xf697a3c5c533c566ULL, 0xb14a102525942535ULL, + 0x20b2ab59597959f2ULL, 0xae15d084842a8454ULL, 0xa7e4c57272d572b7ULL, + 0xdd72ec3939e439d5ULL, 0x6198164c4c2d4c5aULL, 0x3bbc945e5e655ecaULL, + 0x85f09f7878fd78e7ULL, 0xd870e53838e038ddULL, 0x8605988c8c0a8c14ULL, + 0xb2bf17d1d163d1c6ULL, 0x0b57e4a5a5aea541ULL, 0x4dd9a1e2e2afe243ULL, + 0xf8c24e616199612fULL, 0x457b42b3b3f6b3f1ULL, 0xa542342121842115ULL, + 0xd625089c9c4a9c94ULL, 0x663cee1e1e781ef0ULL, 0x5286614343114322ULL, + 0xfc93b1c7c73bc776ULL, 0x2be54ffcfcd7fcb3ULL, 0x1408240404100420ULL, + 0x08a2e351515951b2ULL, 0xc72f2599995e99bcULL, 0xc4da226d6da96d4fULL, + 0x391a650d0d340d68ULL, 0x35e979fafacffa83ULL, 0x84a369dfdf5bdfb6ULL, + 0x9bfca97e7ee57ed7ULL, 0xb44819242490243dULL, 0xd776fe3b3bec3bc5ULL, + 0x3d4b9aabab96ab31ULL, 0xd181f0cece1fce3eULL, 0x5522991111441188ULL, + 0x8903838f8f068f0cULL, 0x6b9c044e4e254e4aULL, 0x517366b7b7e6b7d1ULL, + 0x60cbe0ebeb8beb0bULL, 0xcc78c13c3cf03cfdULL, 0xbf1ffd81813e817cULL, + 0xfe354094946a94d4ULL, 0x0cf31cf7f7fbf7ebULL, 0x676f18b9b9deb9a1ULL, + 0x5f268b13134c1398ULL, 0x9c58512c2cb02c7dULL, 0xb8bb05d3d36bd3d6ULL, + 0x5cd38ce7e7bbe76bULL, 0xcbdc396e6ea56e57ULL, 0xf395aac4c437c46eULL, + 0x0f061b03030c0318ULL, 0x13acdc565645568aULL, 0x49885e44440d441aULL, + 0x9efea07f7fe17fdfULL, 0x374f88a9a99ea921ULL, 0x8254672a2aa82a4dULL, + 0x6d6b0abbbbd6bbb1ULL, 0xe29f87c1c123c146ULL, 0x02a6f153535153a2ULL, + 0x8ba572dcdc57dcaeULL, 0x2716530b0b2c0b58ULL, 0xd327019d9d4e9d9cULL, + 0xc1d82b6c6cad6c47ULL, 0xf562a43131c43195ULL, 0xb9e8f37474cd7487ULL, + 0x09f115f6f6fff6e3ULL, 0x438c4c464605460aULL, 0x2645a5acac8aac09ULL, + 0x970fb589891e893cULL, 0x4428b414145014a0ULL, 0x42dfbae1e1a3e15bULL, + 0x4e2ca616165816b0ULL, 0xd274f73a3ae83acdULL, 0xd0d2066969b9696fULL, + 0x2d12410909240948ULL, 0xade0d77070dd70a7ULL, 0x54716fb6b6e2b6d9ULL, + 0xb7bd1ed0d067d0ceULL, 0x7ec7d6eded93ed3bULL, 0xdb85e2cccc17cc2eULL, + 0x578468424215422aULL, 0xc22d2c98985a98b4ULL, 0x0e55eda4a4aaa449ULL, + 0x8850752828a0285dULL, 0x31b8865c5c6d5cdaULL, 0x3fed6bf8f8c7f893ULL, + 0xa411c28686228644ULL, +}; + +static const u64 C4[256] = { + 0xc07830d818186018ULL, 0x05af462623238c23ULL, 0x7ef991b8c6c63fc6ULL, + 0x136fcdfbe8e887e8ULL, 0x4ca113cb87872687ULL, 0xa9626d11b8b8dab8ULL, + 0x0805020901010401ULL, 0x426e9e0d4f4f214fULL, 0xadee6c9b3636d836ULL, + 0x590451ffa6a6a2a6ULL, 0xdebdb90cd2d26fd2ULL, 0xfb06f70ef5f5f3f5ULL, + 0xef80f2967979f979ULL, 0x5fcede306f6fa16fULL, 0xfcef3f6d91917e91ULL, + 0xaa07a4f852525552ULL, 0x27fdc04760609d60ULL, 0x89766535bcbccabcULL, + 0xaccd2b379b9b569bULL, 0x048c018a8e8e028eULL, 0x71155bd2a3a3b6a3ULL, + 0x603c186c0c0c300cULL, 0xff8af6847b7bf17bULL, 0xb5e16a803535d435ULL, + 0xe8693af51d1d741dULL, 0x5347ddb3e0e0a7e0ULL, 0xf6acb321d7d77bd7ULL, + 0x5eed999cc2c22fc2ULL, 0x6d965c432e2eb82eULL, 0x627a96294b4b314bULL, + 0xa321e15dfefedffeULL, 0x8216aed557574157ULL, 0xa8412abd15155415ULL, + 0x9fb6eee87777c177ULL, 0xa5eb6e923737dc37ULL, 0x7b56d79ee5e5b3e5ULL, + 0x8cd923139f9f469fULL, 0xd317fd23f0f0e7f0ULL, 0x6a7f94204a4a354aULL, + 0x9e95a944dada4fdaULL, 0xfa25b0a258587d58ULL, 0x06ca8fcfc9c903c9ULL, + 0x558d527c2929a429ULL, 0x5022145a0a0a280aULL, 0xe14f7f50b1b1feb1ULL, + 0x691a5dc9a0a0baa0ULL, 0x7fdad6146b6bb16bULL, 0x5cab17d985852e85ULL, + 0x8173673cbdbdcebdULL, 0xd234ba8f5d5d695dULL, 0x8050209010104010ULL, + 0xf303f507f4f4f7f4ULL, 0x16c08bddcbcb0bcbULL, 0xedc67cd33e3ef83eULL, + 0x28110a2d05051405ULL, 0x1fe6ce7867678167ULL, 0x7353d597e4e4b7e4ULL, + 0x25bb4e0227279c27ULL, 0x3258827341411941ULL, 0x2c9d0ba78b8b168bULL, + 0x510153f6a7a7a6a7ULL, 0xcf94fab27d7de97dULL, 0xdcfb374995956e95ULL, + 0x8e9fad56d8d847d8ULL, 0x8b30eb70fbfbcbfbULL, 0x2371c1cdeeee9feeULL, + 0xc791f8bb7c7ced7cULL, 0x17e3cc7166668566ULL, 0xa68ea77bdddd53ddULL, + 0xb84b2eaf17175c17ULL, 0x02468e4547470147ULL, 0x84dc211a9e9e429eULL, + 0x1ec589d4caca0fcaULL, 0x75995a582d2db42dULL, 0x9179632ebfbfc6bfULL, + 0x381b0e3f07071c07ULL, 0x012347acadad8eadULL, 0xea2fb4b05a5a755aULL, + 0x6cb51bef83833683ULL, 0x85ff66b63333cc33ULL, 0x3ff2c65c63639163ULL, + 0x100a041202020802ULL, 0x39384993aaaa92aaULL, 0xafa8e2de7171d971ULL, + 0x0ecf8dc6c8c807c8ULL, 0xc87d32d119196419ULL, 0x7270923b49493949ULL, + 0x869aaf5fd9d943d9ULL, 0xc31df931f2f2eff2ULL, 0x4b48dba8e3e3abe3ULL, + 0xe22ab6b95b5b715bULL, 0x34920dbc88881a88ULL, 0xa4c8293e9a9a529aULL, + 0x2dbe4c0b26269826ULL, 0x8dfa64bf3232c832ULL, 0xe94a7d59b0b0fab0ULL, + 0x1b6acff2e9e983e9ULL, 0x78331e770f0f3c0fULL, 0xe6a6b733d5d573d5ULL, + 0x74ba1df480803a80ULL, 0x997c6127bebec2beULL, 0x26de87ebcdcd13cdULL, + 0xbde468893434d034ULL, 0x7a75903248483d48ULL, 0xab24e354ffffdbffULL, + 0xf78ff48d7a7af57aULL, 0xf4ea3d6490907a90ULL, 0xc23ebe9d5f5f615fULL, + 0x1da0403d20208020ULL, 0x67d5d00f6868bd68ULL, 0xd07234ca1a1a681aULL, + 0x192c41b7aeae82aeULL, 0xc95e757db4b4eab4ULL, 0x9a19a8ce54544d54ULL, + 0xece53b7f93937693ULL, 0x0daa442f22228822ULL, 0x07e9c86364648d64ULL, + 0xdb12ff2af1f1e3f1ULL, 0xbfa2e6cc7373d173ULL, 0x905a248212124812ULL, + 0x3a5d807a40401d40ULL, 0x4028104808082008ULL, 0x56e89b95c3c32bc3ULL, + 0x337bc5dfecec97ecULL, 0x9690ab4ddbdb4bdbULL, 0x611f5fc0a1a1bea1ULL, + 0x1c8307918d8d0e8dULL, 0xf5c97ac83d3df43dULL, 0xccf1335b97976697ULL, + 0x0000000000000000ULL, 0x36d483f9cfcf1bcfULL, 0x4587566e2b2bac2bULL, + 0x97b3ece17676c576ULL, 0x64b019e682823282ULL, 0xfea9b128d6d67fd6ULL, + 0xd87736c31b1b6c1bULL, 0xc15b7774b5b5eeb5ULL, 0x112943beafaf86afULL, + 0x77dfd41d6a6ab56aULL, 0xba0da0ea50505d50ULL, 0x124c8a5745450945ULL, + 0xcb18fb38f3f3ebf3ULL, 0x9df060ad3030c030ULL, 0x2b74c3c4efef9befULL, + 0xe5c37eda3f3ffc3fULL, 0x921caac755554955ULL, 0x791059dba2a2b2a2ULL, + 0x0365c9e9eaea8feaULL, 0x0fecca6a65658965ULL, 0xb9686903babad2baULL, + 0x65935e4a2f2fbc2fULL, 0x4ee79d8ec0c027c0ULL, 0xbe81a160dede5fdeULL, + 0xe06c38fc1c1c701cULL, 0xbb2ee746fdfdd3fdULL, 0x52649a1f4d4d294dULL, + 0xe4e0397692927292ULL, 0x8fbceafa7575c975ULL, 0x301e0c3606061806ULL, + 0x249809ae8a8a128aULL, 0xf940794bb2b2f2b2ULL, 0x6359d185e6e6bfe6ULL, + 0x70361c7e0e0e380eULL, 0xf8633ee71f1f7c1fULL, 0x37f7c45562629562ULL, + 0xeea3b53ad4d477d4ULL, 0x29324d81a8a89aa8ULL, 0xc4f4315296966296ULL, + 0x9b3aef62f9f9c3f9ULL, 0x66f697a3c5c533c5ULL, 0x35b14a1025259425ULL, + 0xf220b2ab59597959ULL, 0x54ae15d084842a84ULL, 0xb7a7e4c57272d572ULL, + 0xd5dd72ec3939e439ULL, 0x5a6198164c4c2d4cULL, 0xca3bbc945e5e655eULL, + 0xe785f09f7878fd78ULL, 0xddd870e53838e038ULL, 0x148605988c8c0a8cULL, + 0xc6b2bf17d1d163d1ULL, 0x410b57e4a5a5aea5ULL, 0x434dd9a1e2e2afe2ULL, + 0x2ff8c24e61619961ULL, 0xf1457b42b3b3f6b3ULL, 0x15a5423421218421ULL, + 0x94d625089c9c4a9cULL, 0xf0663cee1e1e781eULL, 0x2252866143431143ULL, + 0x76fc93b1c7c73bc7ULL, 0xb32be54ffcfcd7fcULL, 0x2014082404041004ULL, + 0xb208a2e351515951ULL, 0xbcc72f2599995e99ULL, 0x4fc4da226d6da96dULL, + 0x68391a650d0d340dULL, 0x8335e979fafacffaULL, 0xb684a369dfdf5bdfULL, + 0xd79bfca97e7ee57eULL, 0x3db4481924249024ULL, 0xc5d776fe3b3bec3bULL, + 0x313d4b9aabab96abULL, 0x3ed181f0cece1fceULL, 0x8855229911114411ULL, + 0x0c8903838f8f068fULL, 0x4a6b9c044e4e254eULL, 0xd1517366b7b7e6b7ULL, + 0x0b60cbe0ebeb8bebULL, 0xfdcc78c13c3cf03cULL, 0x7cbf1ffd81813e81ULL, + 0xd4fe354094946a94ULL, 0xeb0cf31cf7f7fbf7ULL, 0xa1676f18b9b9deb9ULL, + 0x985f268b13134c13ULL, 0x7d9c58512c2cb02cULL, 0xd6b8bb05d3d36bd3ULL, + 0x6b5cd38ce7e7bbe7ULL, 0x57cbdc396e6ea56eULL, 0x6ef395aac4c437c4ULL, + 0x180f061b03030c03ULL, 0x8a13acdc56564556ULL, 0x1a49885e44440d44ULL, + 0xdf9efea07f7fe17fULL, 0x21374f88a9a99ea9ULL, 0x4d8254672a2aa82aULL, + 0xb16d6b0abbbbd6bbULL, 0x46e29f87c1c123c1ULL, 0xa202a6f153535153ULL, + 0xae8ba572dcdc57dcULL, 0x582716530b0b2c0bULL, 0x9cd327019d9d4e9dULL, + 0x47c1d82b6c6cad6cULL, 0x95f562a43131c431ULL, 0x87b9e8f37474cd74ULL, + 0xe309f115f6f6fff6ULL, 0x0a438c4c46460546ULL, 0x092645a5acac8aacULL, + 0x3c970fb589891e89ULL, 0xa04428b414145014ULL, 0x5b42dfbae1e1a3e1ULL, + 0xb04e2ca616165816ULL, 0xcdd274f73a3ae83aULL, 0x6fd0d2066969b969ULL, + 0x482d124109092409ULL, 0xa7ade0d77070dd70ULL, 0xd954716fb6b6e2b6ULL, + 0xceb7bd1ed0d067d0ULL, 0x3b7ec7d6eded93edULL, 0x2edb85e2cccc17ccULL, + 0x2a57846842421542ULL, 0xb4c22d2c98985a98ULL, 0x490e55eda4a4aaa4ULL, + 0x5d8850752828a028ULL, 0xda31b8865c5c6d5cULL, 0x933fed6bf8f8c7f8ULL, + 0x44a411c286862286ULL, +}; + +static const u64 C5[256] = { + 0x18c07830d8181860ULL, 0x2305af462623238cULL, 0xc67ef991b8c6c63fULL, + 0xe8136fcdfbe8e887ULL, 0x874ca113cb878726ULL, 0xb8a9626d11b8b8daULL, + 0x0108050209010104ULL, 0x4f426e9e0d4f4f21ULL, 0x36adee6c9b3636d8ULL, + 0xa6590451ffa6a6a2ULL, 0xd2debdb90cd2d26fULL, 0xf5fb06f70ef5f5f3ULL, + 0x79ef80f2967979f9ULL, 0x6f5fcede306f6fa1ULL, 0x91fcef3f6d91917eULL, + 0x52aa07a4f8525255ULL, 0x6027fdc04760609dULL, 0xbc89766535bcbccaULL, + 0x9baccd2b379b9b56ULL, 0x8e048c018a8e8e02ULL, 0xa371155bd2a3a3b6ULL, + 0x0c603c186c0c0c30ULL, 0x7bff8af6847b7bf1ULL, 0x35b5e16a803535d4ULL, + 0x1de8693af51d1d74ULL, 0xe05347ddb3e0e0a7ULL, 0xd7f6acb321d7d77bULL, + 0xc25eed999cc2c22fULL, 0x2e6d965c432e2eb8ULL, 0x4b627a96294b4b31ULL, + 0xfea321e15dfefedfULL, 0x578216aed5575741ULL, 0x15a8412abd151554ULL, + 0x779fb6eee87777c1ULL, 0x37a5eb6e923737dcULL, 0xe57b56d79ee5e5b3ULL, + 0x9f8cd923139f9f46ULL, 0xf0d317fd23f0f0e7ULL, 0x4a6a7f94204a4a35ULL, + 0xda9e95a944dada4fULL, 0x58fa25b0a258587dULL, 0xc906ca8fcfc9c903ULL, + 0x29558d527c2929a4ULL, 0x0a5022145a0a0a28ULL, 0xb1e14f7f50b1b1feULL, + 0xa0691a5dc9a0a0baULL, 0x6b7fdad6146b6bb1ULL, 0x855cab17d985852eULL, + 0xbd8173673cbdbdceULL, 0x5dd234ba8f5d5d69ULL, 0x1080502090101040ULL, + 0xf4f303f507f4f4f7ULL, 0xcb16c08bddcbcb0bULL, 0x3eedc67cd33e3ef8ULL, + 0x0528110a2d050514ULL, 0x671fe6ce78676781ULL, 0xe47353d597e4e4b7ULL, + 0x2725bb4e0227279cULL, 0x4132588273414119ULL, 0x8b2c9d0ba78b8b16ULL, + 0xa7510153f6a7a7a6ULL, 0x7dcf94fab27d7de9ULL, 0x95dcfb374995956eULL, + 0xd88e9fad56d8d847ULL, 0xfb8b30eb70fbfbcbULL, 0xee2371c1cdeeee9fULL, + 0x7cc791f8bb7c7cedULL, 0x6617e3cc71666685ULL, 0xdda68ea77bdddd53ULL, + 0x17b84b2eaf17175cULL, 0x4702468e45474701ULL, 0x9e84dc211a9e9e42ULL, + 0xca1ec589d4caca0fULL, 0x2d75995a582d2db4ULL, 0xbf9179632ebfbfc6ULL, + 0x07381b0e3f07071cULL, 0xad012347acadad8eULL, 0x5aea2fb4b05a5a75ULL, + 0x836cb51bef838336ULL, 0x3385ff66b63333ccULL, 0x633ff2c65c636391ULL, + 0x02100a0412020208ULL, 0xaa39384993aaaa92ULL, 0x71afa8e2de7171d9ULL, + 0xc80ecf8dc6c8c807ULL, 0x19c87d32d1191964ULL, 0x497270923b494939ULL, + 0xd9869aaf5fd9d943ULL, 0xf2c31df931f2f2efULL, 0xe34b48dba8e3e3abULL, + 0x5be22ab6b95b5b71ULL, 0x8834920dbc88881aULL, 0x9aa4c8293e9a9a52ULL, + 0x262dbe4c0b262698ULL, 0x328dfa64bf3232c8ULL, 0xb0e94a7d59b0b0faULL, + 0xe91b6acff2e9e983ULL, 0x0f78331e770f0f3cULL, 0xd5e6a6b733d5d573ULL, + 0x8074ba1df480803aULL, 0xbe997c6127bebec2ULL, 0xcd26de87ebcdcd13ULL, + 0x34bde468893434d0ULL, 0x487a75903248483dULL, 0xffab24e354ffffdbULL, + 0x7af78ff48d7a7af5ULL, 0x90f4ea3d6490907aULL, 0x5fc23ebe9d5f5f61ULL, + 0x201da0403d202080ULL, 0x6867d5d00f6868bdULL, 0x1ad07234ca1a1a68ULL, + 0xae192c41b7aeae82ULL, 0xb4c95e757db4b4eaULL, 0x549a19a8ce54544dULL, + 0x93ece53b7f939376ULL, 0x220daa442f222288ULL, 0x6407e9c86364648dULL, + 0xf1db12ff2af1f1e3ULL, 0x73bfa2e6cc7373d1ULL, 0x12905a2482121248ULL, + 0x403a5d807a40401dULL, 0x0840281048080820ULL, 0xc356e89b95c3c32bULL, + 0xec337bc5dfecec97ULL, 0xdb9690ab4ddbdb4bULL, 0xa1611f5fc0a1a1beULL, + 0x8d1c8307918d8d0eULL, 0x3df5c97ac83d3df4ULL, 0x97ccf1335b979766ULL, + 0x0000000000000000ULL, 0xcf36d483f9cfcf1bULL, 0x2b4587566e2b2bacULL, + 0x7697b3ece17676c5ULL, 0x8264b019e6828232ULL, 0xd6fea9b128d6d67fULL, + 0x1bd87736c31b1b6cULL, 0xb5c15b7774b5b5eeULL, 0xaf112943beafaf86ULL, + 0x6a77dfd41d6a6ab5ULL, 0x50ba0da0ea50505dULL, 0x45124c8a57454509ULL, + 0xf3cb18fb38f3f3ebULL, 0x309df060ad3030c0ULL, 0xef2b74c3c4efef9bULL, + 0x3fe5c37eda3f3ffcULL, 0x55921caac7555549ULL, 0xa2791059dba2a2b2ULL, + 0xea0365c9e9eaea8fULL, 0x650fecca6a656589ULL, 0xbab9686903babad2ULL, + 0x2f65935e4a2f2fbcULL, 0xc04ee79d8ec0c027ULL, 0xdebe81a160dede5fULL, + 0x1ce06c38fc1c1c70ULL, 0xfdbb2ee746fdfdd3ULL, 0x4d52649a1f4d4d29ULL, + 0x92e4e03976929272ULL, 0x758fbceafa7575c9ULL, 0x06301e0c36060618ULL, + 0x8a249809ae8a8a12ULL, 0xb2f940794bb2b2f2ULL, 0xe66359d185e6e6bfULL, + 0x0e70361c7e0e0e38ULL, 0x1ff8633ee71f1f7cULL, 0x6237f7c455626295ULL, + 0xd4eea3b53ad4d477ULL, 0xa829324d81a8a89aULL, 0x96c4f43152969662ULL, + 0xf99b3aef62f9f9c3ULL, 0xc566f697a3c5c533ULL, 0x2535b14a10252594ULL, + 0x59f220b2ab595979ULL, 0x8454ae15d084842aULL, 0x72b7a7e4c57272d5ULL, + 0x39d5dd72ec3939e4ULL, 0x4c5a6198164c4c2dULL, 0x5eca3bbc945e5e65ULL, + 0x78e785f09f7878fdULL, 0x38ddd870e53838e0ULL, 0x8c148605988c8c0aULL, + 0xd1c6b2bf17d1d163ULL, 0xa5410b57e4a5a5aeULL, 0xe2434dd9a1e2e2afULL, + 0x612ff8c24e616199ULL, 0xb3f1457b42b3b3f6ULL, 0x2115a54234212184ULL, + 0x9c94d625089c9c4aULL, 0x1ef0663cee1e1e78ULL, 0x4322528661434311ULL, + 0xc776fc93b1c7c73bULL, 0xfcb32be54ffcfcd7ULL, 0x0420140824040410ULL, + 0x51b208a2e3515159ULL, 0x99bcc72f2599995eULL, 0x6d4fc4da226d6da9ULL, + 0x0d68391a650d0d34ULL, 0xfa8335e979fafacfULL, 0xdfb684a369dfdf5bULL, + 0x7ed79bfca97e7ee5ULL, 0x243db44819242490ULL, 0x3bc5d776fe3b3becULL, + 0xab313d4b9aabab96ULL, 0xce3ed181f0cece1fULL, 0x1188552299111144ULL, + 0x8f0c8903838f8f06ULL, 0x4e4a6b9c044e4e25ULL, 0xb7d1517366b7b7e6ULL, + 0xeb0b60cbe0ebeb8bULL, 0x3cfdcc78c13c3cf0ULL, 0x817cbf1ffd81813eULL, + 0x94d4fe354094946aULL, 0xf7eb0cf31cf7f7fbULL, 0xb9a1676f18b9b9deULL, + 0x13985f268b13134cULL, 0x2c7d9c58512c2cb0ULL, 0xd3d6b8bb05d3d36bULL, + 0xe76b5cd38ce7e7bbULL, 0x6e57cbdc396e6ea5ULL, 0xc46ef395aac4c437ULL, + 0x03180f061b03030cULL, 0x568a13acdc565645ULL, 0x441a49885e44440dULL, + 0x7fdf9efea07f7fe1ULL, 0xa921374f88a9a99eULL, 0x2a4d8254672a2aa8ULL, + 0xbbb16d6b0abbbbd6ULL, 0xc146e29f87c1c123ULL, 0x53a202a6f1535351ULL, + 0xdcae8ba572dcdc57ULL, 0x0b582716530b0b2cULL, 0x9d9cd327019d9d4eULL, + 0x6c47c1d82b6c6cadULL, 0x3195f562a43131c4ULL, 0x7487b9e8f37474cdULL, + 0xf6e309f115f6f6ffULL, 0x460a438c4c464605ULL, 0xac092645a5acac8aULL, + 0x893c970fb589891eULL, 0x14a04428b4141450ULL, 0xe15b42dfbae1e1a3ULL, + 0x16b04e2ca6161658ULL, 0x3acdd274f73a3ae8ULL, 0x696fd0d2066969b9ULL, + 0x09482d1241090924ULL, 0x70a7ade0d77070ddULL, 0xb6d954716fb6b6e2ULL, + 0xd0ceb7bd1ed0d067ULL, 0xed3b7ec7d6eded93ULL, 0xcc2edb85e2cccc17ULL, + 0x422a578468424215ULL, 0x98b4c22d2c98985aULL, 0xa4490e55eda4a4aaULL, + 0x285d8850752828a0ULL, 0x5cda31b8865c5c6dULL, 0xf8933fed6bf8f8c7ULL, + 0x8644a411c2868622ULL, +}; + +static const u64 C6[256] = { + 0x6018c07830d81818ULL, 0x8c2305af46262323ULL, 0x3fc67ef991b8c6c6ULL, + 0x87e8136fcdfbe8e8ULL, 0x26874ca113cb8787ULL, 0xdab8a9626d11b8b8ULL, + 0x0401080502090101ULL, 0x214f426e9e0d4f4fULL, 0xd836adee6c9b3636ULL, + 0xa2a6590451ffa6a6ULL, 0x6fd2debdb90cd2d2ULL, 0xf3f5fb06f70ef5f5ULL, + 0xf979ef80f2967979ULL, 0xa16f5fcede306f6fULL, 0x7e91fcef3f6d9191ULL, + 0x5552aa07a4f85252ULL, 0x9d6027fdc0476060ULL, 0xcabc89766535bcbcULL, + 0x569baccd2b379b9bULL, 0x028e048c018a8e8eULL, 0xb6a371155bd2a3a3ULL, + 0x300c603c186c0c0cULL, 0xf17bff8af6847b7bULL, 0xd435b5e16a803535ULL, + 0x741de8693af51d1dULL, 0xa7e05347ddb3e0e0ULL, 0x7bd7f6acb321d7d7ULL, + 0x2fc25eed999cc2c2ULL, 0xb82e6d965c432e2eULL, 0x314b627a96294b4bULL, + 0xdffea321e15dfefeULL, 0x41578216aed55757ULL, 0x5415a8412abd1515ULL, + 0xc1779fb6eee87777ULL, 0xdc37a5eb6e923737ULL, 0xb3e57b56d79ee5e5ULL, + 0x469f8cd923139f9fULL, 0xe7f0d317fd23f0f0ULL, 0x354a6a7f94204a4aULL, + 0x4fda9e95a944dadaULL, 0x7d58fa25b0a25858ULL, 0x03c906ca8fcfc9c9ULL, + 0xa429558d527c2929ULL, 0x280a5022145a0a0aULL, 0xfeb1e14f7f50b1b1ULL, + 0xbaa0691a5dc9a0a0ULL, 0xb16b7fdad6146b6bULL, 0x2e855cab17d98585ULL, + 0xcebd8173673cbdbdULL, 0x695dd234ba8f5d5dULL, 0x4010805020901010ULL, + 0xf7f4f303f507f4f4ULL, 0x0bcb16c08bddcbcbULL, 0xf83eedc67cd33e3eULL, + 0x140528110a2d0505ULL, 0x81671fe6ce786767ULL, 0xb7e47353d597e4e4ULL, + 0x9c2725bb4e022727ULL, 0x1941325882734141ULL, 0x168b2c9d0ba78b8bULL, + 0xa6a7510153f6a7a7ULL, 0xe97dcf94fab27d7dULL, 0x6e95dcfb37499595ULL, + 0x47d88e9fad56d8d8ULL, 0xcbfb8b30eb70fbfbULL, 0x9fee2371c1cdeeeeULL, + 0xed7cc791f8bb7c7cULL, 0x856617e3cc716666ULL, 0x53dda68ea77bddddULL, + 0x5c17b84b2eaf1717ULL, 0x014702468e454747ULL, 0x429e84dc211a9e9eULL, + 0x0fca1ec589d4cacaULL, 0xb42d75995a582d2dULL, 0xc6bf9179632ebfbfULL, + 0x1c07381b0e3f0707ULL, 0x8ead012347acadadULL, 0x755aea2fb4b05a5aULL, + 0x36836cb51bef8383ULL, 0xcc3385ff66b63333ULL, 0x91633ff2c65c6363ULL, + 0x0802100a04120202ULL, 0x92aa39384993aaaaULL, 0xd971afa8e2de7171ULL, + 0x07c80ecf8dc6c8c8ULL, 0x6419c87d32d11919ULL, 0x39497270923b4949ULL, + 0x43d9869aaf5fd9d9ULL, 0xeff2c31df931f2f2ULL, 0xabe34b48dba8e3e3ULL, + 0x715be22ab6b95b5bULL, 0x1a8834920dbc8888ULL, 0x529aa4c8293e9a9aULL, + 0x98262dbe4c0b2626ULL, 0xc8328dfa64bf3232ULL, 0xfab0e94a7d59b0b0ULL, + 0x83e91b6acff2e9e9ULL, 0x3c0f78331e770f0fULL, 0x73d5e6a6b733d5d5ULL, + 0x3a8074ba1df48080ULL, 0xc2be997c6127bebeULL, 0x13cd26de87ebcdcdULL, + 0xd034bde468893434ULL, 0x3d487a7590324848ULL, 0xdbffab24e354ffffULL, + 0xf57af78ff48d7a7aULL, 0x7a90f4ea3d649090ULL, 0x615fc23ebe9d5f5fULL, + 0x80201da0403d2020ULL, 0xbd6867d5d00f6868ULL, 0x681ad07234ca1a1aULL, + 0x82ae192c41b7aeaeULL, 0xeab4c95e757db4b4ULL, 0x4d549a19a8ce5454ULL, + 0x7693ece53b7f9393ULL, 0x88220daa442f2222ULL, 0x8d6407e9c8636464ULL, + 0xe3f1db12ff2af1f1ULL, 0xd173bfa2e6cc7373ULL, 0x4812905a24821212ULL, + 0x1d403a5d807a4040ULL, 0x2008402810480808ULL, 0x2bc356e89b95c3c3ULL, + 0x97ec337bc5dfececULL, 0x4bdb9690ab4ddbdbULL, 0xbea1611f5fc0a1a1ULL, + 0x0e8d1c8307918d8dULL, 0xf43df5c97ac83d3dULL, 0x6697ccf1335b9797ULL, + 0x0000000000000000ULL, 0x1bcf36d483f9cfcfULL, 0xac2b4587566e2b2bULL, + 0xc57697b3ece17676ULL, 0x328264b019e68282ULL, 0x7fd6fea9b128d6d6ULL, + 0x6c1bd87736c31b1bULL, 0xeeb5c15b7774b5b5ULL, 0x86af112943beafafULL, + 0xb56a77dfd41d6a6aULL, 0x5d50ba0da0ea5050ULL, 0x0945124c8a574545ULL, + 0xebf3cb18fb38f3f3ULL, 0xc0309df060ad3030ULL, 0x9bef2b74c3c4efefULL, + 0xfc3fe5c37eda3f3fULL, 0x4955921caac75555ULL, 0xb2a2791059dba2a2ULL, + 0x8fea0365c9e9eaeaULL, 0x89650fecca6a6565ULL, 0xd2bab9686903babaULL, + 0xbc2f65935e4a2f2fULL, 0x27c04ee79d8ec0c0ULL, 0x5fdebe81a160dedeULL, + 0x701ce06c38fc1c1cULL, 0xd3fdbb2ee746fdfdULL, 0x294d52649a1f4d4dULL, + 0x7292e4e039769292ULL, 0xc9758fbceafa7575ULL, 0x1806301e0c360606ULL, + 0x128a249809ae8a8aULL, 0xf2b2f940794bb2b2ULL, 0xbfe66359d185e6e6ULL, + 0x380e70361c7e0e0eULL, 0x7c1ff8633ee71f1fULL, 0x956237f7c4556262ULL, + 0x77d4eea3b53ad4d4ULL, 0x9aa829324d81a8a8ULL, 0x6296c4f431529696ULL, + 0xc3f99b3aef62f9f9ULL, 0x33c566f697a3c5c5ULL, 0x942535b14a102525ULL, + 0x7959f220b2ab5959ULL, 0x2a8454ae15d08484ULL, 0xd572b7a7e4c57272ULL, + 0xe439d5dd72ec3939ULL, 0x2d4c5a6198164c4cULL, 0x655eca3bbc945e5eULL, + 0xfd78e785f09f7878ULL, 0xe038ddd870e53838ULL, 0x0a8c148605988c8cULL, + 0x63d1c6b2bf17d1d1ULL, 0xaea5410b57e4a5a5ULL, 0xafe2434dd9a1e2e2ULL, + 0x99612ff8c24e6161ULL, 0xf6b3f1457b42b3b3ULL, 0x842115a542342121ULL, + 0x4a9c94d625089c9cULL, 0x781ef0663cee1e1eULL, 0x1143225286614343ULL, + 0x3bc776fc93b1c7c7ULL, 0xd7fcb32be54ffcfcULL, 0x1004201408240404ULL, + 0x5951b208a2e35151ULL, 0x5e99bcc72f259999ULL, 0xa96d4fc4da226d6dULL, + 0x340d68391a650d0dULL, 0xcffa8335e979fafaULL, 0x5bdfb684a369dfdfULL, + 0xe57ed79bfca97e7eULL, 0x90243db448192424ULL, 0xec3bc5d776fe3b3bULL, + 0x96ab313d4b9aababULL, 0x1fce3ed181f0ceceULL, 0x4411885522991111ULL, + 0x068f0c8903838f8fULL, 0x254e4a6b9c044e4eULL, 0xe6b7d1517366b7b7ULL, + 0x8beb0b60cbe0ebebULL, 0xf03cfdcc78c13c3cULL, 0x3e817cbf1ffd8181ULL, + 0x6a94d4fe35409494ULL, 0xfbf7eb0cf31cf7f7ULL, 0xdeb9a1676f18b9b9ULL, + 0x4c13985f268b1313ULL, 0xb02c7d9c58512c2cULL, 0x6bd3d6b8bb05d3d3ULL, + 0xbbe76b5cd38ce7e7ULL, 0xa56e57cbdc396e6eULL, 0x37c46ef395aac4c4ULL, + 0x0c03180f061b0303ULL, 0x45568a13acdc5656ULL, 0x0d441a49885e4444ULL, + 0xe17fdf9efea07f7fULL, 0x9ea921374f88a9a9ULL, 0xa82a4d8254672a2aULL, + 0xd6bbb16d6b0abbbbULL, 0x23c146e29f87c1c1ULL, 0x5153a202a6f15353ULL, + 0x57dcae8ba572dcdcULL, 0x2c0b582716530b0bULL, 0x4e9d9cd327019d9dULL, + 0xad6c47c1d82b6c6cULL, 0xc43195f562a43131ULL, 0xcd7487b9e8f37474ULL, + 0xfff6e309f115f6f6ULL, 0x05460a438c4c4646ULL, 0x8aac092645a5acacULL, + 0x1e893c970fb58989ULL, 0x5014a04428b41414ULL, 0xa3e15b42dfbae1e1ULL, + 0x5816b04e2ca61616ULL, 0xe83acdd274f73a3aULL, 0xb9696fd0d2066969ULL, + 0x2409482d12410909ULL, 0xdd70a7ade0d77070ULL, 0xe2b6d954716fb6b6ULL, + 0x67d0ceb7bd1ed0d0ULL, 0x93ed3b7ec7d6ededULL, 0x17cc2edb85e2ccccULL, + 0x15422a5784684242ULL, 0x5a98b4c22d2c9898ULL, 0xaaa4490e55eda4a4ULL, + 0xa0285d8850752828ULL, 0x6d5cda31b8865c5cULL, 0xc7f8933fed6bf8f8ULL, + 0x228644a411c28686ULL, +}; + +static const u64 C7[256] = { + 0x186018c07830d818ULL, 0x238c2305af462623ULL, 0xc63fc67ef991b8c6ULL, + 0xe887e8136fcdfbe8ULL, 0x8726874ca113cb87ULL, 0xb8dab8a9626d11b8ULL, + 0x0104010805020901ULL, 0x4f214f426e9e0d4fULL, 0x36d836adee6c9b36ULL, + 0xa6a2a6590451ffa6ULL, 0xd26fd2debdb90cd2ULL, 0xf5f3f5fb06f70ef5ULL, + 0x79f979ef80f29679ULL, 0x6fa16f5fcede306fULL, 0x917e91fcef3f6d91ULL, + 0x525552aa07a4f852ULL, 0x609d6027fdc04760ULL, 0xbccabc89766535bcULL, + 0x9b569baccd2b379bULL, 0x8e028e048c018a8eULL, 0xa3b6a371155bd2a3ULL, + 0x0c300c603c186c0cULL, 0x7bf17bff8af6847bULL, 0x35d435b5e16a8035ULL, + 0x1d741de8693af51dULL, 0xe0a7e05347ddb3e0ULL, 0xd77bd7f6acb321d7ULL, + 0xc22fc25eed999cc2ULL, 0x2eb82e6d965c432eULL, 0x4b314b627a96294bULL, + 0xfedffea321e15dfeULL, 0x5741578216aed557ULL, 0x155415a8412abd15ULL, + 0x77c1779fb6eee877ULL, 0x37dc37a5eb6e9237ULL, 0xe5b3e57b56d79ee5ULL, + 0x9f469f8cd923139fULL, 0xf0e7f0d317fd23f0ULL, 0x4a354a6a7f94204aULL, + 0xda4fda9e95a944daULL, 0x587d58fa25b0a258ULL, 0xc903c906ca8fcfc9ULL, + 0x29a429558d527c29ULL, 0x0a280a5022145a0aULL, 0xb1feb1e14f7f50b1ULL, + 0xa0baa0691a5dc9a0ULL, 0x6bb16b7fdad6146bULL, 0x852e855cab17d985ULL, + 0xbdcebd8173673cbdULL, 0x5d695dd234ba8f5dULL, 0x1040108050209010ULL, + 0xf4f7f4f303f507f4ULL, 0xcb0bcb16c08bddcbULL, 0x3ef83eedc67cd33eULL, + 0x05140528110a2d05ULL, 0x6781671fe6ce7867ULL, 0xe4b7e47353d597e4ULL, + 0x279c2725bb4e0227ULL, 0x4119413258827341ULL, 0x8b168b2c9d0ba78bULL, + 0xa7a6a7510153f6a7ULL, 0x7de97dcf94fab27dULL, 0x956e95dcfb374995ULL, + 0xd847d88e9fad56d8ULL, 0xfbcbfb8b30eb70fbULL, 0xee9fee2371c1cdeeULL, + 0x7ced7cc791f8bb7cULL, 0x66856617e3cc7166ULL, 0xdd53dda68ea77bddULL, + 0x175c17b84b2eaf17ULL, 0x47014702468e4547ULL, 0x9e429e84dc211a9eULL, + 0xca0fca1ec589d4caULL, 0x2db42d75995a582dULL, 0xbfc6bf9179632ebfULL, + 0x071c07381b0e3f07ULL, 0xad8ead012347acadULL, 0x5a755aea2fb4b05aULL, + 0x8336836cb51bef83ULL, 0x33cc3385ff66b633ULL, 0x6391633ff2c65c63ULL, + 0x020802100a041202ULL, 0xaa92aa39384993aaULL, 0x71d971afa8e2de71ULL, + 0xc807c80ecf8dc6c8ULL, 0x196419c87d32d119ULL, 0x4939497270923b49ULL, + 0xd943d9869aaf5fd9ULL, 0xf2eff2c31df931f2ULL, 0xe3abe34b48dba8e3ULL, + 0x5b715be22ab6b95bULL, 0x881a8834920dbc88ULL, 0x9a529aa4c8293e9aULL, + 0x2698262dbe4c0b26ULL, 0x32c8328dfa64bf32ULL, 0xb0fab0e94a7d59b0ULL, + 0xe983e91b6acff2e9ULL, 0x0f3c0f78331e770fULL, 0xd573d5e6a6b733d5ULL, + 0x803a8074ba1df480ULL, 0xbec2be997c6127beULL, 0xcd13cd26de87ebcdULL, + 0x34d034bde4688934ULL, 0x483d487a75903248ULL, 0xffdbffab24e354ffULL, + 0x7af57af78ff48d7aULL, 0x907a90f4ea3d6490ULL, 0x5f615fc23ebe9d5fULL, + 0x2080201da0403d20ULL, 0x68bd6867d5d00f68ULL, 0x1a681ad07234ca1aULL, + 0xae82ae192c41b7aeULL, 0xb4eab4c95e757db4ULL, 0x544d549a19a8ce54ULL, + 0x937693ece53b7f93ULL, 0x2288220daa442f22ULL, 0x648d6407e9c86364ULL, + 0xf1e3f1db12ff2af1ULL, 0x73d173bfa2e6cc73ULL, 0x124812905a248212ULL, + 0x401d403a5d807a40ULL, 0x0820084028104808ULL, 0xc32bc356e89b95c3ULL, + 0xec97ec337bc5dfecULL, 0xdb4bdb9690ab4ddbULL, 0xa1bea1611f5fc0a1ULL, + 0x8d0e8d1c8307918dULL, 0x3df43df5c97ac83dULL, 0x976697ccf1335b97ULL, + 0x0000000000000000ULL, 0xcf1bcf36d483f9cfULL, 0x2bac2b4587566e2bULL, + 0x76c57697b3ece176ULL, 0x82328264b019e682ULL, 0xd67fd6fea9b128d6ULL, + 0x1b6c1bd87736c31bULL, 0xb5eeb5c15b7774b5ULL, 0xaf86af112943beafULL, + 0x6ab56a77dfd41d6aULL, 0x505d50ba0da0ea50ULL, 0x450945124c8a5745ULL, + 0xf3ebf3cb18fb38f3ULL, 0x30c0309df060ad30ULL, 0xef9bef2b74c3c4efULL, + 0x3ffc3fe5c37eda3fULL, 0x554955921caac755ULL, 0xa2b2a2791059dba2ULL, + 0xea8fea0365c9e9eaULL, 0x6589650fecca6a65ULL, 0xbad2bab9686903baULL, + 0x2fbc2f65935e4a2fULL, 0xc027c04ee79d8ec0ULL, 0xde5fdebe81a160deULL, + 0x1c701ce06c38fc1cULL, 0xfdd3fdbb2ee746fdULL, 0x4d294d52649a1f4dULL, + 0x927292e4e0397692ULL, 0x75c9758fbceafa75ULL, 0x061806301e0c3606ULL, + 0x8a128a249809ae8aULL, 0xb2f2b2f940794bb2ULL, 0xe6bfe66359d185e6ULL, + 0x0e380e70361c7e0eULL, 0x1f7c1ff8633ee71fULL, 0x62956237f7c45562ULL, + 0xd477d4eea3b53ad4ULL, 0xa89aa829324d81a8ULL, 0x966296c4f4315296ULL, + 0xf9c3f99b3aef62f9ULL, 0xc533c566f697a3c5ULL, 0x25942535b14a1025ULL, + 0x597959f220b2ab59ULL, 0x842a8454ae15d084ULL, 0x72d572b7a7e4c572ULL, + 0x39e439d5dd72ec39ULL, 0x4c2d4c5a6198164cULL, 0x5e655eca3bbc945eULL, + 0x78fd78e785f09f78ULL, 0x38e038ddd870e538ULL, 0x8c0a8c148605988cULL, + 0xd163d1c6b2bf17d1ULL, 0xa5aea5410b57e4a5ULL, 0xe2afe2434dd9a1e2ULL, + 0x6199612ff8c24e61ULL, 0xb3f6b3f1457b42b3ULL, 0x21842115a5423421ULL, + 0x9c4a9c94d625089cULL, 0x1e781ef0663cee1eULL, 0x4311432252866143ULL, + 0xc73bc776fc93b1c7ULL, 0xfcd7fcb32be54ffcULL, 0x0410042014082404ULL, + 0x515951b208a2e351ULL, 0x995e99bcc72f2599ULL, 0x6da96d4fc4da226dULL, + 0x0d340d68391a650dULL, 0xfacffa8335e979faULL, 0xdf5bdfb684a369dfULL, + 0x7ee57ed79bfca97eULL, 0x2490243db4481924ULL, 0x3bec3bc5d776fe3bULL, + 0xab96ab313d4b9aabULL, 0xce1fce3ed181f0ceULL, 0x1144118855229911ULL, + 0x8f068f0c8903838fULL, 0x4e254e4a6b9c044eULL, 0xb7e6b7d1517366b7ULL, + 0xeb8beb0b60cbe0ebULL, 0x3cf03cfdcc78c13cULL, 0x813e817cbf1ffd81ULL, + 0x946a94d4fe354094ULL, 0xf7fbf7eb0cf31cf7ULL, 0xb9deb9a1676f18b9ULL, + 0x134c13985f268b13ULL, 0x2cb02c7d9c58512cULL, 0xd36bd3d6b8bb05d3ULL, + 0xe7bbe76b5cd38ce7ULL, 0x6ea56e57cbdc396eULL, 0xc437c46ef395aac4ULL, + 0x030c03180f061b03ULL, 0x5645568a13acdc56ULL, 0x440d441a49885e44ULL, + 0x7fe17fdf9efea07fULL, 0xa99ea921374f88a9ULL, 0x2aa82a4d8254672aULL, + 0xbbd6bbb16d6b0abbULL, 0xc123c146e29f87c1ULL, 0x535153a202a6f153ULL, + 0xdc57dcae8ba572dcULL, 0x0b2c0b582716530bULL, 0x9d4e9d9cd327019dULL, + 0x6cad6c47c1d82b6cULL, 0x31c43195f562a431ULL, 0x74cd7487b9e8f374ULL, + 0xf6fff6e309f115f6ULL, 0x4605460a438c4c46ULL, 0xac8aac092645a5acULL, + 0x891e893c970fb589ULL, 0x145014a04428b414ULL, 0xe1a3e15b42dfbae1ULL, + 0x165816b04e2ca616ULL, 0x3ae83acdd274f73aULL, 0x69b9696fd0d20669ULL, + 0x092409482d124109ULL, 0x70dd70a7ade0d770ULL, 0xb6e2b6d954716fb6ULL, + 0xd067d0ceb7bd1ed0ULL, 0xed93ed3b7ec7d6edULL, 0xcc17cc2edb85e2ccULL, + 0x4215422a57846842ULL, 0x985a98b4c22d2c98ULL, 0xa4aaa4490e55eda4ULL, + 0x28a0285d88507528ULL, 0x5c6d5cda31b8865cULL, 0xf8c7f8933fed6bf8ULL, + 0x86228644a411c286ULL, +}; + +static const u64 rc[WHIRLPOOL_ROUNDS + 1] = { + 0x0000000000000000ULL, 0x1823c6e887b8014fULL, 0x36a6d2f5796f9152ULL, + 0x60bc9b8ea30c7b35ULL, 0x1de0d7c22e4bfe57ULL, 0x157737e59ff04adaULL, + 0x58c9290ab1a06b85ULL, 0xbd5d10f4cb3e0567ULL, 0xe427418ba77d95d8ULL, + 0xfbee7c66dd17479eULL, 0xca2dbf07ad5a8333ULL, +}; + +/** + * The core Whirlpool transform. + */ + +static void wp512_process_buffer(struct wp512_ctx *wctx) { + int i, r; + u64 K[8]; /* the round key */ + u64 block[8]; /* mu(buffer) */ + u64 state[8]; /* the cipher state */ + u64 L[8]; + u8 *buffer = wctx->buffer; + + for (i = 0; i < 8; i++, buffer += 8) { + block[i] = + (((u64)buffer[0] ) << 56) ^ + (((u64)buffer[1] & 0xffL) << 48) ^ + (((u64)buffer[2] & 0xffL) << 40) ^ + (((u64)buffer[3] & 0xffL) << 32) ^ + (((u64)buffer[4] & 0xffL) << 24) ^ + (((u64)buffer[5] & 0xffL) << 16) ^ + (((u64)buffer[6] & 0xffL) << 8) ^ + (((u64)buffer[7] & 0xffL) ); + } + + state[0] = block[0] ^ (K[0] = wctx->hash[0]); + state[1] = block[1] ^ (K[1] = wctx->hash[1]); + state[2] = block[2] ^ (K[2] = wctx->hash[2]); + state[3] = block[3] ^ (K[3] = wctx->hash[3]); + state[4] = block[4] ^ (K[4] = wctx->hash[4]); + state[5] = block[5] ^ (K[5] = wctx->hash[5]); + state[6] = block[6] ^ (K[6] = wctx->hash[6]); + state[7] = block[7] ^ (K[7] = wctx->hash[7]); + + for (r = 1; r <= WHIRLPOOL_ROUNDS; r++) { + + L[0] = C0[(int)(K[0] >> 56) ] ^ + C1[(int)(K[7] >> 48) & 0xff] ^ + C2[(int)(K[6] >> 40) & 0xff] ^ + C3[(int)(K[5] >> 32) & 0xff] ^ + C4[(int)(K[4] >> 24) & 0xff] ^ + C5[(int)(K[3] >> 16) & 0xff] ^ + C6[(int)(K[2] >> 8) & 0xff] ^ + C7[(int)(K[1] ) & 0xff] ^ + rc[r]; + + L[1] = C0[(int)(K[1] >> 56) ] ^ + C1[(int)(K[0] >> 48) & 0xff] ^ + C2[(int)(K[7] >> 40) & 0xff] ^ + C3[(int)(K[6] >> 32) & 0xff] ^ + C4[(int)(K[5] >> 24) & 0xff] ^ + C5[(int)(K[4] >> 16) & 0xff] ^ + C6[(int)(K[3] >> 8) & 0xff] ^ + C7[(int)(K[2] ) & 0xff]; + + L[2] = C0[(int)(K[2] >> 56) ] ^ + C1[(int)(K[1] >> 48) & 0xff] ^ + C2[(int)(K[0] >> 40) & 0xff] ^ + C3[(int)(K[7] >> 32) & 0xff] ^ + C4[(int)(K[6] >> 24) & 0xff] ^ + C5[(int)(K[5] >> 16) & 0xff] ^ + C6[(int)(K[4] >> 8) & 0xff] ^ + C7[(int)(K[3] ) & 0xff]; + + L[3] = C0[(int)(K[3] >> 56) ] ^ + C1[(int)(K[2] >> 48) & 0xff] ^ + C2[(int)(K[1] >> 40) & 0xff] ^ + C3[(int)(K[0] >> 32) & 0xff] ^ + C4[(int)(K[7] >> 24) & 0xff] ^ + C5[(int)(K[6] >> 16) & 0xff] ^ + C6[(int)(K[5] >> 8) & 0xff] ^ + C7[(int)(K[4] ) & 0xff]; + + L[4] = C0[(int)(K[4] >> 56) ] ^ + C1[(int)(K[3] >> 48) & 0xff] ^ + C2[(int)(K[2] >> 40) & 0xff] ^ + C3[(int)(K[1] >> 32) & 0xff] ^ + C4[(int)(K[0] >> 24) & 0xff] ^ + C5[(int)(K[7] >> 16) & 0xff] ^ + C6[(int)(K[6] >> 8) & 0xff] ^ + C7[(int)(K[5] ) & 0xff]; + + L[5] = C0[(int)(K[5] >> 56) ] ^ + C1[(int)(K[4] >> 48) & 0xff] ^ + C2[(int)(K[3] >> 40) & 0xff] ^ + C3[(int)(K[2] >> 32) & 0xff] ^ + C4[(int)(K[1] >> 24) & 0xff] ^ + C5[(int)(K[0] >> 16) & 0xff] ^ + C6[(int)(K[7] >> 8) & 0xff] ^ + C7[(int)(K[6] ) & 0xff]; + + L[6] = C0[(int)(K[6] >> 56) ] ^ + C1[(int)(K[5] >> 48) & 0xff] ^ + C2[(int)(K[4] >> 40) & 0xff] ^ + C3[(int)(K[3] >> 32) & 0xff] ^ + C4[(int)(K[2] >> 24) & 0xff] ^ + C5[(int)(K[1] >> 16) & 0xff] ^ + C6[(int)(K[0] >> 8) & 0xff] ^ + C7[(int)(K[7] ) & 0xff]; + + L[7] = C0[(int)(K[7] >> 56) ] ^ + C1[(int)(K[6] >> 48) & 0xff] ^ + C2[(int)(K[5] >> 40) & 0xff] ^ + C3[(int)(K[4] >> 32) & 0xff] ^ + C4[(int)(K[3] >> 24) & 0xff] ^ + C5[(int)(K[2] >> 16) & 0xff] ^ + C6[(int)(K[1] >> 8) & 0xff] ^ + C7[(int)(K[0] ) & 0xff]; + + K[0] = L[0]; + K[1] = L[1]; + K[2] = L[2]; + K[3] = L[3]; + K[4] = L[4]; + K[5] = L[5]; + K[6] = L[6]; + K[7] = L[7]; + + L[0] = C0[(int)(state[0] >> 56) ] ^ + C1[(int)(state[7] >> 48) & 0xff] ^ + C2[(int)(state[6] >> 40) & 0xff] ^ + C3[(int)(state[5] >> 32) & 0xff] ^ + C4[(int)(state[4] >> 24) & 0xff] ^ + C5[(int)(state[3] >> 16) & 0xff] ^ + C6[(int)(state[2] >> 8) & 0xff] ^ + C7[(int)(state[1] ) & 0xff] ^ + K[0]; + + L[1] = C0[(int)(state[1] >> 56) ] ^ + C1[(int)(state[0] >> 48) & 0xff] ^ + C2[(int)(state[7] >> 40) & 0xff] ^ + C3[(int)(state[6] >> 32) & 0xff] ^ + C4[(int)(state[5] >> 24) & 0xff] ^ + C5[(int)(state[4] >> 16) & 0xff] ^ + C6[(int)(state[3] >> 8) & 0xff] ^ + C7[(int)(state[2] ) & 0xff] ^ + K[1]; + + L[2] = C0[(int)(state[2] >> 56) ] ^ + C1[(int)(state[1] >> 48) & 0xff] ^ + C2[(int)(state[0] >> 40) & 0xff] ^ + C3[(int)(state[7] >> 32) & 0xff] ^ + C4[(int)(state[6] >> 24) & 0xff] ^ + C5[(int)(state[5] >> 16) & 0xff] ^ + C6[(int)(state[4] >> 8) & 0xff] ^ + C7[(int)(state[3] ) & 0xff] ^ + K[2]; + + L[3] = C0[(int)(state[3] >> 56) ] ^ + C1[(int)(state[2] >> 48) & 0xff] ^ + C2[(int)(state[1] >> 40) & 0xff] ^ + C3[(int)(state[0] >> 32) & 0xff] ^ + C4[(int)(state[7] >> 24) & 0xff] ^ + C5[(int)(state[6] >> 16) & 0xff] ^ + C6[(int)(state[5] >> 8) & 0xff] ^ + C7[(int)(state[4] ) & 0xff] ^ + K[3]; + + L[4] = C0[(int)(state[4] >> 56) ] ^ + C1[(int)(state[3] >> 48) & 0xff] ^ + C2[(int)(state[2] >> 40) & 0xff] ^ + C3[(int)(state[1] >> 32) & 0xff] ^ + C4[(int)(state[0] >> 24) & 0xff] ^ + C5[(int)(state[7] >> 16) & 0xff] ^ + C6[(int)(state[6] >> 8) & 0xff] ^ + C7[(int)(state[5] ) & 0xff] ^ + K[4]; + + L[5] = C0[(int)(state[5] >> 56) ] ^ + C1[(int)(state[4] >> 48) & 0xff] ^ + C2[(int)(state[3] >> 40) & 0xff] ^ + C3[(int)(state[2] >> 32) & 0xff] ^ + C4[(int)(state[1] >> 24) & 0xff] ^ + C5[(int)(state[0] >> 16) & 0xff] ^ + C6[(int)(state[7] >> 8) & 0xff] ^ + C7[(int)(state[6] ) & 0xff] ^ + K[5]; + + L[6] = C0[(int)(state[6] >> 56) ] ^ + C1[(int)(state[5] >> 48) & 0xff] ^ + C2[(int)(state[4] >> 40) & 0xff] ^ + C3[(int)(state[3] >> 32) & 0xff] ^ + C4[(int)(state[2] >> 24) & 0xff] ^ + C5[(int)(state[1] >> 16) & 0xff] ^ + C6[(int)(state[0] >> 8) & 0xff] ^ + C7[(int)(state[7] ) & 0xff] ^ + K[6]; + + L[7] = C0[(int)(state[7] >> 56) ] ^ + C1[(int)(state[6] >> 48) & 0xff] ^ + C2[(int)(state[5] >> 40) & 0xff] ^ + C3[(int)(state[4] >> 32) & 0xff] ^ + C4[(int)(state[3] >> 24) & 0xff] ^ + C5[(int)(state[2] >> 16) & 0xff] ^ + C6[(int)(state[1] >> 8) & 0xff] ^ + C7[(int)(state[0] ) & 0xff] ^ + K[7]; + + state[0] = L[0]; + state[1] = L[1]; + state[2] = L[2]; + state[3] = L[3]; + state[4] = L[4]; + state[5] = L[5]; + state[6] = L[6]; + state[7] = L[7]; + } + /* + * apply the Miyaguchi-Preneel compression function: + */ + wctx->hash[0] ^= state[0] ^ block[0]; + wctx->hash[1] ^= state[1] ^ block[1]; + wctx->hash[2] ^= state[2] ^ block[2]; + wctx->hash[3] ^= state[3] ^ block[3]; + wctx->hash[4] ^= state[4] ^ block[4]; + wctx->hash[5] ^= state[5] ^ block[5]; + wctx->hash[6] ^= state[6] ^ block[6]; + wctx->hash[7] ^= state[7] ^ block[7]; + +} + +static void wp512_init (void *ctx) { + int i; + struct wp512_ctx *wctx = ctx; + + memset(wctx->bitLength, 0, 32); + wctx->bufferBits = wctx->bufferPos = 0; + wctx->buffer[0] = 0; + for (i = 0; i < 8; i++) { + wctx->hash[i] = 0L; + } +} + +static void wp512_update(void *ctx, const u8 *source, unsigned int len) +{ + + struct wp512_ctx *wctx = ctx; + int sourcePos = 0; + unsigned int bits_len = len * 8; // convert to number of bits + int sourceGap = (8 - ((int)bits_len & 7)) & 7; + int bufferRem = wctx->bufferBits & 7; + int i; + u32 b, carry; + u8 *buffer = wctx->buffer; + u8 *bitLength = wctx->bitLength; + int bufferBits = wctx->bufferBits; + int bufferPos = wctx->bufferPos; + + u64 value = bits_len; + for (i = 31, carry = 0; i >= 0 && (carry != 0 || value != 0ULL); i--) { + carry += bitLength[i] + ((u32)value & 0xff); + bitLength[i] = (u8)carry; + carry >>= 8; + value >>= 8; + } + while (bits_len > 8) { + b = ((source[sourcePos] << sourceGap) & 0xff) | + ((source[sourcePos + 1] & 0xff) >> (8 - sourceGap)); + buffer[bufferPos++] |= (u8)(b >> bufferRem); + bufferBits += 8 - bufferRem; + if (bufferBits == WP512_BLOCK_SIZE * 8) { + wp512_process_buffer(wctx); + bufferBits = bufferPos = 0; + } + buffer[bufferPos] = b << (8 - bufferRem); + bufferBits += bufferRem; + bits_len -= 8; + sourcePos++; + } + if (bits_len > 0) { + b = (source[sourcePos] << sourceGap) & 0xff; + buffer[bufferPos] |= b >> bufferRem; + } else { + b = 0; + } + if (bufferRem + bits_len < 8) { + bufferBits += bits_len; + } else { + bufferPos++; + bufferBits += 8 - bufferRem; + bits_len -= 8 - bufferRem; + if (bufferBits == WP512_BLOCK_SIZE * 8) { + wp512_process_buffer(wctx); + bufferBits = bufferPos = 0; + } + buffer[bufferPos] = b << (8 - bufferRem); + bufferBits += (int)bits_len; + } + + wctx->bufferBits = bufferBits; + wctx->bufferPos = bufferPos; + +} + +static void wp512_final(void *ctx, u8 *out) +{ + struct wp512_ctx *wctx = ctx; + int i; + u8 *buffer = wctx->buffer; + u8 *bitLength = wctx->bitLength; + int bufferBits = wctx->bufferBits; + int bufferPos = wctx->bufferPos; + u8 *digest = out; + + buffer[bufferPos] |= 0x80U >> (bufferBits & 7); + bufferPos++; + if (bufferPos > WP512_BLOCK_SIZE - WP512_LENGTHBYTES) { + if (bufferPos < WP512_BLOCK_SIZE) { + memset(&buffer[bufferPos], 0, WP512_BLOCK_SIZE - bufferPos); + } + wp512_process_buffer(wctx); + bufferPos = 0; + } + if (bufferPos < WP512_BLOCK_SIZE - WP512_LENGTHBYTES) { + memset(&buffer[bufferPos], 0, + (WP512_BLOCK_SIZE - WP512_LENGTHBYTES) - bufferPos); + } + bufferPos = WP512_BLOCK_SIZE - WP512_LENGTHBYTES; + memcpy(&buffer[WP512_BLOCK_SIZE - WP512_LENGTHBYTES], + bitLength, WP512_LENGTHBYTES); + wp512_process_buffer(wctx); + for (i = 0; i < WP512_DIGEST_SIZE/8; i++) { + digest[0] = (u8)(wctx->hash[i] >> 56); + digest[1] = (u8)(wctx->hash[i] >> 48); + digest[2] = (u8)(wctx->hash[i] >> 40); + digest[3] = (u8)(wctx->hash[i] >> 32); + digest[4] = (u8)(wctx->hash[i] >> 24); + digest[5] = (u8)(wctx->hash[i] >> 16); + digest[6] = (u8)(wctx->hash[i] >> 8); + digest[7] = (u8)(wctx->hash[i] ); + digest += 8; + } + wctx->bufferBits = bufferBits; + wctx->bufferPos = bufferPos; +} + +static void wp384_final(void *ctx, u8 *out) +{ + struct wp512_ctx *wctx = ctx; + u8 D[64]; + + wp512_final (wctx, D); + memcpy (out, D, WP384_DIGEST_SIZE); + memset (D, 0, WP512_DIGEST_SIZE); +} + +static void wp256_final(void *ctx, u8 *out) +{ + struct wp512_ctx *wctx = ctx; + u8 D[64]; + + wp512_final (wctx, D); + memcpy (out, D, WP256_DIGEST_SIZE); + memset (D, 0, WP512_DIGEST_SIZE); +} + +static struct crypto_alg wp512 = { + .cra_name = "wp512", + .cra_flags = CRYPTO_ALG_TYPE_DIGEST, + .cra_blocksize = WP512_BLOCK_SIZE, + .cra_ctxsize = sizeof(struct wp512_ctx), + .cra_module = THIS_MODULE, + .cra_list = LIST_HEAD_INIT(wp512.cra_list), + .cra_u = { .digest = { + .dia_digestsize = WP512_DIGEST_SIZE, + .dia_init = wp512_init, + .dia_update = wp512_update, + .dia_final = wp512_final } } +}; + +static struct crypto_alg wp384 = { + .cra_name = "wp384", + .cra_flags = CRYPTO_ALG_TYPE_DIGEST, + .cra_blocksize = WP512_BLOCK_SIZE, + .cra_ctxsize = sizeof(struct wp512_ctx), + .cra_module = THIS_MODULE, + .cra_list = LIST_HEAD_INIT(wp384.cra_list), + .cra_u = { .digest = { + .dia_digestsize = WP384_DIGEST_SIZE, + .dia_init = wp512_init, + .dia_update = wp512_update, + .dia_final = wp384_final } } +}; + +static struct crypto_alg wp256 = { + .cra_name = "wp256", + .cra_flags = CRYPTO_ALG_TYPE_DIGEST, + .cra_blocksize = WP512_BLOCK_SIZE, + .cra_ctxsize = sizeof(struct wp512_ctx), + .cra_module = THIS_MODULE, + .cra_list = LIST_HEAD_INIT(wp256.cra_list), + .cra_u = { .digest = { + .dia_digestsize = WP256_DIGEST_SIZE, + .dia_init = wp512_init, + .dia_update = wp512_update, + .dia_final = wp256_final } } +}; + +static int __init init(void) +{ + int ret = 0; + + ret = crypto_register_alg(&wp512); + + if (ret < 0) + goto out; + + ret = crypto_register_alg(&wp384); + if (ret < 0) + { + crypto_unregister_alg(&wp512); + goto out; + } + + ret = crypto_register_alg(&wp256); + if (ret < 0) + { + crypto_unregister_alg(&wp512); + crypto_unregister_alg(&wp384); + } +out: + return ret; +} + +static void __exit fini(void) +{ + crypto_unregister_alg(&wp512); + crypto_unregister_alg(&wp384); + crypto_unregister_alg(&wp256); +} + +module_init(init); +module_exit(fini); + +MODULE_LICENSE("GPL"); +MODULE_DESCRIPTION("Whirlpool Message Digest Algorithm"); diff -urN linux-2.4.27/drivers/acpi/Makefile linux-2.4.28/drivers/acpi/Makefile --- linux-2.4.27/drivers/acpi/Makefile 2003-08-25 04:44:40.000000000 -0700 +++ linux-2.4.28/drivers/acpi/Makefile 2004-11-17 03:54:21.185380913 -0800 @@ -38,7 +38,7 @@ # ACPI Bus and Device Drivers # ifeq ($(CONFIG_ACPI_BUS),y) - obj-y += bus.o + obj-y += bus.o motherboard.o obj-$(CONFIG_ACPI_AC) += ac.o obj-$(CONFIG_ACPI_BATTERY) += battery.o obj-$(CONFIG_ACPI_BUTTON) += button.o diff -urN linux-2.4.27/drivers/acpi/bus.c linux-2.4.28/drivers/acpi/bus.c --- linux-2.4.27/drivers/acpi/bus.c 2004-08-07 16:26:04.000000000 -0700 +++ linux-2.4.28/drivers/acpi/bus.c 2004-11-17 03:54:21.186380954 -0800 @@ -1844,10 +1844,9 @@ } -static int __init -acpi_bus_init (void) +void __init +acpi_early_init (void) { - int result = 0; acpi_status status = AE_OK; struct acpi_buffer buffer = {sizeof(acpi_fadt), &acpi_fadt}; @@ -1871,7 +1870,7 @@ status = acpi_get_table(ACPI_TABLE_FADT, 1, &buffer); if (ACPI_FAILURE(status)) { printk(KERN_ERR PREFIX "Unable to get the FADT\n"); - goto error1; + goto error0; } #ifdef CONFIG_X86 @@ -1894,12 +1893,40 @@ } #endif - status = acpi_enable_subsystem(ACPI_FULL_INITIALIZATION); + status = acpi_enable_subsystem(~(ACPI_NO_HARDWARE_INIT | ACPI_NO_ACPI_ENABLE)); + if (ACPI_FAILURE(status)) { + printk(KERN_ERR PREFIX "Unable to enable ACPI\n"); + goto error0; + } + + return; + +error0: + disable_acpi(); + return; +} + +static int __init +acpi_bus_init (void) +{ + int result = 0; + acpi_status status = AE_OK; + extern acpi_status acpi_os_initialize1(void); + + ACPI_FUNCTION_TRACE("acpi_bus_init"); + + status = acpi_os_initialize1(); + + status = acpi_enable_subsystem(ACPI_NO_HARDWARE_INIT | ACPI_NO_ACPI_ENABLE); if (ACPI_FAILURE(status)) { printk(KERN_ERR PREFIX "Unable to start the ACPI Interpreter\n"); goto error1; } + if (ACPI_FAILURE(status)) { + printk(KERN_ERR PREFIX "Unable to initialize ACPI OS objects\n"); + goto error1; + } #ifdef CONFIG_ACPI_EC /* * ACPI 2.0 requires the EC driver to be loaded and work before @@ -1979,6 +2006,7 @@ if (result) goto error4; + acpi_motherboard_init(); return_VALUE(0); /* Mimic structured exception handling */ @@ -1991,7 +2019,6 @@ ACPI_SYSTEM_NOTIFY, &acpi_bus_notify); error1: acpi_terminate(); -error0: return_VALUE(-ENODEV); } diff -urN linux-2.4.27/drivers/acpi/dispatcher/dsopcode.c linux-2.4.28/drivers/acpi/dispatcher/dsopcode.c --- linux-2.4.27/drivers/acpi/dispatcher/dsopcode.c 2004-04-14 06:05:28.000000000 -0700 +++ linux-2.4.28/drivers/acpi/dispatcher/dsopcode.c 2004-11-17 03:54:21.187380995 -0800 @@ -79,7 +79,6 @@ acpi_status status; union acpi_parse_object *op; struct acpi_walk_state *walk_state; - union acpi_parse_object *arg; ACPI_FUNCTION_TRACE ("ds_execute_arguments"); @@ -126,9 +125,7 @@ /* Get and init the Op created above */ - arg = op->common.value.arg; op->common.node = node; - arg->common.node = node; acpi_ps_delete_parse_tree (op); /* Evaluate the deferred arguments */ diff -urN linux-2.4.27/drivers/acpi/events/evmisc.c linux-2.4.28/drivers/acpi/events/evmisc.c --- linux-2.4.27/drivers/acpi/events/evmisc.c 2004-04-14 06:05:28.000000000 -0700 +++ linux-2.4.28/drivers/acpi/events/evmisc.c 2004-11-17 03:54:21.188381036 -0800 @@ -139,7 +139,7 @@ acpi_notify_value_names[notify_value])); } else { - ACPI_DEBUG_PRINT ((ACPI_DB_INFO, "notify value: 0x2.2_x **Device Specific**\n", + ACPI_DEBUG_PRINT ((ACPI_DB_INFO, "Notify value: 0x%2.2X **Device Specific**\n", notify_value)); } diff -urN linux-2.4.27/drivers/acpi/hardware/hwsleep.c linux-2.4.28/drivers/acpi/hardware/hwsleep.c --- linux-2.4.27/drivers/acpi/hardware/hwsleep.c 2004-04-14 06:05:28.000000000 -0700 +++ linux-2.4.28/drivers/acpi/hardware/hwsleep.c 2004-11-17 03:54:21.188381036 -0800 @@ -265,19 +265,19 @@ sleep_type_reg_info = acpi_hw_get_bit_register_info (ACPI_BITREG_SLEEP_TYPE_A); sleep_enable_reg_info = acpi_hw_get_bit_register_info (ACPI_BITREG_SLEEP_ENABLE); - if (sleep_state != ACPI_STATE_S5) { - /* Clear wake status */ + /* Clear wake status */ - status = acpi_set_register (ACPI_BITREG_WAKE_STATUS, 1, ACPI_MTX_DO_NOT_LOCK); - if (ACPI_FAILURE (status)) { - return_ACPI_STATUS (status); - } + status = acpi_set_register (ACPI_BITREG_WAKE_STATUS, 1, ACPI_MTX_DO_NOT_LOCK); + if (ACPI_FAILURE (status)) { + return_ACPI_STATUS (status); + } - status = acpi_hw_clear_acpi_status (ACPI_MTX_DO_NOT_LOCK); - if (ACPI_FAILURE (status)) { - return_ACPI_STATUS (status); - } + status = acpi_hw_clear_acpi_status (ACPI_MTX_DO_NOT_LOCK); + if (ACPI_FAILURE (status)) { + return_ACPI_STATUS (status); + } + if (sleep_state != ACPI_STATE_S5) { /* Disable BM arbitration */ status = acpi_set_register (ACPI_BITREG_ARB_DISABLE, 1, ACPI_MTX_DO_NOT_LOCK); diff -urN linux-2.4.27/drivers/acpi/motherboard.c linux-2.4.28/drivers/acpi/motherboard.c --- linux-2.4.27/drivers/acpi/motherboard.c 1969-12-31 16:00:00.000000000 -0800 +++ linux-2.4.28/drivers/acpi/motherboard.c 2004-11-17 03:54:21.189381078 -0800 @@ -0,0 +1,159 @@ +/* + * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or (at + * your option) any later version. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, write to the Free Software Foundation, Inc., + * 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA. + * + * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + */ + +#include +#include +#include +#include +#include +#include + +#include +#include + +#define _COMPONENT ACPI_SYSTEM_COMPONENT +ACPI_MODULE_NAME ("acpi_motherboard") + +/* Dell use PNP0C01 instead of PNP0C02 */ +#define ACPI_MB_HID1 "PNP0C01" +#define ACPI_MB_HID2 "PNP0C02" + +/** + * Doesn't care about legacy IO ports, only IO ports beyond 0x1000 are reserved + * Doesn't care about the failure of 'request_region', since other may reserve + * the io ports as well + */ +#define IS_RESERVED_ADDR(base, len) \ + (((len) > 0) && ((base) > 0) && ((base) + (len) < IO_SPACE_LIMIT) \ + && ((base) + (len) > PCIBIOS_MIN_IO)) + +static acpi_status +acpi_reserve_io_ranges (struct acpi_resource *res, void *data) +{ + ACPI_FUNCTION_TRACE("acpi_reserve_io_ranges"); + + if (res->id == ACPI_RSTYPE_IO) { + struct acpi_resource_io *io_res = &res->data.io; + + if (io_res->min_base_address != io_res->max_base_address) + return AE_OK; + if (IS_RESERVED_ADDR(io_res->min_base_address, io_res->range_length)) { + ACPI_DEBUG_PRINT((ACPI_DB_INFO, "Motherboard resources 0x%08x - 0x%08x\n", + io_res->min_base_address, + io_res->min_base_address + io_res->range_length)); + request_region(io_res->min_base_address, + io_res->range_length, "motherboard"); + } + }else if (res->id == ACPI_RSTYPE_FIXED_IO) { + struct acpi_resource_fixed_io *fixed_io_res = &res->data.fixed_io; + + if (IS_RESERVED_ADDR(fixed_io_res->base_address, fixed_io_res->range_length)) { + ACPI_DEBUG_PRINT((ACPI_DB_INFO, "Motherboard resources 0x%08x - 0x%08x\n", + fixed_io_res->base_address, + fixed_io_res->base_address + fixed_io_res->range_length)); + request_region(fixed_io_res->base_address, + fixed_io_res->range_length, "motherboard"); + } + }else { + /* Memory mapped IO? */ + } + + return AE_OK; +} + +static int acpi_motherboard_add (struct acpi_device *device) +{ + if (!device) + return -EINVAL; + acpi_walk_resources(device->handle, METHOD_NAME__CRS, + acpi_reserve_io_ranges, NULL); + + return 0; +} + +static struct acpi_driver acpi_motherboard_driver1 = { + .name = "motherboard", + .class = "", + .ids = ACPI_MB_HID1, + .ops = { + .add = acpi_motherboard_add, + }, +}; + +static struct acpi_driver acpi_motherboard_driver2 = { + .name = "motherboard", + .class = "", + .ids = ACPI_MB_HID2, + .ops = { + .add = acpi_motherboard_add, + }, +}; + +static void __init +acpi_reserve_resources (void) +{ + if (acpi_gbl_FADT->xpm1a_evt_blk.address && acpi_gbl_FADT->pm1_evt_len) + request_region(acpi_gbl_FADT->xpm1a_evt_blk.address, + acpi_gbl_FADT->pm1_evt_len, "PM1a_EVT_BLK"); + + if (acpi_gbl_FADT->xpm1b_evt_blk.address && acpi_gbl_FADT->pm1_evt_len) + request_region(acpi_gbl_FADT->xpm1b_evt_blk.address, + acpi_gbl_FADT->pm1_evt_len, "PM1b_EVT_BLK"); + + if (acpi_gbl_FADT->xpm1a_cnt_blk.address && acpi_gbl_FADT->pm1_cnt_len) + request_region(acpi_gbl_FADT->xpm1a_cnt_blk.address, + acpi_gbl_FADT->pm1_cnt_len, "PM1a_CNT_BLK"); + + if (acpi_gbl_FADT->xpm1b_cnt_blk.address && acpi_gbl_FADT->pm1_cnt_len) + request_region(acpi_gbl_FADT->xpm1b_cnt_blk.address, + acpi_gbl_FADT->pm1_cnt_len, "PM1b_CNT_BLK"); + + if (acpi_gbl_FADT->xpm_tmr_blk.address && acpi_gbl_FADT->pm_tm_len == 4) + request_region(acpi_gbl_FADT->xpm_tmr_blk.address, + 4, "PM_TMR"); + + if (acpi_gbl_FADT->xpm2_cnt_blk.address && acpi_gbl_FADT->pm2_cnt_len) + request_region(acpi_gbl_FADT->xpm2_cnt_blk.address, + acpi_gbl_FADT->pm2_cnt_len, "PM2_CNT_BLK"); + + /* Length of GPE blocks must be a non-negative multiple of 2 */ + + if (acpi_gbl_FADT->xgpe0_blk.address && acpi_gbl_FADT->gpe0_blk_len && + !(acpi_gbl_FADT->gpe0_blk_len & 0x1)) + request_region(acpi_gbl_FADT->xgpe0_blk.address, + acpi_gbl_FADT->gpe0_blk_len, "GPE0_BLK"); + + if (acpi_gbl_FADT->xgpe1_blk.address && acpi_gbl_FADT->gpe1_blk_len && + !(acpi_gbl_FADT->gpe1_blk_len & 0x1)) + request_region(acpi_gbl_FADT->xgpe1_blk.address, + acpi_gbl_FADT->gpe1_blk_len, "GPE1_BLK"); +} + +int __init acpi_motherboard_init(void) +{ + acpi_bus_register_driver(&acpi_motherboard_driver1); + acpi_bus_register_driver(&acpi_motherboard_driver2); + /* + * Guarantee motherboard IO reservation first + * This module must run after scan.c + */ + if (!acpi_disabled) + acpi_reserve_resources (); + return 0; +} diff -urN linux-2.4.27/drivers/acpi/osl.c linux-2.4.28/drivers/acpi/osl.c --- linux-2.4.27/drivers/acpi/osl.c 2004-04-14 06:05:29.000000000 -0700 +++ linux-2.4.28/drivers/acpi/osl.c 2004-11-17 03:54:21.190381119 -0800 @@ -72,6 +72,12 @@ acpi_status acpi_os_initialize(void) { + return AE_OK; +} + +acpi_status +acpi_os_initialize1(void) +{ /* * Initialize PCI configuration space access, as we'll need to access * it while walking the namespace (bus 0 and root bridges w/ _BBNs). diff -urN linux-2.4.27/drivers/acpi/system.c linux-2.4.28/drivers/acpi/system.c --- linux-2.4.27/drivers/acpi/system.c 2004-04-14 06:05:29.000000000 -0700 +++ linux-2.4.28/drivers/acpi/system.c 2004-11-17 03:54:21.191381160 -0800 @@ -102,7 +102,6 @@ BUG(); /* Some SMP machines only can poweroff in boot CPU */ set_cpus_allowed(current, 1UL << cpu_logical_map(0)); - acpi_system_save_state(ACPI_STATE_S5); acpi_enter_sleep_state_prep(ACPI_STATE_S5); ACPI_DISABLE_IRQS(); acpi_enter_sleep_state(ACPI_STATE_S5); diff -urN linux-2.4.27/drivers/acpi/utilities/utglobal.c linux-2.4.28/drivers/acpi/utilities/utglobal.c --- linux-2.4.27/drivers/acpi/utilities/utglobal.c 2004-04-14 06:05:29.000000000 -0700 +++ linux-2.4.28/drivers/acpi/utilities/utglobal.c 2004-11-17 03:54:21.192381201 -0800 @@ -783,12 +783,6 @@ ACPI_FUNCTION_TRACE ("ut_init_globals"); - /* Runtime configuration */ - - acpi_gbl_create_osi_method = TRUE; - acpi_gbl_all_methods_serialized = FALSE; - acpi_gbl_leave_wake_gpes_disabled = TRUE; - /* Memory allocation and cache lists */ ACPI_MEMSET (acpi_gbl_memory_lists, 0, sizeof (struct acpi_memory_list) * ACPI_NUM_MEM_LISTS); diff -urN linux-2.4.27/drivers/atm/atmtcp.c linux-2.4.28/drivers/atm/atmtcp.c --- linux-2.4.27/drivers/atm/atmtcp.c 2003-11-28 10:26:19.000000000 -0800 +++ linux-2.4.28/drivers/atm/atmtcp.c 2004-11-17 03:54:21.193381242 -0800 @@ -246,7 +246,7 @@ dev_data = PRIV(atmtcp_dev); dev_data->vcc = NULL; if (dev_data->persist) return; - PRIV(atmtcp_dev) = NULL; + atmtcp_dev->dev_data = NULL; kfree(dev_data); shutdown_atm_dev(atmtcp_dev); vcc->dev_data = NULL; @@ -362,7 +362,7 @@ } dev->ci_range.vpi_bits = MAX_VPI_BITS; dev->ci_range.vci_bits = MAX_VCI_BITS; - PRIV(dev) = dev_data; + dev->dev_data = dev_data; PRIV(dev)->vcc = NULL; PRIV(dev)->persist = persist; if (result) *result = dev; diff -urN linux-2.4.27/drivers/atm/eni.c linux-2.4.28/drivers/atm/eni.c --- linux-2.4.27/drivers/atm/eni.c 2003-11-28 10:26:19.000000000 -0800 +++ linux-2.4.28/drivers/atm/eni.c 2004-11-17 03:54:21.194381283 -0800 @@ -1879,7 +1879,7 @@ DPRINTK("eni_close: done waiting\n"); /* deallocate memory */ kfree(ENI_VCC(vcc)); - ENI_VCC(vcc) = NULL; + vcc->dev_data = NULL; clear_bit(ATM_VF_ADDR,&vcc->flags); /*foo();*/ } @@ -1951,7 +1951,7 @@ DPRINTK(">eni_open\n"); EVENT("eni_open\n",0,0); - if (!test_bit(ATM_VF_PARTIAL,&vcc->flags)) ENI_VCC(vcc) = NULL; + if (!test_bit(ATM_VF_PARTIAL,&vcc->flags)) vcc->dev_data = NULL; eni_dev = ENI_DEV(vcc->dev); error = get_ci(vcc,&vpi,&vci); if (error) return error; @@ -1966,7 +1966,7 @@ if (!test_bit(ATM_VF_PARTIAL,&vcc->flags)) { eni_vcc = kmalloc(sizeof(struct eni_vcc),GFP_KERNEL); if (!eni_vcc) return -ENOMEM; - ENI_VCC(vcc) = eni_vcc; + vcc->dev_data = eni_vcc; eni_vcc->tx = NULL; /* for eni_close after open_rx */ if ((error = open_rx_first(vcc))) { eni_close(vcc); @@ -2302,7 +2302,7 @@ if (!dev) goto out2; pci_set_drvdata(pci_dev, dev); eni_dev->pci_dev = pci_dev; - ENI_DEV(dev) = eni_dev; + dev->dev_data = eni_dev; eni_dev->asic = ent->driver_data; error = eni_do_init(dev); if (error) goto out3; diff -urN linux-2.4.27/drivers/atm/fore200e.c linux-2.4.28/drivers/atm/fore200e.c --- linux-2.4.27/drivers/atm/fore200e.c 2004-08-07 16:26:04.000000000 -0700 +++ linux-2.4.28/drivers/atm/fore200e.c 2004-11-17 03:54:21.196381365 -0800 @@ -101,6 +101,7 @@ #if 1 +#undef ASSERT #define ASSERT(expr) if (!(expr)) { \ printk(FORE200E "assertion failed! %s[%d]: %s\n", \ __FUNCTION__, __LINE__, #expr); \ @@ -1620,7 +1621,7 @@ set_bit(ATM_VF_PARTIAL,&vcc->flags); set_bit(ATM_VF_ADDR, &vcc->flags); - FORE200E_VCC(vcc) = fore200e_vcc; + vcc->dev_data = fore200e_vcc; if (fore200e_activate_vcin(fore200e, 1, vcc, vcc->qos.rxtp.max_sdu) < 0) { @@ -1629,7 +1630,7 @@ clear_bit(ATM_VF_ADDR, &vcc->flags); clear_bit(ATM_VF_PARTIAL,&vcc->flags); - FORE200E_VCC(vcc) = NULL; + vcc->dev_data = NULL; fore200e->available_cell_rate += vcc->qos.txtp.max_pcr; @@ -1691,7 +1692,7 @@ vcc->itf = vcc->vci = vcc->vpi = 0; fore200e_vcc = FORE200E_VCC(vcc); - FORE200E_VCC(vcc) = NULL; + vcc->dev_data = NULL; spin_unlock_irqrestore(&fore200e->q_lock, flags); @@ -2738,7 +2739,7 @@ return -ENODEV; } - FORE200E_DEV(atm_dev) = fore200e; + atm_dev->dev_data = fore200e; fore200e->atm_dev = atm_dev; atm_dev->ci_range.vpi_bits = FORE200E_VPI_BITS; diff -urN linux-2.4.27/drivers/atm/he.c linux-2.4.28/drivers/atm/he.c --- linux-2.4.27/drivers/atm/he.c 2004-02-18 05:36:31.000000000 -0800 +++ linux-2.4.28/drivers/atm/he.c 2004-11-17 03:54:21.198381448 -0800 @@ -378,7 +378,7 @@ he_dev->pci_dev = pci_dev; he_dev->atm_dev = atm_dev; he_dev->atm_dev->dev_data = he_dev; - HE_DEV(atm_dev) = he_dev; + atm_dev->dev_data = he_dev; he_dev->number = atm_dev->number; if (he_start(atm_dev)) { he_stop(he_dev); @@ -2347,7 +2347,7 @@ init_waitqueue_head(&he_vcc->rx_waitq); init_waitqueue_head(&he_vcc->tx_waitq); - HE_VCC(vcc) = he_vcc; + vcc->dev_data = he_vcc; if (vcc->qos.txtp.traffic_class != ATM_NONE) { int pcr_goal; diff -urN linux-2.4.27/drivers/atm/idt77105.c linux-2.4.28/drivers/atm/idt77105.c --- linux-2.4.27/drivers/atm/idt77105.c 2003-06-13 07:51:32.000000000 -0700 +++ linux-2.4.28/drivers/atm/idt77105.c 2004-11-17 03:54:21.199381489 -0800 @@ -267,7 +267,7 @@ { unsigned long flags; - if (!(PRIV(dev) = kmalloc(sizeof(struct idt77105_priv),GFP_KERNEL))) + if (!(dev->phy_data = kmalloc(sizeof(struct idt77105_priv),GFP_KERNEL))) return -ENOMEM; PRIV(dev)->dev = dev; spin_lock_irqsave(&idt77105_priv_lock, flags); @@ -345,7 +345,7 @@ else idt77105_all = walk->next; dev->phy = NULL; - PRIV(dev) = NULL; + dev->phy_data = NULL; kfree(walk); break; } diff -urN linux-2.4.27/drivers/atm/iphase.c linux-2.4.28/drivers/atm/iphase.c --- linux-2.4.27/drivers/atm/iphase.c 2003-08-25 04:44:41.000000000 -0700 +++ linux-2.4.28/drivers/atm/iphase.c 2004-11-17 03:54:21.202381612 -0800 @@ -1782,7 +1782,7 @@ (iadev->tx_buf_sz - sizeof(struct cpcs_trailer))){ printk("IA: SDU size over (%d) the configured SDU size %d\n", vcc->qos.txtp.max_sdu,iadev->tx_buf_sz); - INPH_IA_VCC(vcc) = NULL; + vcc->dev_data = NULL; kfree(ia_vcc); return -EINVAL; } @@ -2707,7 +2707,7 @@ } kfree(INPH_IA_VCC(vcc)); ia_vcc = NULL; - INPH_IA_VCC(vcc) = NULL; + vcc->dev_data = NULL; clear_bit(ATM_VF_ADDR,&vcc->flags); return; } @@ -2720,7 +2720,7 @@ if (!test_bit(ATM_VF_PARTIAL,&vcc->flags)) { IF_EVENT(printk("ia: not partially allocated resources\n");) - INPH_IA_VCC(vcc) = NULL; + vcc->dev_data = NULL; } iadev = INPH_IA_DEV(vcc->dev); error = atm_find_ci(vcc, &vpi, &vci); @@ -2744,7 +2744,7 @@ /* Device dependent initialization */ ia_vcc = kmalloc(sizeof(*ia_vcc), GFP_KERNEL); if (!ia_vcc) return -ENOMEM; - INPH_IA_VCC(vcc) = ia_vcc; + vcc->dev_data = ia_vcc; if ((error = open_rx(vcc))) { @@ -3256,7 +3256,7 @@ ret = -ENOMEM; goto err_out_disable_dev; } - INPH_IA_DEV(dev) = iadev; + dev->dev_data = iadev; IF_INIT(printk(DEV_LABEL "registered at (itf :%d)\n", dev->number);) IF_INIT(printk("dev_id = 0x%x iadev->LineRate = %d \n", (u32)dev, iadev->LineRate);) diff -urN linux-2.4.27/drivers/atm/suni.c linux-2.4.28/drivers/atm/suni.c --- linux-2.4.27/drivers/atm/suni.c 2003-06-13 07:51:32.000000000 -0700 +++ linux-2.4.28/drivers/atm/suni.c 2004-11-17 03:54:21.202381612 -0800 @@ -230,7 +230,7 @@ unsigned long flags; int first; - if (!(PRIV(dev) = kmalloc(sizeof(struct suni_priv),GFP_KERNEL))) + if (!(dev->phy_data = kmalloc(sizeof(struct suni_priv),GFP_KERNEL))) return -ENOMEM; PRIV(dev)->dev = dev; diff -urN linux-2.4.27/drivers/atm/uPD98402.c linux-2.4.28/drivers/atm/uPD98402.c --- linux-2.4.27/drivers/atm/uPD98402.c 2001-09-13 15:21:32.000000000 -0700 +++ linux-2.4.28/drivers/atm/uPD98402.c 2004-11-17 03:54:21.203381653 -0800 @@ -212,7 +212,7 @@ static int uPD98402_start(struct atm_dev *dev) { DPRINTK("phy_start\n"); - if (!(PRIV(dev) = kmalloc(sizeof(struct uPD98402_priv),GFP_KERNEL))) + if (!(dev->phy_data = kmalloc(sizeof(struct uPD98402_priv),GFP_KERNEL))) return -ENOMEM; memset(&PRIV(dev)->sonet_stats,0,sizeof(struct k_sonet_stats)); (void) GET(PCR); /* clear performance events */ diff -urN linux-2.4.27/drivers/atm/zatm.c linux-2.4.28/drivers/atm/zatm.c --- linux-2.4.27/drivers/atm/zatm.c 2003-08-25 04:44:41.000000000 -0700 +++ linux-2.4.28/drivers/atm/zatm.c 2004-11-17 03:54:21.204381694 -0800 @@ -1565,7 +1565,7 @@ DPRINTK("zatm_close: done waiting\n"); /* deallocate memory */ kfree(ZATM_VCC(vcc)); - ZATM_VCC(vcc) = NULL; + vcc->dev_data = NULL; clear_bit(ATM_VF_ADDR,&vcc->flags); } @@ -1578,7 +1578,7 @@ DPRINTK(">zatm_open\n"); zatm_dev = ZATM_DEV(vcc->dev); - if (!test_bit(ATM_VF_PARTIAL,&vcc->flags)) ZATM_VCC(vcc) = NULL; + if (!test_bit(ATM_VF_PARTIAL,&vcc->flags)) vcc->dev_data = NULL; error = atm_find_ci(vcc,&vpi,&vci); if (error) return error; vcc->vpi = vpi; @@ -1594,7 +1594,7 @@ clear_bit(ATM_VF_ADDR,&vcc->flags); return -ENOMEM; } - ZATM_VCC(vcc) = zatm_vcc; + vcc->dev_data = zatm_vcc; ZATM_VCC(vcc)->tx_chan = 0; /* for zatm_close after open_rx */ if ((error = open_rx_first(vcc))) { zatm_close(vcc); @@ -1828,7 +1828,7 @@ dev = atm_dev_register(DEV_LABEL,&ops,-1,NULL); if (!dev) break; zatm_dev->pci_dev = pci_dev; - ZATM_DEV(dev) = zatm_dev; + dev->dev_data = zatm_dev; zatm_dev->copper = type; if (zatm_init(dev) || zatm_start(dev)) { atm_dev_deregister(dev); diff -urN linux-2.4.27/drivers/block/cciss.c linux-2.4.28/drivers/block/cciss.c --- linux-2.4.27/drivers/block/cciss.c 2004-08-07 16:26:04.000000000 -0700 +++ linux-2.4.28/drivers/block/cciss.c 2004-11-17 03:54:21.207381818 -0800 @@ -80,7 +80,7 @@ 0x0E11, 0x4091, 0, 0, 0}, { PCI_VENDOR_ID_COMPAQ, PCI_DEVICE_ID_COMPAQ_CISSC, 0x0E11, 0x409E, 0, 0, 0}, - { PCI_VENDOR_ID_COMPAQ, PCI_DEVICE_ID_COMPAQ_CISSC, + { PCI_VENDOR_ID_HP, PCI_DEVICE_ID_HP_CISS, 0x103C, 0x3211, 0, 0, 0}, {0,} }; @@ -125,7 +125,7 @@ /* Originally cciss driver only supports 8 major number */ #define MAX_CTLR_ORIG COMPAQ_CISS_MAJOR7 - COMPAQ_CISS_MAJOR + 1 -#define CCISS_DMA_MASK 0xFFFFFFFFFFFFFFFF /* 64 bit DMA */ +#define CCISS_DMA_MASK 0xFFFFFFFFFFFFFFFFULL /* 64 bit DMA */ #ifdef CONFIG_CISS_MONITOR_THREAD static int cciss_monitor(void *ctlr); @@ -168,6 +168,35 @@ static void cciss_procinit(int i) {} #endif /* CONFIG_PROC_FS */ +/* + * Enqueuing and dequeuing functions for cmdlists. + */ +static inline void addQ(CommandList_struct **Qptr, CommandList_struct *c) +{ + if (*Qptr == NULL) { + *Qptr = c; + c->next = c->prev = c; + } else { + c->prev = (*Qptr)->prev; + c->next = (*Qptr); + (*Qptr)->prev->next = c; + (*Qptr)->prev = c; + } +} + +static inline CommandList_struct *removeQ(CommandList_struct **Qptr, + CommandList_struct *c) +{ + if (c && c->next != c) { + if (*Qptr == c) *Qptr = c->next; + c->prev->next = c->next; + c->next->prev = c->prev; + } else { + *Qptr = NULL; + } + return c; +} + static struct block_device_operations cciss_fops = { owner: THIS_MODULE, open: cciss_open, @@ -503,10 +532,9 @@ extern int unregister_ioctl32_conversion(unsigned int cmd); static int cciss_ioctl32_passthru(unsigned int fd, unsigned cmd, unsigned long arg, struct file *file); -static int cciss_ioctl32_big_passthru(unsigned int fd, unsigned cmd, unsigned long arg, - struct file *file); +static int cciss_ioctl32_big_passthru(unsigned int fd, unsigned cmd, unsigned long arg, struct file *file); -typedef long (*handler type) (unsigned int, unsigned int, unsigned long, +typedef int (*handler_type) (unsigned int, unsigned int, unsigned long, struct file *); static struct ioctl32_map { @@ -576,13 +604,15 @@ IOCTL_Command_struct arg64; mm_segment_t old_fs; int err; + __u64 tmp_ptr; err = 0; err |= copy_from_user(&arg64.LUN_info, &arg32->LUN_info, sizeof(arg64.LUN_info)); err |= copy_from_user(&arg64.Request, &arg32->Request, sizeof(arg64.Request)); err |= copy_from_user(&arg64.error_info, &arg32->error_info, sizeof(arg64.error_info)); err |= get_user(arg64.buf_size, &arg32->buf_size); - err |= get_user(arg64.buf, &arg32->buf); + err |= get_user(tmp_ptr, &arg32->buf); + arg64.buf = (BYTE *) tmp_ptr; if (err) return -EFAULT; @@ -592,7 +622,7 @@ set_fs(old_fs); if (err) return err; - err |= copy_to_user(&arg32->error_info, &arg64.error_info, sizeof(&arg32->error_info)); + err |= copy_to_user(&arg32->error_info, &arg64.error_info, sizeof(arg32->error_info)); if (err) return -EFAULT; return err; @@ -605,6 +635,7 @@ BIG_IOCTL_Command_struct arg64; mm_segment_t old_fs; int err; + __u64 tmp_ptr; err = 0; err |= copy_from_user(&arg64.LUN_info, &arg32->LUN_info, sizeof(arg64.LUN_info)); @@ -612,7 +643,8 @@ err |= copy_from_user(&arg64.error_info, &arg32->error_info, sizeof(arg64.error_info)); err |= get_user(arg64.buf_size, &arg32->buf_size); err |= get_user(arg64.malloc_size, &arg32->malloc_size); - err |= get_user(arg64.buf, &arg32->buf); + err |= get_user(tmp_ptr, &arg32->buf); + arg64.buf = (BYTE *) tmp_ptr; if (err) return -EFAULT; old_fs = get_fs(); set_fs(KERNEL_DS); @@ -620,7 +652,7 @@ set_fs(old_fs); if (err) return err; - err |= copy_to_user(&arg32->error_info, &arg64.error_info, sizeof(&arg32->error_info)); + err |= copy_to_user(&arg32->error_info, &arg64.error_info, sizeof(arg32->error_info)); if (err) return -EFAULT; return err; @@ -629,6 +661,7 @@ static inline void register_cciss_ioctl32(void) {} static inline void unregister_cciss_ioctl32(void) {} #endif + /* * ioctl */ @@ -942,6 +975,8 @@ { kfree(buff); return -EFAULT; + } else { + memset(buff, 0, iocommand.buf_size); } } if ((c = cmd_alloc(h , 0)) == NULL) { @@ -1060,12 +1095,15 @@ goto cleanup1; } if (iocommand.Request.Type.Direction == - XFER_WRITE) + XFER_WRITE) { /* Copy the data into the buffer created */ if (copy_from_user(buff[sg_used], data_ptr, buff_size[sg_used])) { status = -ENOMEM; goto cleanup1; + } else { + memset(buff[sg_used], 0, buff_size[sg_used]); + } } size_left_alloc -= buff_size[sg_used]; data_ptr += buff_size[sg_used]; @@ -2129,35 +2167,6 @@ return (ulong) (page_remapped ? (page_remapped + page_offs) : 0UL); } -/* - * Enqueuing and dequeuing functions for cmdlists. - */ -static inline void addQ(CommandList_struct **Qptr, CommandList_struct *c) -{ - if (*Qptr == NULL) { - *Qptr = c; - c->next = c->prev = c; - } else { - c->prev = (*Qptr)->prev; - c->next = (*Qptr); - (*Qptr)->prev->next = c; - (*Qptr)->prev = c; - } -} - -static inline CommandList_struct *removeQ(CommandList_struct **Qptr, - CommandList_struct *c) -{ - if (c && c->next != c) { - if (*Qptr == c) *Qptr = c->next; - c->prev->next = c->next; - c->next->prev = c->prev; - } else { - *Qptr = NULL; - } - return c; -} - /* * Takes jobs of the Q and sends them to the hardware, then puts it on * the Q to wait for completion. @@ -2671,7 +2680,7 @@ static int cciss_pci_init(ctlr_info_t *c, struct pci_dev *pdev) { ushort subsystem_vendor_id, subsystem_device_id, command; - unchar irq = pdev->irq, ready = 0; + int ready = 0; __u32 board_id, scratchpad; __u64 cfg_offset; __u32 cfg_base_addr; @@ -2727,11 +2736,11 @@ #ifdef CCISS_DEBUG printk("command = %x\n", command); - printk("irq = %x\n", irq); + printk("irq = %x\n", pdev->irq); printk("board_id = %x\n", board_id); #endif /* CCISS_DEBUG */ - c->intr = irq; + c->intr = pdev->irq; /* * Memory base addr is first addr , the second points to the config diff -urN linux-2.4.27/drivers/block/cciss.h linux-2.4.28/drivers/block/cciss.h --- linux-2.4.27/drivers/block/cciss.h 2004-02-18 05:36:31.000000000 -0800 +++ linux-2.4.28/drivers/block/cciss.h 2004-11-17 03:54:21.208381859 -0800 @@ -51,7 +51,7 @@ unsigned long io_mem_addr; unsigned long io_mem_length; CfgTable_struct *cfgtable; - int intr; + unsigned int intr; int interrupts_enabled; int max_commands; int commands_outstanding; diff -urN linux-2.4.27/drivers/block/cpqarray.c linux-2.4.28/drivers/block/cpqarray.c --- linux-2.4.27/drivers/block/cpqarray.c 2004-02-18 05:36:31.000000000 -0800 +++ linux-2.4.28/drivers/block/cpqarray.c 2004-11-17 03:54:21.209381900 -0800 @@ -960,6 +960,20 @@ return c; } +static inline void complete_buffers(struct buffer_head *bh, int ok) +{ + struct buffer_head *xbh; + while(bh) { + xbh = bh->b_reqnext; + bh->b_reqnext = NULL; + + blk_finished_io(bh->b_size >> 9); + bh->b_end_io(bh, ok); + + bh = xbh; + } +} + /* * Get a request and submit it to the controller. * This routine needs to grab all the requests it possibly can from the @@ -1094,19 +1108,6 @@ } } -static inline void complete_buffers(struct buffer_head *bh, int ok) -{ - struct buffer_head *xbh; - while(bh) { - xbh = bh->b_reqnext; - bh->b_reqnext = NULL; - - blk_finished_io(bh->b_size >> 9); - bh->b_end_io(bh, ok); - - bh = xbh; - } -} /* * Mark all buffers that cmd was responsible for */ diff -urN linux-2.4.27/drivers/block/genhd.c linux-2.4.28/drivers/block/genhd.c --- linux-2.4.27/drivers/block/genhd.c 2004-02-18 05:36:31.000000000 -0800 +++ linux-2.4.28/drivers/block/genhd.c 2004-11-17 03:54:21.210381941 -0800 @@ -49,8 +49,9 @@ add_gendisk(struct gendisk *gp) { struct gendisk *sgp; + unsigned long flags; - write_lock(&gendisk_lock); + write_lock_irqsave(&gendisk_lock, flags); /* * In 2.5 this will go away. Fix the drivers who rely on @@ -70,7 +71,7 @@ gp->next = gendisk_head; gendisk_head = gp; out: - write_unlock(&gendisk_lock); + write_unlock_irqrestore(&gendisk_lock, flags); } EXPORT_SYMBOL(add_gendisk); @@ -87,15 +88,16 @@ del_gendisk(struct gendisk *gp) { struct gendisk **gpp; + unsigned long flags; - write_lock(&gendisk_lock); + write_lock_irqsave(&gendisk_lock, flags); gendisk_array[gp->major] = NULL; for (gpp = &gendisk_head; *gpp; gpp = &((*gpp)->next)) if (*gpp == gp) break; if (*gpp) *gpp = (*gpp)->next; - write_unlock(&gendisk_lock); + write_unlock_irqrestore(&gendisk_lock, flags); } EXPORT_SYMBOL(del_gendisk); @@ -113,8 +115,9 @@ { struct gendisk *gp = NULL; int maj = MAJOR(dev); + unsigned long flags; - read_lock(&gendisk_lock); + read_lock_irqsave(&gendisk_lock, flags); if ((gp = gendisk_array[maj])) goto out; @@ -123,7 +126,7 @@ if (gp->major == maj) break; out: - read_unlock(&gendisk_lock); + read_unlock_irqrestore(&gendisk_lock, flags); return gp; } @@ -143,12 +146,13 @@ { struct gendisk *gp; int error = 0; + unsigned long flags; - read_lock(&gendisk_lock); + read_lock_irqsave(&gendisk_lock, flags); for (gp = gendisk_head; gp; gp = gp->next) if ((error = walk(gp, data))) break; - read_unlock(&gendisk_lock); + read_unlock_irqrestore(&gendisk_lock, flags); return error; } @@ -160,7 +164,7 @@ struct gendisk *gp; loff_t pos = *ppos; - read_lock(&gendisk_lock); + read_lock_irq(&gendisk_lock); for (gp = gendisk_head; gp; gp = gp->next) if (!pos--) return gp; @@ -175,7 +179,7 @@ static void part_stop(struct seq_file *s, void *v) { - read_unlock(&gendisk_lock); + read_unlock_irq(&gendisk_lock); } static int part_show(struct seq_file *s, void *v) diff -urN linux-2.4.27/drivers/block/ll_rw_blk.c linux-2.4.28/drivers/block/ll_rw_blk.c --- linux-2.4.27/drivers/block/ll_rw_blk.c 2004-04-14 06:05:29.000000000 -0700 +++ linux-2.4.28/drivers/block/ll_rw_blk.c 2004-11-17 03:54:21.211381982 -0800 @@ -132,7 +132,7 @@ return max_sectors[MAJOR(dev)][MINOR(dev)]; } -inline request_queue_t *blk_get_queue(kdev_t dev) +static inline request_queue_t *__blk_get_queue(kdev_t dev) { struct blk_dev_struct *bdev = blk_dev + MAJOR(dev); @@ -142,6 +142,11 @@ return &blk_dev[MAJOR(dev)].request_queue; } +request_queue_t *blk_get_queue(kdev_t dev) +{ + return __blk_get_queue(dev); +} + static int __blk_cleanup_queue(struct request_list *list) { struct list_head *head = &list->free; @@ -303,7 +308,7 @@ /* * can we merge the two segments, or do we need to start a new one? */ -inline int blk_seg_merge_ok(struct buffer_head *bh, struct buffer_head *nxt) +static inline int __blk_seg_merge_ok(struct buffer_head *bh, struct buffer_head *nxt) { /* * if bh and nxt are contigous and don't cross a 4g boundary, it's ok @@ -314,6 +319,11 @@ return 0; } +int blk_seg_merge_ok(struct buffer_head *bh, struct buffer_head *nxt) +{ + return __blk_seg_merge_ok(bh, nxt); +} + static inline int ll_new_segment(request_queue_t *q, struct request *req, int max_segments) { if (req->nr_segments < max_segments) { @@ -326,7 +336,7 @@ static int ll_back_merge_fn(request_queue_t *q, struct request *req, struct buffer_head *bh, int max_segments) { - if (blk_seg_merge_ok(req->bhtail, bh)) + if (__blk_seg_merge_ok(req->bhtail, bh)) return 1; return ll_new_segment(q, req, max_segments); @@ -335,7 +345,7 @@ static int ll_front_merge_fn(request_queue_t *q, struct request *req, struct buffer_head *bh, int max_segments) { - if (blk_seg_merge_ok(bh, req->bh)) + if (__blk_seg_merge_ok(bh, req->bh)) return 1; return ll_new_segment(q, req, max_segments); @@ -346,7 +356,7 @@ { int total_segments = req->nr_segments + next->nr_segments; - if (blk_seg_merge_ok(req->bhtail, next->bh)) + if (__blk_seg_merge_ok(req->bhtail, next->bh)) total_segments--; if (total_segments > max_segments) @@ -1255,7 +1265,7 @@ * Stacking drivers are expected to know what they are doing. */ do { - q = blk_get_queue(bh->b_rdev); + q = __blk_get_queue(bh->b_rdev); if (!q) { printk(KERN_ERR "generic_make_request: Trying to access " diff -urN linux-2.4.27/drivers/char/agp/agp.h linux-2.4.28/drivers/char/agp/agp.h --- linux-2.4.27/drivers/char/agp/agp.h 2004-08-07 16:26:04.000000000 -0700 +++ linux-2.4.28/drivers/char/agp/agp.h 2004-11-17 03:54:21.212382023 -0800 @@ -211,6 +211,12 @@ #ifndef PCI_DEVICE_ID_INTEL_865_G_1 #define PCI_DEVICE_ID_INTEL_865_G_1 0x2572 #endif +#ifndef PCI_DEVICE_ID_INTEL_915_G_0 +#define PCI_DEVICE_ID_INTEL_915_G_0 0x2580 +#endif +#ifndef PCI_DEVICE_ID_INTEL_915_G_1 +#define PCI_DEVICE_ID_INTEL_915_G_1 0x2582 +#endif #ifndef PCI_DEVICE_ID_INTEL_820_0 #define PCI_DEVICE_ID_INTEL_820_0 0x2500 #endif @@ -441,6 +447,12 @@ #define I810_DRAM_ROW_0 0x00000001 #define I810_DRAM_ROW_0_SDRAM 0x00000001 +/* intel I915 registers */ +#define I915_GMADDR 0x18 +#define I915_MMADDR 0x10 +#define I915_PTEADDR 0x1C +#define I915_GMCH_GMS_STOLEN_48M (0x6 << 4) +#define I915_GMCH_GMS_STOLEN_64M (0x7 << 4) /* VIA register */ diff -urN linux-2.4.27/drivers/char/agp/agpgart_be.c linux-2.4.28/drivers/char/agp/agpgart_be.c --- linux-2.4.27/drivers/char/agp/agpgart_be.c 2004-08-07 16:26:04.000000000 -0700 +++ linux-2.4.28/drivers/char/agp/agpgart_be.c 2004-11-17 03:54:21.217382229 -0800 @@ -794,7 +794,29 @@ } #endif get_page(page); - LockPage(page); + set_bit(PG_locked, &page->flags); + atomic_inc(&agp_bridge.current_memory_agp); + return (unsigned long)page_address(page); +} + +/* Exists to support ARGB cursors */ +static unsigned long i830_alloc_pages(void) +{ + struct page * page; + + page = alloc_pages(GFP_KERNEL, 2); + if (page == NULL) { + return 0; + } + +#ifdef CONFIG_X86 + if (change_page_attr(page, 4, PAGE_KERNEL_NOCACHE) < 0) { + __free_page(page); + return 0; + } +#endif + get_page(page); + set_bit(PG_locked, &page->flags); atomic_inc(&agp_bridge.current_memory_agp); return (unsigned long)page_address(page); } @@ -818,6 +840,25 @@ atomic_dec(&agp_bridge.current_memory_agp); } +static void i830_destroy_pages(unsigned long addr) +{ + void *pt = (void *) addr; + struct page *page; + + if (pt == NULL) { + return; + } + + page = virt_to_page(pt); +#ifdef CONFIG_X86 + change_page_attr(page, 4, PAGE_KERNEL); +#endif + put_page(page); + UnlockPage(page); + free_pages((unsigned long) pt, 2); + atomic_dec(&agp_bridge.current_memory_agp); +} + /* End Basic Page Allocation Routines */ void agp_enable(u32 mode) @@ -1022,33 +1063,49 @@ return new; } if(type == AGP_PHYS_MEMORY) { + unsigned long physical; + /* The I810 requires a physical address to program * it's mouse pointer into hardware. However the * Xserver still writes to it through the agp * aperture */ - if (pg_count != 1) { - return NULL; - } - new = agp_create_memory(1); + if (pg_count != 1 && pg_count != 4) return(NULL); - if (new == NULL) { - return NULL; + new = agp_create_memory(pg_count); + + if (new == NULL) return(NULL); + + MOD_INC_USE_COUNT; + if (pg_count == 1) + new->memory[0] = agp_bridge.agp_alloc_page(); + else { + /* kludge to get 4 physical pages for ARGB cursor */ + new->memory[0] = i830_alloc_pages(); + new->memory[1] = new->memory[0] + PAGE_SIZE; + new->memory[2] = new->memory[1] + PAGE_SIZE; + new->memory[3] = new->memory[2] + PAGE_SIZE; } - MOD_INC_USE_COUNT; - new->memory[0] = agp_bridge.agp_alloc_page(); if (new->memory[0] == 0) { /* Free this structure */ agp_free_memory(new); return NULL; } + physical = new->memory[0]; + new->memory[0] = virt_to_phys((void *) new->memory[0]); - new->page_count = 1; - new->num_scratch_pages = 1; - new->type = AGP_PHYS_MEMORY; - new->physical = virt_to_phys((void *) new->memory[0]); - return new; + if (pg_count == 4) { + /* kludge to enable 4 physical pages for ARGB cursor */ + new->memory[1] = virt_to_phys((void *) new->memory[1]); + new->memory[2] = virt_to_phys((void *) new->memory[2]); + new->memory[3] = virt_to_phys((void *) new->memory[3]); + } + new->page_count = pg_count; + new->num_scratch_pages = pg_count; + new->type = AGP_PHYS_MEMORY; + new->physical = virt_to_phys((void *) physical); + return(new); } return NULL; @@ -1058,7 +1115,11 @@ { agp_free_key(curr->key); if(curr->type == AGP_PHYS_MEMORY) { - agp_bridge.agp_destroy_page((unsigned long) + if (curr->page_count == 4) + i830_destroy_pages((unsigned long) + phys_to_virt(curr->memory[0])); + else + agp_bridge.agp_destroy_page((unsigned long) phys_to_virt(curr->memory[0])); vfree(curr->memory); } @@ -1108,15 +1169,20 @@ { {128, 32768, 5}, /* The 64M mode still requires a 128k gatt */ - {64, 16384, 5} + {64, 16384, 5}, + /* For I915G */ + {256, 65536, 6} }; static struct _intel_i830_private { struct pci_dev *i830_dev; /* device one */ volatile u8 *registers; + volatile u32 *gtt; /* I915G */ int gtt_entries; } intel_i830_private; +static int intel_i830_fetch_size(void); + static void intel_i830_init_gtt_entries(void) { u16 gmch_ctrl; @@ -1124,26 +1190,47 @@ u8 rdct; int local = 0; static const int ddt[4] = { 0, 16, 32, 64 }; + int size; + pci_read_config_word(agp_bridge.dev,I830_GMCH_CTRL,&gmch_ctrl); + /* We obtain the size of the GTT, which is also stored (for some + * reason) at the top of stolen memory. Then we add 4KB to that + * for the video BIOS popup, which is also stored in there. */ + size = intel_i830_fetch_size() + 4; + if (agp_bridge.dev->device != PCI_DEVICE_ID_INTEL_830_M_0 && agp_bridge.dev->device != PCI_DEVICE_ID_INTEL_845_G_0) { switch (gmch_ctrl & I855_GMCH_GMS_MASK) { case I855_GMCH_GMS_STOLEN_1M: - gtt_entries = MB(1) - KB(132); + gtt_entries = MB(1) - KB(size); break; case I855_GMCH_GMS_STOLEN_4M: - gtt_entries = MB(4) - KB(132); + gtt_entries = MB(4) - KB(size); break; case I855_GMCH_GMS_STOLEN_8M: - gtt_entries = MB(8) - KB(132); + gtt_entries = MB(8) - KB(size); break; case I855_GMCH_GMS_STOLEN_16M: - gtt_entries = MB(16) - KB(132); + gtt_entries = MB(16) - KB(size); break; case I855_GMCH_GMS_STOLEN_32M: - gtt_entries = MB(32) - KB(132); + gtt_entries = MB(32) - KB(size); + break; + case I915_GMCH_GMS_STOLEN_48M: + /* Check it's really I915 */ + if (agp_bridge.dev->device == PCI_DEVICE_ID_INTEL_915_G_0) + gtt_entries = MB(48) - KB(size); + else + gtt_entries = 0; + break; + case I915_GMCH_GMS_STOLEN_64M: + /* Check it's really I915 */ + if (agp_bridge.dev->device == PCI_DEVICE_ID_INTEL_915_G_0) + gtt_entries = MB(64) - KB(size); + else + gtt_entries = 0; break; default: gtt_entries = 0; @@ -1153,13 +1240,13 @@ { switch (gmch_ctrl & I830_GMCH_GMS_MASK) { case I830_GMCH_GMS_STOLEN_512: - gtt_entries = KB(512) - KB(132); + gtt_entries = KB(512) - KB(size); break; case I830_GMCH_GMS_STOLEN_1024: - gtt_entries = MB(1) - KB(132); + gtt_entries = MB(1) - KB(size); break; case I830_GMCH_GMS_STOLEN_8192: - gtt_entries = MB(8) - KB(132); + gtt_entries = MB(8) - KB(size); break; case I830_GMCH_GMS_LOCAL: rdct = INREG8(intel_i830_private.registers, @@ -1193,14 +1280,24 @@ int page_order; aper_size_info_fixed *size; int num_entries; - u32 temp; + u32 temp, temp2; size = agp_bridge.current_size; page_order = size->page_order; num_entries = size->num_entries; agp_bridge.gatt_table_real = 0; - pci_read_config_dword(intel_i830_private.i830_dev,I810_MMADDR,&temp); + if (agp_bridge.dev->device == PCI_DEVICE_ID_INTEL_915_G_0) { + pci_read_config_dword(intel_i830_private.i830_dev, + I915_MMADDR,&temp); + pci_read_config_dword(intel_i830_private.i830_dev, + I915_PTEADDR,&temp2); + intel_i830_private.gtt = (volatile u32 *) ioremap(temp2, 256 * 1024); + if (!intel_i830_private.gtt) return (-ENOMEM); + } else + pci_read_config_dword(intel_i830_private.i830_dev, + I810_MMADDR,&temp); + temp &= 0xfff80000; intel_i830_private.registers = (volatile u8 *) ioremap(temp,128 * 4096); @@ -1234,6 +1331,20 @@ values = A_SIZE_FIX(agp_bridge.aperture_sizes); + if (agp_bridge.dev->device == PCI_DEVICE_ID_INTEL_915_G_0) { + u32 temp, offset = 0; + pci_read_config_dword(intel_i830_private.i830_dev, + I915_GMADDR,&temp); +#define I915_256MB_ADDRESS_MASK (1<<27) + if (temp & I915_256MB_ADDRESS_MASK) + offset = 0; /* 128MB aperture */ + else + offset = 2; /* 256MB aperture */ + agp_bridge.previous_size = agp_bridge.current_size = (void *)(values + offset); + agp_bridge.aperture_size_idx = offset; + return(values[offset].size); + } + if (agp_bridge.dev->device != PCI_DEVICE_ID_INTEL_830_M_0 && agp_bridge.dev->device != PCI_DEVICE_ID_INTEL_845_G_0) { agp_bridge.previous_size = agp_bridge.current_size = (void *) values; @@ -1247,7 +1358,7 @@ agp_bridge.aperture_size_idx = 0; return(values[0].size); } else { - agp_bridge.previous_size = agp_bridge.current_size = (void *) values; + agp_bridge.previous_size = agp_bridge.current_size = (void *)(values + 1); agp_bridge.aperture_size_idx = 1; return(values[1].size); } @@ -1264,7 +1375,13 @@ current_size = A_SIZE_FIX(agp_bridge.current_size); - pci_read_config_dword(intel_i830_private.i830_dev,I810_GMADDR,&temp); + if (agp_bridge.dev->device == PCI_DEVICE_ID_INTEL_915_G_0) + pci_read_config_dword(intel_i830_private.i830_dev, + I915_GMADDR,&temp); + else + pci_read_config_dword(intel_i830_private.i830_dev, + I810_GMADDR,&temp); + agp_bridge.gart_bus_addr = (temp & PCI_BASE_ADDRESS_MEM_MASK); pci_read_config_word(agp_bridge.dev,I830_GMCH_CTRL,&gmch_ctrl); @@ -1274,15 +1391,24 @@ OUTREG32(intel_i830_private.registers,I810_PGETBL_CTL,agp_bridge.gatt_bus_addr | I810_PGETBL_ENABLED); CACHE_FLUSH(); - if (agp_bridge.needs_scratch_page == TRUE) - for (i = intel_i830_private.gtt_entries; i < current_size->num_entries; i++) - OUTREG32(intel_i830_private.registers,I810_PTE_BASE + (i * 4),agp_bridge.scratch_page); + if (agp_bridge.needs_scratch_page == TRUE) { + if (agp_bridge.dev->device == PCI_DEVICE_ID_INTEL_915_G_0) { + for (i = intel_i830_private.gtt_entries; i < current_size->num_entries; i++) + OUTREG32(intel_i830_private.gtt, i, agp_bridge.scratch_page); + } else { + for (i = intel_i830_private.gtt_entries; i < current_size->num_entries; i++) + OUTREG32(intel_i830_private.registers,I810_PTE_BASE + (i * 4),agp_bridge.scratch_page); + } + } return (0); } static void intel_i830_cleanup(void) { + if (agp_bridge.dev->device == PCI_DEVICE_ID_INTEL_915_G_0) + iounmap((void *)intel_i830_private.gtt); + iounmap((void *) intel_i830_private.registers); } @@ -1315,9 +1441,13 @@ CACHE_FLUSH(); - for (i = 0, j = pg_start; i < mem->page_count; i++, j++) - OUTREG32(intel_i830_private.registers,I810_PTE_BASE + (j * 4), - agp_bridge.mask_memory(mem->memory[i], mem->type)); + if (agp_bridge.dev->device == PCI_DEVICE_ID_INTEL_915_G_0) { + for (i = 0, j = pg_start; i < mem->page_count; i++, j++) + OUTREG32(intel_i830_private.gtt, j, agp_bridge.mask_memory(mem->memory[i], mem->type)); + } else { + for (i = 0, j = pg_start; i < mem->page_count; i++, j++) + OUTREG32(intel_i830_private.registers,I810_PTE_BASE + (j * 4),agp_bridge.mask_memory(mem->memory[i], mem->type)); + } CACHE_FLUSH(); @@ -1337,8 +1467,13 @@ return (-EINVAL); } - for (i = pg_start; i < (mem->page_count + pg_start); i++) - OUTREG32(intel_i830_private.registers,I810_PTE_BASE + (i * 4),agp_bridge.scratch_page); + if (agp_bridge.dev->device == PCI_DEVICE_ID_INTEL_915_G_0) { + for (i = pg_start; i < (mem->page_count + pg_start); i++) + OUTREG32(intel_i830_private.gtt, i, agp_bridge.scratch_page); + } else { + for (i = pg_start; i < (mem->page_count + pg_start); i++) + OUTREG32(intel_i830_private.registers,I810_PTE_BASE + (i * 4),agp_bridge.scratch_page); + } CACHE_FLUSH(); @@ -1363,24 +1498,40 @@ * aperture */ - if (pg_count != 1) return(NULL); + if (pg_count != 1 && pg_count != 4) return(NULL); - nw = agp_create_memory(1); + nw = agp_create_memory(pg_count); if (nw == NULL) return(NULL); MOD_INC_USE_COUNT; - nw->memory[0] = agp_bridge.agp_alloc_page(); - physical = nw->memory[0]; + if (pg_count == 1) + nw->memory[0] = agp_bridge.agp_alloc_page(); + else { + /* kludge to get 4 physical pages for ARGB cursor */ + nw->memory[0] = i830_alloc_pages(); + nw->memory[1] = nw->memory[0] + PAGE_SIZE; + nw->memory[2] = nw->memory[1] + PAGE_SIZE; + nw->memory[3] = nw->memory[2] + PAGE_SIZE; + } + if (nw->memory[0] == 0) { /* free this structure */ agp_free_memory(nw); return(NULL); } + physical = nw->memory[0]; + nw->memory[0] = virt_to_phys((void *) nw->memory[0]); - nw->page_count = 1; - nw->num_scratch_pages = 1; + if (pg_count == 4) { + /* kludge to enable 4 physical pages for ARGB cursor */ + nw->memory[1] = virt_to_phys((void *) nw->memory[1]); + nw->memory[2] = virt_to_phys((void *) nw->memory[2]); + nw->memory[3] = virt_to_phys((void *) nw->memory[3]); + } + nw->page_count = pg_count; + nw->num_scratch_pages = pg_count; nw->type = AGP_PHYS_MEMORY; nw->physical = virt_to_phys((void *) physical); return(nw); @@ -3447,9 +3598,9 @@ for (i = 0, j = pg_start; i < mem->page_count; i++, j++) { tmp = agp_bridge.mask_memory(mem->memory[i], mem->type); - BUG_ON(tmp & 0xffffff0000000ffc); - pte = (tmp & 0x000000ff00000000) >> 28; - pte |=(tmp & 0x00000000fffff000); + BUG_ON(tmp & 0xffffff0000000ffcULL); + pte = (tmp & 0x000000ff00000000ULL) >> 28; + pte |=(tmp & 0x00000000fffff000ULL); pte |= 1<<1|1<<0; agp_bridge.gatt_table[j] = pte; @@ -6112,6 +6263,13 @@ "865G", intel_845_setup }, + { PCI_DEVICE_ID_INTEL_915_G_0, + PCI_VENDOR_ID_INTEL, + INTEL_I915_G, + "Intel(R)", + "915G", + intel_845_setup }, + { PCI_DEVICE_ID_INTEL_840_0, PCI_VENDOR_ID_INTEL, INTEL_I840, @@ -6835,6 +6993,34 @@ "865G Chipset.\n"); agp_bridge.type = INTEL_I810; return intel_i830_setup(i810_dev); + + case PCI_DEVICE_ID_INTEL_915_G_0: + i810_dev = pci_find_device(PCI_VENDOR_ID_INTEL, + PCI_DEVICE_ID_INTEL_915_G_1, NULL); + if(i810_dev && PCI_FUNC(i810_dev->devfn) != 0) { + i810_dev = pci_find_device(PCI_VENDOR_ID_INTEL, + PCI_DEVICE_ID_INTEL_915_G_1, i810_dev); + } + + if (i810_dev == NULL) { + /* + * We probably have a 915G chipset + * with an external graphics + * card. It will be initialized later + */ + printk(KERN_ERR PFX "Detected an " + "Intel(R) 915G, but could not" + " find the" + " secondary device. Assuming a " + "non-integrated video card.\n"); + agp_bridge.type = INTEL_I915_G; + break; + } + printk(KERN_INFO PFX "Detected an Intel(R) " + "915G Chipset.\n"); + agp_bridge.type = INTEL_I810; + return intel_i830_setup(i810_dev); + default: break; } diff -urN linux-2.4.27/drivers/char/applicom.c linux-2.4.28/drivers/char/applicom.c --- linux-2.4.27/drivers/char/applicom.c 2002-08-02 17:39:43.000000000 -0700 +++ linux-2.4.28/drivers/char/applicom.c 2004-11-17 03:54:21.219382311 -0800 @@ -222,6 +222,7 @@ if (!RamIO) { printk(KERN_INFO "ac.o: Failed to ioremap PCI memory space at 0x%lx\n", PCI_BASE_ADDRESS(dev)); + pci_disable_device(dev); return -EIO; } @@ -233,12 +234,14 @@ (unsigned long)RamIO,0))) { printk(KERN_INFO "ac.o: PCI Applicom device doesn't have correct signature.\n"); iounmap(RamIO); + pci_disable_device(dev); continue; } if (request_irq(dev->irq, &ac_interrupt, SA_SHIRQ, "Applicom PCI", &dummy)) { printk(KERN_INFO "Could not allocate IRQ %d for PCI Applicom device.\n", dev->irq); iounmap(RamIO); + pci_disable_device(dev); apbs[boardno - 1].RamIO = 0; continue; } @@ -265,12 +268,6 @@ /* Now try the specified ISA cards */ -#warning "LEAK" - RamIO = ioremap(mem, LEN_RAM_IO * MAX_ISA_BOARD); - - if (!RamIO) - printk(KERN_INFO "ac.o: Failed to ioremap ISA memory space at 0x%lx\n", mem); - for (i = 0; i < MAX_ISA_BOARD; i++) { RamIO = ioremap(mem + (LEN_RAM_IO * i), LEN_RAM_IO); @@ -293,7 +290,8 @@ iounmap((void *) RamIO); apbs[boardno - 1].RamIO = 0; } - apbs[boardno - 1].irq = irq; + else + apbs[boardno - 1].irq = irq; } else apbs[boardno - 1].irq = 0; @@ -368,7 +366,7 @@ if (count != sizeof(struct st_ram_io) + sizeof(struct mailbox)) { static int warncount = 5; if (warncount) { - printk(KERN_INFO "Hmmm. write() of Applicom card, length %d != expected %d\n", + printk(KERN_INFO "Hmmm. write() of Applicom card, length %zd != expected %zd\n", count, sizeof(struct st_ram_io) + sizeof(struct mailbox)); warncount--; } @@ -476,18 +474,17 @@ return 0; } -static int do_ac_read(int IndexCard, char *buf) +static int do_ac_read(int IndexCard, char *buf, + struct st_ram_io *st_loc, struct mailbox *mailbox) { - struct st_ram_io st_loc; - struct mailbox tmpmailbox; /* bounce buffer - can't copy to user space with cli() */ unsigned long from = (unsigned long)apbs[IndexCard].RamIO + RAM_TO_PC; - unsigned char *to = (unsigned char *)&tmpmailbox; + unsigned char *to = (unsigned char *)&mailbox; #ifdef DEBUG int c; #endif - st_loc.tic_owner_to_pc = readb(apbs[IndexCard].RamIO + TIC_OWNER_TO_PC); - st_loc.numcard_owner_to_pc = readb(apbs[IndexCard].RamIO + NUMCARD_OWNER_TO_PC); + st_loc->tic_owner_to_pc = readb(apbs[IndexCard].RamIO + TIC_OWNER_TO_PC); + st_loc->numcard_owner_to_pc = readb(apbs[IndexCard].RamIO + NUMCARD_OWNER_TO_PC); { @@ -510,32 +507,24 @@ printk("Read from applicom card #%d. struct st_ram_io follows:", NumCard); for (c = 0; c < sizeof(struct st_ram_io);) { - printk("\n%5.5X: %2.2X", c, ((unsigned char *) &st_loc)[c]); + printk("\n%5.5X: %2.2X", c, ((unsigned char *)st_loc)[c]); for (c++; c % 8 && c < sizeof(struct st_ram_io); c++) { - printk(" %2.2X", ((unsigned char *) &st_loc)[c]); + printk(" %2.2X", ((unsigned char *)st_loc)[c]); } } printk("\nstruct mailbox follows:"); for (c = 0; c < sizeof(struct mailbox);) { - printk("\n%5.5X: %2.2X", c, ((unsigned char *) &tmpmailbox)[c]); + printk("\n%5.5X: %2.2X", c, ((unsigned char *)mailbox)[c]); for (c++; c % 8 && c < sizeof(struct mailbox); c++) { - printk(" %2.2X", ((unsigned char *) &tmpmailbox)[c]); + printk(" %2.2X", ((unsigned char *)mailbox)[c]); } } printk("\n"); #endif - -#warning "Je suis stupide. DW. - copy*user in cli" - - if (copy_to_user(buf, &st_loc, sizeof(struct st_ram_io))) - return -EFAULT; - if (copy_to_user(&buf[sizeof(struct st_ram_io)], &tmpmailbox, sizeof(struct mailbox))) - return -EFAULT; - return (sizeof(struct st_ram_io) + sizeof(struct mailbox)); } @@ -551,7 +540,7 @@ #endif /* No need to ratelimit this. Only root can trigger it anyway */ if (count != sizeof(struct st_ram_io) + sizeof(struct mailbox)) { - printk( KERN_WARNING "Hmmm. read() of Applicom card, length %d != expected %d\n", + printk( KERN_WARNING "Hmmm. read() of Applicom card, length %zd != expected %zd\n", count,sizeof(struct st_ram_io) + sizeof(struct mailbox)); return -EINVAL; } @@ -570,11 +559,19 @@ tmp = readb(apbs[i].RamIO + DATA_TO_PC_READY); if (tmp == 2) { + struct st_ram_io st_loc; + struct mailbox mailbox; + /* Got a packet for us */ - ret = do_ac_read(i, buf); + ret = do_ac_read(i, buf, &st_loc, &mailbox); spin_unlock_irqrestore(&apbs[i].mutex, flags); set_current_state(TASK_RUNNING); remove_wait_queue(&FlagSleepRec, &wait); + + if (copy_to_user(buf, &st_loc, sizeof(st_loc))) + return -EFAULT; + if (copy_to_user(buf + sizeof(st_loc), &mailbox, sizeof(mailbox))) + return -EFAULT; return tmp; } diff -urN linux-2.4.27/drivers/char/drm/drm_dma.h linux-2.4.28/drivers/char/drm/drm_dma.h --- linux-2.4.27/drivers/char/drm/drm_dma.h 2003-11-28 10:26:20.000000000 -0800 +++ linux-2.4.28/drivers/char/drm/drm_dma.h 2004-11-17 03:54:21.219382311 -0800 @@ -648,7 +648,7 @@ * for the same vblank sequence number; nothing to be done in * that case */ - list_for_each( ( (struct list_head *) vbl_sig ), &dev->vbl_sigs.head ) { + list_for_each_entry( vbl_sig, &dev->vbl_sigs.head, head ) { if (vbl_sig->sequence == vblwait.request.sequence && vbl_sig->info.si_signo == vblwait.request.signal && vbl_sig->task == current) @@ -699,19 +699,20 @@ void DRM(vbl_send_signals)( drm_device_t *dev ) { - struct list_head *tmp; + struct list_head *list, *tmp; drm_vbl_sig_t *vbl_sig; unsigned int vbl_seq = atomic_read( &dev->vbl_received ); unsigned long flags; spin_lock_irqsave( &dev->vbl_lock, flags ); - list_for_each_safe( ( (struct list_head *) vbl_sig ), tmp, &dev->vbl_sigs.head ) { + list_for_each_safe( list, tmp, &dev->vbl_sigs.head ) { + vbl_sig = list_entry( list, drm_vbl_sig_t, head ); if ( ( vbl_seq - vbl_sig->sequence ) <= (1<<23) ) { vbl_sig->info.si_code = vbl_seq; send_sig_info( vbl_sig->info.si_signo, &vbl_sig->info, vbl_sig->task ); - list_del( (struct list_head *) vbl_sig ); + list_del( list ); kfree( vbl_sig ); diff -urN linux-2.4.27/drivers/char/drm/radeon_state.c linux-2.4.28/drivers/char/drm/radeon_state.c --- linux-2.4.27/drivers/char/drm/radeon_state.c 2003-11-28 10:26:20.000000000 -0800 +++ linux-2.4.28/drivers/char/drm/radeon_state.c 2004-11-17 03:54:21.221382393 -0800 @@ -1223,7 +1223,7 @@ /* Update the input parameters for next time */ image->y += height; image->height -= height; - (const u8 *)image->data += size; + image->data = (const u8 *)image->data + size; } while (image->height > 0); /* Flush the pixel cache after the blit completes. This ensures diff -urN linux-2.4.27/drivers/char/ftape/lowlevel/ftape-bsm.c linux-2.4.28/drivers/char/ftape/lowlevel/ftape-bsm.c --- linux-2.4.27/drivers/char/ftape/lowlevel/ftape-bsm.c 2000-10-16 12:58:51.000000000 -0700 +++ linux-2.4.28/drivers/char/ftape/lowlevel/ftape-bsm.c 2004-11-17 03:54:21.222382434 -0800 @@ -203,6 +203,7 @@ ft_format_code == fmt_1100ft) { SectorCount *ptr = (SectorCount *)bad_sector_map; unsigned int sector; + __u16 *ptr16; while((sector = get_sector(ptr++)) != 0) { if ((ft_format_code == fmt_big || @@ -218,9 +219,10 @@ } /* Display old ftape's end-of-file marks */ - while ((sector = get_unaligned(((__u16*)ptr)++)) != 0) { + ptr16 = (__u16*)ptr; + while ((sector = get_unaligned(ptr16++)) != 0) { TRACE(ft_t_noise, "Old ftape eof mark: %4d/%2d", - sector, get_unaligned(((__u16*)ptr)++)); + sector, get_unaligned(ptr16++)); } } else { /* fixed size format */ for (i = ft_first_data_segment; diff -urN linux-2.4.27/drivers/char/ftape/lowlevel/ftape-bsm.h linux-2.4.28/drivers/char/ftape/lowlevel/ftape-bsm.h --- linux-2.4.27/drivers/char/ftape/lowlevel/ftape-bsm.h 1997-11-25 14:45:27.000000000 -0800 +++ linux-2.4.28/drivers/char/ftape/lowlevel/ftape-bsm.h 2004-11-17 03:54:21.222382434 -0800 @@ -47,7 +47,7 @@ */ typedef struct NewSectorMap { __u8 bytes[3]; -} SectorCount __attribute__((packed)); +} __attribute__((packed)) SectorCount; /* diff -urN linux-2.4.27/drivers/char/ftape/zftape/zftape-eof.c linux-2.4.28/drivers/char/ftape/zftape/zftape-eof.c --- linux-2.4.27/drivers/char/ftape/zftape/zftape-eof.c 1999-11-23 10:29:15.000000000 -0800 +++ linux-2.4.28/drivers/char/ftape/zftape/zftape-eof.c 2004-11-17 03:54:21.222382434 -0800 @@ -123,7 +123,7 @@ while (ptr + 3 < limit) { if (get_unaligned((__u32*)ptr)) { - ++(__u32*)ptr; + ptr += sizeof(__u32); } else { return ptr; } diff -urN linux-2.4.27/drivers/char/ib700wdt.c linux-2.4.28/drivers/char/ib700wdt.c --- linux-2.4.27/drivers/char/ib700wdt.c 2002-11-28 15:53:12.000000000 -0800 +++ linux-2.4.28/drivers/char/ib700wdt.c 2004-11-17 03:54:21.223382475 -0800 @@ -133,7 +133,7 @@ ibwdt_ping(void) { /* Write a watchdog value */ - outb_p(wd_times[wd_marg