/*	$NetBSD: if_emac.c,v 1.61 2026/06/14 00:02:35 rkujawa Exp $	*/

/*
 * Copyright 2001, 2002 Wasabi Systems, Inc.
 * All rights reserved.
 *
 * Written by Simon Burge and Jason Thorpe for Wasabi Systems, Inc.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *      This product includes software developed for the NetBSD Project by
 *      Wasabi Systems, Inc.
 * 4. The name of Wasabi Systems, Inc. may not be used to endorse
 *    or promote products derived from this software without specific prior
 *    written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY WASABI SYSTEMS, INC. ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL WASABI SYSTEMS, INC
 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 */

/*
 * emac(4) supports the following ibm4xx EMACs.
 *   XXXX: ZMII not supported yet...
 *   The 'TCP Acceleration Hardware' (TAH) checksum offload engine is
 *   supported on the 460EX/GT with options EMAC_TAH.
 *
 *            tested
 *            ------
 * 405EP	-  10/100 x2
 * 405EX/EXr	o  10/100/1000 x2 (EXr x1), STA v2, 256bit hash-Table, RGMII
 * 405GP/GPr	o  10/100
 * 440EP	-  10/100 x2, ZMII
 * 440GP	-  10/100 x2, ZMII
 * 440GX	-  10/100/1000 x4, ZMII/RGMII(ch 2, 3), TAH(ch 2, 3)
 * 440SP	-  10/100/1000
 * 440SPe	-  10/100/1000, STA v2
 * 460EX	-  10/100/1000, STA v2, 256bit hash-Table, RGMII, TAH
 */

#include <sys/cdefs.h>
__KERNEL_RCSID(0, "$NetBSD: if_emac.c,v 1.61 2026/06/14 00:02:35 rkujawa Exp $");

#ifdef _KERNEL_OPT
#include "opt_emac.h"
#ifdef EMAC_TAH
#include "opt_inet.h"
#endif
#endif

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/mbuf.h>
#include <sys/kernel.h>
#include <sys/timevar.h>
#include <sys/socket.h>
#include <sys/ioctl.h>
#include <sys/cpu.h>
#include <sys/device.h>

#include <sys/rndsource.h>

#include <uvm/uvm_extern.h>		/* for PAGE_SIZE */

#include <net/if.h>
#include <net/if_dl.h>
#include <net/if_media.h>
#include <net/if_ether.h>

#include <net/bpf.h>

#ifdef EMAC_TAH
#include <netinet/in.h>
#include <netinet/in_systm.h>
#include <netinet/ip.h>
#include <netinet/tcp.h>
#include <netinet/udp.h>
#include <netinet/in_offload.h>
#ifdef INET6
#include <netinet/ip6.h>
#include <netinet6/in6_offload.h>
#endif
#endif

#include <powerpc/ibm4xx/cpu.h>
#include <powerpc/ibm4xx/dcr4xx.h>
#include <powerpc/ibm4xx/mal405gp.h>
#ifdef EMAC_TAH
#include <powerpc/ibm4xx/amcc460ex.h>
#endif
#include <powerpc/ibm4xx/dev/emacreg.h>
#include <powerpc/ibm4xx/dev/if_emacreg.h>
#include <powerpc/ibm4xx/dev/if_emacvar.h>
#include <powerpc/ibm4xx/dev/malvar.h>
#include <powerpc/ibm4xx/dev/opbreg.h>
#include <powerpc/ibm4xx/dev/opbvar.h>
#include <powerpc/ibm4xx/dev/plbvar.h>
#if defined(EMAC_ZMII_PHY) || defined(EMAC_RGMII_PHY)
#include <powerpc/ibm4xx/dev/rmiivar.h>
#endif

#include <dev/mii/miivar.h>

#include "locators.h"


/*
 * Transmit descriptor list size.  There are two Tx channels, each with
 * up to 256 hardware descriptors available.  We currently use one Tx
 * channel.  We tell the upper layers that they can queue a lot of
 * packets, and we go ahead and manage up to 64 of them at a time.  We
 * allow up to 16 DMA segments per packet.
 *
 * EMAC_TXQUEUELEN and EMAC_NTXDESC must remain powers of two: the
 * EMAC_NEXTTX()/EMAC_NEXTTXS() macros wrap an index by masking it with
 * (size - 1).
 *
 * NOTE(review): EMAC_TXQUEUE_GC is a quarter of the job queue; its user
 * is not in this part of the file -- presumably a reap threshold.
 */
#define	EMAC_NTXSEGS		16	/* DMA segments per non-TSO Tx map */
#define	EMAC_TXQUEUELEN		64	/* Tx jobs (struct emac_txsoft) */
#define	EMAC_TXQUEUELEN_MASK	(EMAC_TXQUEUELEN - 1)
#define	EMAC_TXQUEUE_GC		(EMAC_TXQUEUELEN / 4)
#define	EMAC_NTXDESC		256	/* descriptors in the Tx ring */
#define	EMAC_NTXDESC_MASK	(EMAC_NTXDESC - 1)
/* Next Tx descriptor index, wrapping at the end of the ring. */
#define	EMAC_NEXTTX(x)		(((x) + 1) & EMAC_NTXDESC_MASK)
/* Next Tx job index, wrapping at the end of the job queue. */
#define	EMAC_NEXTTXS(x)		(((x) + 1) & EMAC_TXQUEUELEN_MASK)

/*
 * Receive descriptor list size.  There is one Rx channel with up to 256
 * hardware descriptors available.  We allocate 64 receive descriptors,
 * each with a 2k buffer (MCLBYTES).
 *
 * EMAC_NRXDESC must remain a power of two: EMAC_NEXTRX()/EMAC_PREVRX()
 * wrap an index by masking it with (EMAC_NRXDESC - 1).
 */
#define	EMAC_NRXDESC		64	/* descriptors in the Rx ring */
#define	EMAC_NRXDESC_MASK	(EMAC_NRXDESC - 1)
/* Following Rx descriptor index, wrapping at the end of the ring. */
#define	EMAC_NEXTRX(x)		(((x) + 1) & EMAC_NRXDESC_MASK)
/* Preceding Rx descriptor index, wrapping at the start of the ring. */
#define	EMAC_PREVRX(x)		(((x) - 1) & EMAC_NRXDESC_MASK)

#ifdef EMAC_TAH
/*
 * TSO via the TAH.  Tx DMA maps must be able to take a full combined
 * packet (the MAL buffer length field is 12 bits, so segments stay
 * below 4KB).
 *
 * EMAC_TSO_MAXLEN, EMAC_TSO_NTXSEGS and EMAC_TSO_MAXSEGSZ are the size,
 * segment count and maximum segment size handed to bus_dmamap_create()
 * for the Tx maps when a TAH is in use.  EMAC_TSO_SEG_MIN/MAX bound the
 * per-segment size (TCP payload + IP header + TCP header, in bytes)
 * that emac_tah_tx_classify() accepts for hardware segmentation.
 */
#define	EMAC_TSO_NSSR		6	/* TAH_SSR0..5 */
#define	EMAC_TSO_NTXSEGS	40	/* 64KB of 2KB clusters + slack */
#define	EMAC_TSO_MAXLEN		(IP_MAXPACKET + ETHER_HDR_LEN)
#define	EMAC_TSO_MAXSEGSZ	MCLBYTES
#define	EMAC_TSO_SEG_MIN	168
#define	EMAC_TSO_SEG_MAX	9700
#endif

/*
 * Transmit/receive descriptors that are DMA'd to the EMAC.
 *
 * Both rings live in one structure so that emac_attach() can allocate,
 * map and load them as a single DMA region (sc_cddmamap).
 */
struct emac_control_data {
	struct mal_descriptor ecd_txdesc[EMAC_NTXDESC];
	struct mal_descriptor ecd_rxdesc[EMAC_NRXDESC];
};

/*
 * Byte offsets of a member, a Tx descriptor, or an Rx descriptor within
 * struct emac_control_data.
 */
#define	EMAC_CDOFF(x)		offsetof(struct emac_control_data, x)
#define	EMAC_CDTXOFF(x)		EMAC_CDOFF(ecd_txdesc[(x)])
#define	EMAC_CDRXOFF(x)		EMAC_CDOFF(ecd_rxdesc[(x)])

/*
 * Software state for transmit jobs.
 *
 * One entry per queued packet (EMAC_TXQUEUELEN of them in the softc);
 * it records which descriptors of the Tx ring the packet occupies.
 */
struct emac_txsoft {
	struct mbuf *txs_mbuf;		/* head of mbuf chain */
	bus_dmamap_t txs_dmamap;	/* our DMA map */
	int txs_firstdesc;		/* first descriptor in packet */
	int txs_lastdesc;		/* last descriptor in packet */
	int txs_ndesc;			/* # of descriptors used */
#ifdef EMAC_TAH
	/* Index into sc_ssr_bytes[]/sc_ssr_refs[]; see emac_tso_ssr_*(). */
	int8_t txs_ssr;			/* TAH_SSR slot held, -1 = none */
	uint16_t txs_opackets;		/* wire packets this job produces */
#endif
};

/*
 * Software state for receive descriptors.
 *
 * One entry per Rx descriptor (EMAC_NRXDESC of them in the softc).
 * emac_attach() leaves rxs_mbuf NULL after creating the DMA map.
 */
struct emac_rxsoft {
	struct mbuf *rxs_mbuf;		/* head of mbuf chain */
	bus_dmamap_t rxs_dmamap;	/* our DMA map */
};

/*
 * Software state per device.
 *
 * Allocated by autoconfiguration (see CFATTACH_DECL_NEW) and filled in
 * by emac_attach().
 */
struct emac_softc {
	device_t sc_dev;		/* generic device information */
	int sc_instance;		/* EMAC instance no. (opb_instance) */
	bus_space_tag_t sc_st;		/* bus space tag */
	bus_space_handle_t sc_sh;	/* bus space handle (EMAC registers) */
	bus_dma_tag_t sc_dmat;		/* bus DMA tag */
	struct ethercom sc_ethercom;	/* ethernet common data */
	void *sc_sdhook;		/* shutdown hook */
	void *sc_powerhook;		/* power management hook */

	struct mii_data sc_mii;		/* MII/media information */
	struct callout sc_callout;	/* tick callout */

	uint32_t sc_mr1;		/* copy of Mode Register 1 */
	uint32_t sc_stacr_read;		/* Read opcode of STAOPC of STACR */
	uint32_t sc_stacr_write;	/* Write opcode of STAOPC of STACR */
	uint32_t sc_stacr_bits;		/* misc bits of STACR */
	bool sc_stacr_completed;	/* Operation completed of STACR */
	int sc_htsize;			/* Hash Table size (64 or 256 bits) */
	bool sc_ethcfg_ecs;		/* clock select via SDR0_ETH_CFG */
#ifdef EMAC_TAH
	bus_space_handle_t sc_tahh;	/* TAH bus space handle */
	bool sc_tah;			/* TAH present, mapped and in path */
	bool sc_tah_cvr;		/* RX checksum verification enabled */

	/* IFF_DEBUG diagnostics for the per-packet status interrupt */
	uint32_t sc_isr_seen;		/* accumulated ISR bits */
	uint32_t sc_isr_zero;		/* interrupts with empty ISR */
	struct timeval sc_isr_last;	/* report rate limit */

	/*
	 * TSO segment-size register cache: a TAH_SSR slot may only be
	 * rewritten while no unreaped transmit job references it.
	 * Maintained by emac_tso_ssr_claim()/emac_tso_ssr_release().
	 */
	uint16_t sc_ssr_bytes[EMAC_TSO_NSSR];	/* programmed size, 0 = none */
	uint16_t sc_ssr_refs[EMAC_TSO_NSSR];	/* in-flight references */
	struct mbuf *sc_txpending;	/* sw-segmented chain (m_nextpkt) */
#endif

	bus_dmamap_t sc_cddmamap;	/* control data dma map */
	/* Bus address of the control data (single-segment map). */
#define	sc_cddma	sc_cddmamap->dm_segs[0].ds_addr

	/* Software state for transmit/receive descriptors. */
	struct emac_txsoft sc_txsoft[EMAC_TXQUEUELEN];
	struct emac_rxsoft sc_rxsoft[EMAC_NRXDESC];

	/* Control data structures. */
	struct emac_control_data *sc_control_data;
	/* Shorthand for the two descriptor rings in the control data. */
#define	sc_txdescs	sc_control_data->ecd_txdesc
#define	sc_rxdescs	sc_control_data->ecd_rxdesc

#ifdef EMAC_EVENT_COUNTERS
	struct evcnt sc_ev_rxintr;	/* Rx interrupts */
	struct evcnt sc_ev_txintr;	/* Tx interrupts */
	struct evcnt sc_ev_rxde;	/* Rx descriptor interrupts */
	struct evcnt sc_ev_txde;	/* Tx descriptor interrupts */
	struct evcnt sc_ev_intr;	/* General EMAC interrupts */

	struct evcnt sc_ev_txreap;	/* Calls to Tx descriptor reaper */
	struct evcnt sc_ev_txsstall;	/* Tx stalled due to no txs */
	struct evcnt sc_ev_txdstall;	/* Tx stalled due to no txd */
	struct evcnt sc_ev_txdrop;	/* Tx packets dropped (too many segs) */
	struct evcnt sc_ev_tu;		/* Tx underrun */
#ifdef EMAC_TAH
	struct evcnt sc_ev_txcsum;	/* Tx packets checksummed by the TAH */
	struct evcnt sc_ev_tahted;	/* TAH transmit errors (TSR) */
	struct evcnt sc_ev_rxcsum;	/* Rx packets verified by the TAH */
	struct evcnt sc_ev_rxcsumbad;	/* Rx checksum errors from the TAH */
	struct evcnt sc_ev_txtso;	/* Tx bursts segmented by the TAH */
	struct evcnt sc_ev_txtsofb;	/* Tx TSO software fallbacks */
#endif
#endif /* EMAC_EVENT_COUNTERS */

	int sc_txfree;			/* number of free Tx descriptors */
	int sc_txnext;			/* next ready Tx descriptor */

	int sc_txsfree;			/* number of free Tx jobs */
	int sc_txsnext;			/* next ready Tx job */
	int sc_txsdirty;		/* dirty Tx jobs */

	int sc_rxptr;			/* next ready RX descriptor/descsoft */

	krndsource_t rnd_source;	/* random source */

	/*
	 * Reduced-MII glue; filled in by zmii_attach()/rgmii_attach()
	 * from emac_attach() when the matching OPB flag is set.
	 */
	void (*sc_rmii_enable)(device_t, int);		/* reduced MII enable */
	void (*sc_rmii_disable)(device_t, int);		/* reduced MII disable*/
	void (*sc_rmii_speed)(device_t, int, int);	/* reduced MII speed */
};

/*
 * Bump an event counter; expands to nothing when the kernel is built
 * without EMAC_EVENT_COUNTERS.
 */
#ifdef EMAC_EVENT_COUNTERS
#define	EMAC_EVCNT_INCR(ev)	(ev)->ev_count++
#else
#define	EMAC_EVCNT_INCR(ev)	/* nothing */
#endif

/* Bus (DMA) address of Tx/Rx descriptor x inside the control data. */
#define	EMAC_CDTXADDR(sc, x)	((sc)->sc_cddma + EMAC_CDTXOFF((x)))
#define	EMAC_CDRXADDR(sc, x)	((sc)->sc_cddma + EMAC_CDRXOFF((x)))

/*
 * bus_dmamap_sync() n consecutive Tx descriptors starting at index x.
 * A range that runs past the end of the ring is synced in two pieces:
 * first up to the end of the ring, then the remainder from index 0.
 */
#define	EMAC_CDTXSYNC(sc, x, n, ops)					\
do {									\
	int __x, __n;							\
									\
	__x = (x);							\
	__n = (n);							\
									\
	/* If it will wrap around, sync to the end of the ring. */	\
	if ((__x + __n) > EMAC_NTXDESC) {				\
		bus_dmamap_sync((sc)->sc_dmat, (sc)->sc_cddmamap,	\
		    EMAC_CDTXOFF(__x), sizeof(struct mal_descriptor) *	\
		    (EMAC_NTXDESC - __x), (ops));			\
		__n -= (EMAC_NTXDESC - __x);				\
		__x = 0;						\
	}								\
									\
	/* Now sync whatever is left. */				\
	bus_dmamap_sync((sc)->sc_dmat, (sc)->sc_cddmamap,		\
	    EMAC_CDTXOFF(__x), sizeof(struct mal_descriptor) * __n, (ops)); \
} while (/*CONSTCOND*/0)

/* bus_dmamap_sync() the single Rx descriptor at index x. */
#define	EMAC_CDRXSYNC(sc, x, ops)					\
do {									\
	bus_dmamap_sync((sc)->sc_dmat, (sc)->sc_cddmamap,		\
	    EMAC_CDRXOFF((x)), sizeof(struct mal_descriptor), (ops));	\
} while (/*CONSTCOND*/0)

/*
 * (Re)arm Rx descriptor x for the mbuf already stored in sc_rxsoft[x]:
 * point the descriptor at the mbuf's external buffer, mark it empty
 * with an interrupt requested (plus the wrap bit on the last descriptor
 * of the ring), and sync it.  rxs_mbuf must not be NULL and its DMA map
 * must already be loaded, since both are dereferenced here.
 */
#define	EMAC_INIT_RXDESC(sc, x)						\
do {									\
	struct emac_rxsoft *__rxs = &(sc)->sc_rxsoft[(x)];		\
	struct mal_descriptor *__rxd = &(sc)->sc_rxdescs[(x)];		\
	struct mbuf *__m = __rxs->rxs_mbuf;				\
									\
	/*								\
	 * Note: We scoot the packet forward 2 bytes in the buffer	\
	 * so that the payload after the Ethernet header is aligned	\
	 * to a 4-byte boundary.					\
	 */								\
	__m->m_data = __m->m_ext.ext_buf + 2;				\
									\
	__rxd->md_data = __rxs->rxs_dmamap->dm_segs[0].ds_addr + 2;	\
	__rxd->md_data_len = __m->m_ext.ext_size - 2;			\
	__rxd->md_stat_ctrl = MAL_RX_EMPTY | MAL_RX_INTERRUPT |		\
	    /* Set wrap on last descriptor. */				\
	    (((x) == EMAC_NRXDESC - 1) ? MAL_RX_WRAP : 0);		\
	EMAC_CDRXSYNC((sc), (x), BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE); \
} while (/*CONSTCOND*/0)

/*
 * 32-bit EMAC register access through the handle mapped in
 * emac_attach() (sc_sh), using the bus_space stream accessors.
 */
#define	EMAC_WRITE(sc, reg, val) \
	bus_space_write_stream_4((sc)->sc_st, (sc)->sc_sh, (reg), (val))
#define	EMAC_READ(sc, reg) \
	bus_space_read_stream_4((sc)->sc_st, (sc)->sc_sh, (reg))

#ifdef EMAC_TAH
/*
 * Same for the TAH registers (sc_tahh).  Only valid once the TAH has
 * been mapped, i.e. when sc_tah is true.
 */
#define	TAH_WRITE(sc, reg, val) \
	bus_space_write_stream_4((sc)->sc_st, (sc)->sc_tahh, (reg), (val))
#define	TAH_READ(sc, reg) \
	bus_space_read_stream_4((sc)->sc_st, (sc)->sc_tahh, (reg))
#endif

/*
 * Set the hash-table bit selected by a CRC.
 *
 * EMAC_SET_FILTER:    64-bit table held as 4 words of 16 bits.  The top
 *                     6 bits of the CRC select the bit; word 3 holds the
 *                     lowest-numbered bits.
 * EMAC_SET_FILTER256: 256-bit table held as 8 words of 32 bits.  The top
 *                     8 bits of the CRC select the bit; word 7 holds the
 *                     lowest-numbered bits.
 *
 * The bit mask is built from an unsigned one: "1 << 31" would shift a
 * signed int into its sign bit, which is undefined behaviour and would
 * sign-extend if the table element were wider than 32 bits.
 */
#define	EMAC_SET_FILTER(aht, crc) \
do {									\
	(aht)[3 - (((crc) >> 26) >> 4)] |= 1U << (((crc) >> 26) & 0xf);	\
} while (/*CONSTCOND*/0)
#define	EMAC_SET_FILTER256(aht, crc) \
do {									\
	(aht)[7 - (((crc) >> 24) >> 5)] |= 1U << (((crc) >> 24) & 0x1f); \
} while (/*CONSTCOND*/0)

/* Autoconfiguration glue. */
static int	emac_match(device_t, cfdata_t, void *);
static void	emac_attach(device_t, device_t, void *);

/* Interrupt and shutdown hooks established by emac_attach(). */
static int	emac_intr(void *);
static void	emac_shutdown(void *);

/* ifnet entry points installed by emac_attach(). */
static void	emac_start(struct ifnet *);
static int	emac_ioctl(struct ifnet *, u_long, void *);
static int	emac_init(struct ifnet *);
static void	emac_stop(struct ifnet *, int);
static void	emac_watchdog(struct ifnet *);

/* Ring and filter helpers. */
static int	emac_add_rxbuf(struct emac_softc *, int);
static void	emac_rxdrain(struct emac_softc *);
static int	emac_set_filter(struct emac_softc *);
static int	emac_txreap(struct emac_softc *);

/* Chip reset helpers. */
static void	emac_soft_reset(struct emac_softc *);
static void	emac_smart_reset(struct emac_softc *);
#ifdef EMAC_TAH
static void	emac_tah_reset(struct emac_softc *);
#endif

/* MII access callbacks and periodic tick. */
static int	emac_mii_readreg(device_t, int, int, uint16_t *);
static int	emac_mii_writereg(device_t, int, int, uint16_t);
static void	emac_mii_statchg(struct ifnet *);
static uint32_t	emac_mii_wait(struct emac_softc *);
static void	emac_mii_tick(void *);

/*
 * NOTE(review): global tunable, off by default; its consumer is not in
 * this part of the file -- presumably selects copying of small received
 * packets into a fresh mbuf.
 */
int		emac_copy_small = 0;

/* Attachment record; no detach or activate routines are provided. */
CFATTACH_DECL_NEW(emac, sizeof(struct emac_softc),
    emac_match, emac_attach, NULL, NULL);


/*
 * Autoconfiguration match routine.
 *
 * Returns 1 when the name offered by the OPB bus in the attach arguments
 * equals this driver's configuration name, 0 otherwise.
 */
static int
emac_match(device_t parent, cfdata_t cf, void *aux)
{
	const struct opb_attach_args * const args = aux;

	/* Only on-chip ethernet devices carry our name. */
	return strcmp(args->opb_name, cf->cf_name) == 0;
}

/*
 * emac_attach:
 *
 *	Autoconfiguration attach routine.  In order:
 *	  - map the EMAC registers and fetch the Ethernet address;
 *	  - hook up the ZMII/RGMII bridge when configured and flagged;
 *	  - allocate, map (uncached) and load the descriptor rings;
 *	  - map and reset the TAH offload engine when flagged (EMAC_TAH);
 *	  - create the Tx and Rx buffer DMA maps;
 *	  - select or bypass the TAH in SDR0_ETH_CFG, soft reset the EMAC;
 *	  - derive the Mode Register 1 and STACR settings from the OPB
 *	    clock and the chip flags;
 *	  - establish interrupts, attach the PHY(s) and the interface.
 *
 *	parent	the OPB bus device (passed on to the ZMII/RGMII glue)
 *	self	this device
 *	aux	struct opb_attach_args describing the EMAC instance
 *
 *	On failure an error is printed and the function returns without
 *	attaching the interface; DMA resources acquired from the control
 *	data allocation onwards are released through the fail_* labels.
 */
static void
emac_attach(device_t parent, device_t self, void *aux)
{
	struct opb_attach_args *oaa = aux;
	struct emac_softc *sc = device_private(self);
	struct ifnet *ifp = &sc->sc_ethercom.ec_if;
	struct mii_data *mii = &sc->sc_mii;
	const char * xname = device_xname(self);
	bus_dma_segment_t seg;
	int error, i, nseg, opb_freq, opbc, mii_phy = MII_PHY_ANY;
	uint8_t enaddr[ETHER_ADDR_LEN];

	sc->sc_dev = self;
	sc->sc_instance = oaa->opb_instance;
	sc->sc_st = oaa->opb_bt;
	sc->sc_dmat = oaa->opb_dmat;

	aprint_naive("\n");
	aprint_normal(": Ethernet Media Access Controller\n");

	/*
	 * Map the EMAC registers.  Every EMAC_READ/EMAC_WRITE goes
	 * through this handle, so give up if the mapping fails instead
	 * of touching the chip through an uninitialised handle.
	 */
	error = bus_space_map(sc->sc_st, oaa->opb_addr, EMAC_NREG, 0,
	    &sc->sc_sh);
	if (error != 0) {
		aprint_error_dev(self,
		    "unable to map registers, error = %d\n", error);
		return;
	}

	callout_init(&sc->sc_callout, 0);

	/* Fetch the Ethernet address. */
	if (! ether_getaddr(self, enaddr)) {
		aprint_error_dev(self, "unable to get mac-address\n");
		return;
	}
	aprint_normal_dev(self, "Ethernet address %s\n", ether_sprintf(enaddr));

#if defined(EMAC_ZMII_PHY) || defined(EMAC_RGMII_PHY)
	{
		uint32_t phy;

		/*
		 * Fetch the MII offset.  The property is an unsigned
		 * 32-bit value, so read it into a variable of that type;
		 * mii_phy keeps MII_PHY_ANY when the property is absent.
		 */
		if (prop_dictionary_get_uint32(device_properties(self),
		    "mii-phy", &phy))
			mii_phy = (int)phy;
	}

#ifdef EMAC_ZMII_PHY
	if (oaa->opb_flags & OPB_FLAGS_EMAC_RMII_ZMII)
		zmii_attach(parent, sc->sc_instance, &sc->sc_rmii_enable,
		    &sc->sc_rmii_disable, &sc->sc_rmii_speed);
#endif
#ifdef EMAC_RGMII_PHY
	if (oaa->opb_flags & OPB_FLAGS_EMAC_RMII_RGMII)
		rgmii_attach(parent, sc->sc_instance, &sc->sc_rmii_enable,
		    &sc->sc_rmii_disable, &sc->sc_rmii_speed);
#endif
#endif

	/*
	 * Allocate the control data structures, and create and load the
	 * DMA map for it.
	 */
	if ((error = bus_dmamem_alloc(sc->sc_dmat,
	    sizeof(struct emac_control_data), 0, 0, &seg, 1, &nseg, 0)) != 0) {
		aprint_error_dev(self,
		    "unable to allocate control data, error = %d\n", error);
		goto fail_0;
	}

	/*
	 * Map the descriptor ring UNCACHED, no cache snooping of any kind.
	 */
	if ((error = bus_dmamem_map(sc->sc_dmat, &seg, nseg,
	    sizeof(struct emac_control_data), (void **)&sc->sc_control_data,
	    BUS_DMA_DONTCACHE)) != 0) {
		aprint_error_dev(self,
		    "unable to map control data, error = %d\n", error);
		goto fail_1;
	}

	if ((error = bus_dmamap_create(sc->sc_dmat,
	    sizeof(struct emac_control_data), 1,
	    sizeof(struct emac_control_data), 0, 0, &sc->sc_cddmamap)) != 0) {
		aprint_error_dev(self,
		    "unable to create control data DMA map, error = %d\n",
		    error);
		goto fail_2;
	}

	if ((error = bus_dmamap_load(sc->sc_dmat, sc->sc_cddmamap,
	    sc->sc_control_data, sizeof(struct emac_control_data), NULL,
	    0)) != 0) {
		aprint_error_dev(self,
		    "unable to load control data DMA map, error = %d\n", error);
		goto fail_3;
	}

#ifdef EMAC_TAH
	/*
	 * Map and reset the TAH offload engine.  A mapping failure is
	 * not fatal: the driver carries on without offload.
	 */
	if (oaa->opb_flags & OPB_FLAGS_EMAC_TAH) {
		if (bus_space_map(oaa->opb_bt,
		    AMCC460EX_TAH0_BASE + 0x100 * sc->sc_instance,
		    TAH_NREG, 0, &sc->sc_tahh) == 0) {
			sc->sc_tah = true;
			emac_tah_reset(sc);
			aprint_normal_dev(self,
			    "TAH offload engine, rev. 0x%08x\n",
			    TAH_READ(sc, TAH_REVID));
		} else
			aprint_error_dev(self,
			    "unable to map TAH registers, offload disabled\n");
	}
#endif

	/*
	 * Create the transmit buffer DMA maps, dimensioned for a full
	 * TSO burst when the TAH will segment for us.
	 */
	for (i = 0; i < EMAC_TXQUEUELEN; i++) {
#ifdef EMAC_TAH
		if (sc->sc_tah)
			error = bus_dmamap_create(sc->sc_dmat,
			    EMAC_TSO_MAXLEN, EMAC_TSO_NTXSEGS,
			    EMAC_TSO_MAXSEGSZ, 0, 0,
			    &sc->sc_txsoft[i].txs_dmamap);
		else
#endif
			error = bus_dmamap_create(sc->sc_dmat, MCLBYTES,
			    EMAC_NTXSEGS, MCLBYTES, 0, 0,
			    &sc->sc_txsoft[i].txs_dmamap);
		if (error != 0) {
			aprint_error_dev(self,
			    "unable to create tx DMA map %d, error = %d\n",
			    i, error);
			goto fail_4;
		}
#ifdef EMAC_TAH
		/* No TAH_SSR slot is held by an idle job. */
		sc->sc_txsoft[i].txs_ssr = -1;
#endif
	}

	/*
	 * Create the receive buffer DMA maps.
	 */
	for (i = 0; i < EMAC_NRXDESC; i++) {
		if ((error = bus_dmamap_create(sc->sc_dmat, MCLBYTES, 1,
		    MCLBYTES, 0, 0, &sc->sc_rxsoft[i].rxs_dmamap)) != 0) {
			aprint_error_dev(self,
			    "unable to create rx DMA map %d, error = %d\n",
			    i, error);
			goto fail_5;
		}
		sc->sc_rxsoft[i].rxs_mbuf = NULL;
	}

	/*
	 * 460EX/GT moved the EMAC clock-select bits used during soft
	 * reset from SDR0_MFR to SDR0_ETH_CFG.
	 */
	sc->sc_ethcfg_ecs = (oaa->opb_flags & OPB_FLAGS_EMAC_ETHCFG_ECS) != 0;

	if (sc->sc_ethcfg_ecs) {
		uint32_t sdr;

		/*
		 * Put the TAH into the data path when we are going to
		 * use it (clear the bypass bit); otherwise bypass the
		 * TAH offload engine.
		 */
		sdr = mfsdr(DCR_SDR0_ETH_CFG);
#ifdef EMAC_TAH
		if (sc->sc_tah)
			sdr &= ~SDR0_ETH_CFG_TAH_BYPASS(sc->sc_instance);
		else
#endif
			sdr |= SDR0_ETH_CFG_TAH_BYPASS(sc->sc_instance);
		mtsdr(DCR_SDR0_ETH_CFG, sdr);
	}

	/* Soft reset the EMAC to bring the chip to a known state. */
	emac_soft_reset(sc);

	/* Translate the OPB bus clock into the STACR/MR1 clock code. */
	opb_freq = opb_get_frequency();
	switch (opb_freq) {
	case  33333333: opbc =  STACR_OPBC_33MHZ; break;
	case  50000000: opbc =  STACR_OPBC_50MHZ; break;
	case  66666666: opbc =  STACR_OPBC_66MHZ; break;
	case  83333333: opbc =  STACR_OPBC_83MHZ; break;
	case 100000000: opbc = STACR_OPBC_100MHZ; break;

	default:
		if (opb_freq > 100000000) {
			opbc = STACR_OPBC_A100MHZ;
			break;
		}
		aprint_error_dev(self, "unsupported OPB frequency %dMHz\n",
		    opb_freq / 1000 / 1000);
		goto fail_5;
	}
	if (oaa->opb_flags & OPB_FLAGS_EMAC_GBE) {
		sc->sc_mr1 =
		    MR1_RFS_GBE(MR1__FS_16KB)	|
#ifdef EMAC_TAH
		    /*
		     * The 460EX EMAC0/1 transmit FIFO is 2KB!
		     */
		    MR1_TFS_GBE(MR1__FS_2KB)	|
#else
		    MR1_TFS_GBE(MR1__FS_16KB)	|
#endif
		    MR1_TR0_MULTIPLE		|
		    MR1_OBCI(opbc);
		/*
		 * Do NOT set MR1_MWSW here:
		 * EMAC holds the last packet's TX status to overlap
		 * it with the next packet.
		 */
		sc->sc_ethercom.ec_capabilities |= ETHERCAP_JUMBO_MTU;

		if (oaa->opb_flags & OPB_FLAGS_EMAC_STACV2) {
			sc->sc_stacr_read = STACR_STAOPC_READ;
			sc->sc_stacr_write = STACR_STAOPC_WRITE;
			sc->sc_stacr_bits = STACR_OC;
			sc->sc_stacr_completed = false;
		} else {
			sc->sc_stacr_read = STACR_READ;
			sc->sc_stacr_write = STACR_WRITE;
			sc->sc_stacr_completed = true;
		}
	} else {
		/*
		 * Set up Mode Register 1 - set receive and transmit FIFOs to
		 * maximum size, allow transmit of multiple packets (only
		 * channel 0 is used).
		 *
		 * XXX: Allow pause packets??
		 */
		sc->sc_mr1 =
		    MR1_RFS(MR1__FS_4KB) |
		    MR1_TFS(MR1__FS_2KB) |
		    MR1_TR0_MULTIPLE;

		sc->sc_stacr_read = STACR_READ;
		sc->sc_stacr_write = STACR_WRITE;
		sc->sc_stacr_bits = STACR_OPBC(opbc);
		sc->sc_stacr_completed = true;
	}

	intr_establish_xname(oaa->opb_irq, IST_LEVEL, IPL_NET, emac_intr, sc,
	    device_xname(self));
	mal_intr_establish(sc->sc_instance, sc);

	if (oaa->opb_flags & OPB_FLAGS_EMAC_HT256)
		sc->sc_htsize = 256;
	else
		sc->sc_htsize = 64;

	/* Clear all interrupts */
	EMAC_WRITE(sc, EMAC_ISR, ISR_ALL);

	/*
	 * Initialise the media structures.
	 */
	mii->mii_ifp = ifp;
	mii->mii_readreg = emac_mii_readreg;
	mii->mii_writereg = emac_mii_writereg;
	mii->mii_statchg = emac_mii_statchg;

	sc->sc_ethercom.ec_mii = mii;
	ifmedia_init(&mii->mii_media, 0, ether_mediachange, ether_mediastatus);
	mii_attach(self, mii, 0xffffffff, mii_phy, MII_OFFSET_ANY,
	    MIIF_DOPAUSE);
	if (LIST_FIRST(&mii->mii_phys) == NULL) {
		/* No PHY found: offer only the "none" media. */
		ifmedia_add(&mii->mii_media, IFM_ETHER | IFM_NONE, 0, NULL);
		ifmedia_set(&mii->mii_media, IFM_ETHER | IFM_NONE);
	} else
		ifmedia_set(&mii->mii_media, IFM_ETHER | IFM_AUTO);

	/* Bounded copy: if_xname is a fixed-size array. */
	strlcpy(ifp->if_xname, xname, sizeof(ifp->if_xname));
	ifp->if_softc = sc;
	ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST;
	ifp->if_start = emac_start;
	ifp->if_ioctl = emac_ioctl;
	ifp->if_init = emac_init;
	ifp->if_stop = emac_stop;
	ifp->if_watchdog = emac_watchdog;
	IFQ_SET_READY(&ifp->if_snd);

	/*
	 * We can support 802.1Q VLAN-sized frames.
	 */
	sc->sc_ethercom.ec_capabilities |= ETHERCAP_VLAN_MTU;

#ifdef EMAC_TAH
	/*
	 * The TAH generates IPv4/TCP/UDP checksums on tx and rx
	 */
	if (sc->sc_tah) {
		ifp->if_capabilities |=
		    IFCAP_CSUM_IPv4_Tx | IFCAP_CSUM_IPv4_Rx |
		    IFCAP_CSUM_TCPv4_Tx | IFCAP_CSUM_TCPv4_Rx |
		    IFCAP_CSUM_UDPv4_Tx | IFCAP_CSUM_UDPv4_Rx |
		    IFCAP_TSOv4;
#ifdef INET6
		/* No IPv6 header checksum, hence no IFCAP for it. */
		ifp->if_capabilities |=
		    IFCAP_CSUM_TCPv6_Tx | IFCAP_CSUM_TCPv6_Rx |
		    IFCAP_CSUM_UDPv6_Tx | IFCAP_CSUM_UDPv6_Rx |
		    IFCAP_TSOv6;
#endif
	}
#endif

	/*
	 * Attach the interface.
	 */
	if_attach(ifp);
	if_deferred_start_init(ifp, NULL);
	ether_ifattach(ifp, enaddr);

	rnd_attach_source(&sc->rnd_source, xname, RND_TYPE_NET,
	    RND_FLAG_DEFAULT);

#ifdef EMAC_EVENT_COUNTERS
	/*
	 * Attach the event counters.
	 */
	evcnt_attach_dynamic(&sc->sc_ev_txintr, EVCNT_TYPE_INTR,
	    NULL, xname, "txintr");
	evcnt_attach_dynamic(&sc->sc_ev_rxintr, EVCNT_TYPE_INTR,
	    NULL, xname, "rxintr");
	evcnt_attach_dynamic(&sc->sc_ev_txde, EVCNT_TYPE_INTR,
	    NULL, xname, "txde");
	evcnt_attach_dynamic(&sc->sc_ev_rxde, EVCNT_TYPE_INTR,
	    NULL, xname, "rxde");
	evcnt_attach_dynamic(&sc->sc_ev_intr, EVCNT_TYPE_INTR,
	    NULL, xname, "intr");

	evcnt_attach_dynamic(&sc->sc_ev_txreap, EVCNT_TYPE_MISC,
	    NULL, xname, "txreap");
	evcnt_attach_dynamic(&sc->sc_ev_txsstall, EVCNT_TYPE_MISC,
	    NULL, xname, "txsstall");
	evcnt_attach_dynamic(&sc->sc_ev_txdstall, EVCNT_TYPE_MISC,
	    NULL, xname, "txdstall");
	evcnt_attach_dynamic(&sc->sc_ev_txdrop, EVCNT_TYPE_MISC,
	    NULL, xname, "txdrop");
	evcnt_attach_dynamic(&sc->sc_ev_tu, EVCNT_TYPE_MISC,
	    NULL, xname, "tu");
#ifdef EMAC_TAH
	evcnt_attach_dynamic(&sc->sc_ev_txcsum, EVCNT_TYPE_MISC,
	    NULL, xname, "txcsum");
	evcnt_attach_dynamic(&sc->sc_ev_tahted, EVCNT_TYPE_MISC,
	    NULL, xname, "tahted");
	evcnt_attach_dynamic(&sc->sc_ev_rxcsum, EVCNT_TYPE_MISC,
	    NULL, xname, "rxcsum");
	evcnt_attach_dynamic(&sc->sc_ev_rxcsumbad, EVCNT_TYPE_MISC,
	    NULL, xname, "rxcsumbad");
	evcnt_attach_dynamic(&sc->sc_ev_txtso, EVCNT_TYPE_MISC,
	    NULL, xname, "txtso");
	evcnt_attach_dynamic(&sc->sc_ev_txtsofb, EVCNT_TYPE_MISC,
	    NULL, xname, "txtsofb");
#endif
#endif /* EMAC_EVENT_COUNTERS */

	/*
	 * Make sure the interface is shutdown during reboot.
	 */
	sc->sc_sdhook = shutdownhook_establish(emac_shutdown, sc);
	if (sc->sc_sdhook == NULL)
		aprint_error_dev(self,
		    "WARNING: unable to establish shutdown hook\n");

	return;

	/*
	 * Free any resources we've allocated during the failed attach
	 * attempt.  Do this in reverse order and fall through.
	 */
fail_5:
	for (i = 0; i < EMAC_NRXDESC; i++) {
		if (sc->sc_rxsoft[i].rxs_dmamap != NULL)
			bus_dmamap_destroy(sc->sc_dmat,
			    sc->sc_rxsoft[i].rxs_dmamap);
	}
fail_4:
	for (i = 0; i < EMAC_TXQUEUELEN; i++) {
		if (sc->sc_txsoft[i].txs_dmamap != NULL)
			bus_dmamap_destroy(sc->sc_dmat,
			    sc->sc_txsoft[i].txs_dmamap);
	}
	bus_dmamap_unload(sc->sc_dmat, sc->sc_cddmamap);
fail_3:
	bus_dmamap_destroy(sc->sc_dmat, sc->sc_cddmamap);
fail_2:
	bus_dmamem_unmap(sc->sc_dmat, (void *)sc->sc_control_data,
	    sizeof(struct emac_control_data));
fail_1:
	bus_dmamem_free(sc->sc_dmat, &seg, nseg);
fail_0:
	return;
}

/*
 * EMAC general (status) interrupt handler.
 *
 * Reads the pending bits from EMAC_ISR and writes the same value back to
 * clear them.  With EMAC_TAH and IFF_DEBUG set on the interface it also
 * accumulates the bits seen, counts interrupts that arrived with an empty
 * ISR, and prints a summary at most once per second.
 *
 * Always returns 1.
 */
static int
emac_intr(void *arg)
{
	struct emac_softc * const sc = arg;
	uint32_t isr;

	EMAC_EVCNT_INCR(&sc->sc_ev_intr);

	/* Latch what is pending, then write it back to clear those bits. */
	isr = EMAC_READ(sc, EMAC_ISR);
	EMAC_WRITE(sc, EMAC_ISR, isr);

#ifdef EMAC_TAH
	if (__predict_false(
	    (sc->sc_ethercom.ec_if.if_flags & IFF_DEBUG) != 0)) {
		static const struct timeval min_interval = { 1, 0 };
		struct ifnet * const ifp = &sc->sc_ethercom.ec_if;

		if (isr == 0)
			sc->sc_isr_zero++;
		sc->sc_isr_seen |= isr;

		/* Rate-limit the report so a busy link cannot flood us. */
		if (ratecheck(&sc->sc_isr_last, &min_interval)) {
			aprint_normal_ifnet(ifp,
			    "ISR 0x%08x (seen 0x%08x, %u empty)\n",
			    isr, sc->sc_isr_seen, sc->sc_isr_zero);
		}
	}
#endif

	return 1;
}

/*
 * Shutdown hook: stop the interface and, when a TAH is in use, switch
 * this EMAC instance back to TAH bypass in SDR0_ETH_CFG.
 */
static void
emac_shutdown(void *arg)
{
	struct emac_softc * const sc = arg;
	struct ifnet * const ifp = &sc->sc_ethercom.ec_if;

	emac_stop(ifp, 0);

#ifdef EMAC_TAH
	if (sc->sc_tah) {
		uint32_t ethcfg;

		/* Read-modify-write: set only our instance's bypass bit. */
		ethcfg = mfsdr(DCR_SDR0_ETH_CFG);
		ethcfg |= SDR0_ETH_CFG_TAH_BYPASS(sc->sc_instance);
		mtsdr(DCR_SDR0_ETH_CFG, ethcfg);
	}
#endif
}


/*
 * ifnet interface functions
 */

#ifdef EMAC_TAH
/*
 * Decide what the TAH can do for this packet.
 *
 * m0 is the outgoing frame with the Ethernet header first; only the
 * first mbuf of the chain is inspected.  Whether TSO was requested is
 * taken from M_CSUM_TSOv4 (and M_CSUM_TSOv6 with INET6) in
 * m0->m_pkthdr.csum_flags.
 *
 * Returns one of the EMAC_TAHTX_* codes below.  *ssr_bytesp is written
 * only when EMAC_TAHTX_TSO is returned; it then holds the per-segment
 * size in bytes: m_pkthdr.segsz plus the (option-less) IP and TCP
 * headers.
 *
 * NOTE(review): this function does not look at the checksum request
 * bits for the non-TSO case; presumably the caller only asks when an
 * offload was requested -- confirm against the caller.
 */
#define	EMAC_TAHTX_SW	0	/* software handling required */
#define	EMAC_TAHTX_CSUM	1	/* HAC = 111: checksum insertion */
#define	EMAC_TAHTX_TSO	2	/* HAC = SSRn: checksum + segmentation */

static int
emac_tah_tx_classify(struct mbuf *m0, u_int *ssr_bytesp)
{
	const struct ether_header *eh;
	const struct ip *ip;
#ifdef INET6
	const struct ip6_hdr *ip6;
#endif
	const struct tcphdr *th;
	/* Non-zero when the stack asked for TCP segmentation offload. */
	const int tso = m0->m_pkthdr.csum_flags & (M_CSUM_TSOv4
#ifdef INET6
	    | M_CSUM_TSOv6
#endif
	    );
	u_int bytes;

	/*
	 * The headers of locally generated packets are contiguous in
	 * the first mbuf; anything else falls back to software.
	 */
	if (m0->m_len < sizeof(*eh))
		return EMAC_TAHTX_SW;
	eh = mtod(m0, const struct ether_header *);

	switch (ntohs(eh->ether_type)) {
	case ETHERTYPE_IP:
		if (m0->m_len < sizeof(*eh) + sizeof(*ip))
			return EMAC_TAHTX_SW;
		ip = (const struct ip *)(mtod(m0, const char *) +
		    sizeof(*eh));
		if (ip->ip_hl != 5)
			return EMAC_TAHTX_SW;	/* options: TAH_TSR[IPOP] */
		if (ip->ip_off & htons(IP_OFFMASK | IP_MF))
			return EMAC_TAHTX_SW;	/* fragment: TAH_TSR[IPFP] */
		if (tso == 0) {
			/* Checksum insertion only: TCP or UDP required. */
			if (ip->ip_p != IPPROTO_TCP &&
			    ip->ip_p != IPPROTO_UDP)
				return EMAC_TAHTX_SW;	/* TAH_TSR[UP] */
			if (ntohs(ip->ip_len) < 40)
				return EMAC_TAHTX_SW;	/* TAH_TSR[ILTS] */
			return EMAC_TAHTX_CSUM;
		}
		/* M_CSUM_TSOv4; M_CSUM_IPv4 may ride along (ip_output) */
		if (ip->ip_p != IPPROTO_TCP)
			return EMAC_TAHTX_SW;	/* TAH_TSR[SUDP] */
		if (m0->m_len < sizeof(*eh) + sizeof(*ip) + sizeof(*th))
			return EMAC_TAHTX_SW;
		/* The IP total length must cover exactly the whole burst. */
		if (ntohs(ip->ip_len) != m0->m_pkthdr.len - sizeof(*eh))
			return EMAC_TAHTX_SW;	/* TAH_TSR[DLM] */
		th = (const struct tcphdr *)((const char *)ip + sizeof(*ip));
		bytes = m0->m_pkthdr.segsz + sizeof(*ip) + sizeof(*th);
		goto tso_common;

#ifdef INET6
	case ETHERTYPE_IPV6:
		if (m0->m_len < sizeof(*eh) + sizeof(*ip6))
			return EMAC_TAHTX_SW;
		ip6 = (const struct ip6_hdr *)(mtod(m0, const char *) +
		    sizeof(*eh));
		/*
		 * extension headers: TAH_TSR[IP6EHP]/[IP6UNH]
		 * (rejected below: the next header must be TCP or UDP
		 * directly)
		 */
		if (tso == 0) {
			if (ip6->ip6_nxt != IPPROTO_TCP &&
			    ip6->ip6_nxt != IPPROTO_UDP)
				return EMAC_TAHTX_SW;
			return EMAC_TAHTX_CSUM;
		}
		/* M_CSUM_TSOv6 */
		if (ip6->ip6_nxt != IPPROTO_TCP)
			return EMAC_TAHTX_SW;
		if (m0->m_len < sizeof(*eh) + sizeof(*ip6) + sizeof(*th))
			return EMAC_TAHTX_SW;
		/* The payload length must cover exactly the whole burst. */
		if (ntohs(ip6->ip6_plen) !=
		    m0->m_pkthdr.len - sizeof(*eh) - sizeof(*ip6))
			return EMAC_TAHTX_SW;	/* TAH_TSR[IP6HPLM] */
		th = (const struct tcphdr *)((const char *)ip6 +
		    sizeof(*ip6));
		bytes = m0->m_pkthdr.segsz + sizeof(*ip6) + sizeof(*th);
		goto tso_common;
#endif

	default:
		/* VLAN-tagged or LLC/SNAP frames? */
		return EMAC_TAHTX_SW;
	}

 tso_common:
	/*
	 * TAH does not replicate TCP options!
	 * (th_off counts 32-bit words, so a bare header is
	 * sizeof(*th) >> 2.)
	 */
	if (th->th_off != sizeof(*th) >> 2)
		return EMAC_TAHTX_SW;
	if (th->th_flags & (TH_SYN | TH_RST))
		return EMAC_TAHTX_SW;	/* TAH_TSR[TFP] */
	/* TAH_SSR holds halfwords; bounds per TAH_TSR[SSTS]/FIFO size */
	if ((bytes & 1) != 0 ||
	    bytes < EMAC_TSO_SEG_MIN || bytes > EMAC_TSO_SEG_MAX)
		return EMAC_TAHTX_SW;
	*ssr_bytesp = bytes;
	return EMAC_TAHTX_TSO;
}

/*
 * Claim a TAH segment-size register slot for segments of the given size
 * in bytes.
 *
 * A slot already programmed with that size is shared (its reference
 * count is bumped).  Otherwise the lowest-numbered slot with no
 * references is reprogrammed, in halfwords, and claimed.
 *
 * Returns the slot index, or -1 when every slot is referenced with a
 * different size.
 */
static int
emac_tso_ssr_claim(struct emac_softc *sc, u_int bytes)
{
	int slot, idle = -1;

	for (slot = 0; slot < EMAC_TSO_NSSR; slot++) {
		/* An exact match always wins over an idle slot. */
		if (sc->sc_ssr_bytes[slot] == bytes) {
			sc->sc_ssr_refs[slot]++;
			return slot;
		}
		/* Remember only the first unreferenced slot. */
		if (idle == -1 && sc->sc_ssr_refs[slot] == 0)
			idle = slot;
	}

	if (idle < 0)
		return -1;

	TAH_WRITE(sc, TAH_SSR(idle), TAH_SSR_SS(bytes / 2));
	sc->sc_ssr_bytes[idle] = bytes;
	sc->sc_ssr_refs[idle] = 1;
	return idle;
}

/*
 * Drop the TAH_SSR slot reference held by a transmit job, if any, and
 * mark the job as holding none.
 */
static void
emac_tso_ssr_release(struct emac_softc *sc, struct emac_txsoft *txs)
{
	const int slot = txs->txs_ssr;

	/* Nothing to do for a job that never claimed a slot. */
	if (slot < 0)
		return;

	KASSERT(sc->sc_ssr_refs[slot] > 0);
	sc->sc_ssr_refs[slot]--;
	txs->txs_ssr = -1;
}

/*
 * Segment a TSO combined packet in software.
 *
 * m0 is the combined packet, Ethernet header first.  It is always
 * consumed: freed here when it is rejected, otherwise handed to
 * tcp4_segment()/tcp6_segment().
 *
 * Returns 0 with the resulting chain stored in sc->sc_txpending, or -1
 * when the packet was rejected or segmentation failed.
 *
 * sc_txpending must be empty on entry (asserted).  ifp is not used by
 * this function.
 */
static int
emac_tso_sw_segment(struct emac_softc *sc, struct ifnet *ifp,
    struct mbuf *m0)
{
	const struct ether_header *eh;
	const struct tcphdr *th;
	struct mbuf *m;
	u_int hdrlen = 0;	/* Ethernet + IP + TCP headers; 0 = unknown */
#ifdef INET6
	bool v6 = false;
#endif

	/*
	 * tcp[46]_segment() silently truncates a payload that is not a
	 * multiple of segsz (see DIAGNOSTIC).  Work out the total header
	 * length here so that such a packet can be rejected below.
	 */
	if (m0->m_len >= sizeof(*eh)) {
		eh = mtod(m0, const struct ether_header *);
		switch (ntohs(eh->ether_type)) {
		case ETHERTYPE_IP: {
			const struct ip *ip;

			if (m0->m_len < sizeof(*eh) + sizeof(*ip))
				break;
			ip = (const struct ip *)(mtod(m0, const char *) +
			    sizeof(*eh));
			/* The TCP header must be in the first mbuf too. */
			if (ip->ip_p != IPPROTO_TCP ||
			    m0->m_len < sizeof(*eh) + (ip->ip_hl << 2) +
			    sizeof(*th))
				break;
			th = (const struct tcphdr *)((const char *)ip +
			    (ip->ip_hl << 2));
			hdrlen = sizeof(*eh) + (ip->ip_hl << 2) +
			    (th->th_off << 2);
			break;
		}
#ifdef INET6
		case ETHERTYPE_IPV6: {
			const struct ip6_hdr *ip6;

			if (m0->m_len < sizeof(*eh) + sizeof(*ip6) +
			    sizeof(*th))
				break;
			ip6 = (const struct ip6_hdr *)
			    (mtod(m0, const char *) + sizeof(*eh));
			/* TCP must follow directly: no extension headers. */
			if (ip6->ip6_nxt != IPPROTO_TCP)
				break;
			th = (const struct tcphdr *)((const char *)ip6 +
			    sizeof(*ip6));
			hdrlen = sizeof(*eh) + sizeof(*ip6) +
			    (th->th_off << 2);
			v6 = true;
			break;
		}
#endif
		}
	}
	/*
	 * Drop the packet when the headers could not be parsed, there is
	 * no payload, or the payload is not a whole number of segments.
	 */
	if (m0->m_pkthdr.segsz == 0 || hdrlen == 0 ||
	    hdrlen >= m0->m_pkthdr.len ||
	    (m0->m_pkthdr.len - hdrlen) % m0->m_pkthdr.segsz != 0) {
		m_freem(m0);
		return -1;
	}

	EMAC_EVCNT_INCR(&sc->sc_ev_txtsofb);

	/*
	 * tcp[46]_segment() consume m0 and recompute all checksums in
	 * software.
	 */
#ifdef INET6
	if (v6)
		m = tcp6_segment(m0, ETHER_HDR_LEN);
	else
#endif
		m = tcp4_segment(m0, ETHER_HDR_LEN);
	if (m == NULL)
		return -1;
	KASSERT(sc->sc_txpending == NULL);
	sc->sc_txpending = m;
	return 0;
}

/*
 * Convert the TAH's receive checksum verdict into mbuf csum_flags.
 *
 * Only TCP and UDP are annotated: over IPv4 when the header is exactly
 * 20 bytes and the packet is not a fragment, over IPv6 when the first
 * next-header is TCP or UDP.  IPv4 UDP datagrams with a zero checksum
 * field are skipped as well.  Everything else is left untouched so the
 * stack verifies it in software.  "bad" is the hardware's verdict for
 * this frame.
 */
static void
emac_rx_csum(struct emac_softc *sc, struct mbuf *m, bool bad)
{
	struct ifnet *ifp = &sc->sc_ethercom.ec_if;
	const struct ether_header *eh;
	const char *l3;
	uint16_t etype;
	int flags;

	if (m->m_len < sizeof(*eh))
		return;
	eh = mtod(m, const struct ether_header *);
	l3 = mtod(m, const char *) + sizeof(*eh);
	etype = ntohs(eh->ether_type);

	if (etype == ETHERTYPE_IP) {
		const struct ip *ip = (const struct ip *)l3;
		const struct udphdr *uh;

		if (m->m_len < sizeof(*eh) + sizeof(*ip))
			return;

		/* Not verified by the TAH. */
		if (ip->ip_hl != 5 ||
		    (ip->ip_off & htons(IP_OFFMASK | IP_MF)) != 0)
			return;

		/* The TAH verifies TCP and UDP packets ONLY. */
		if (ip->ip_p == IPPROTO_TCP) {
			flags = M_CSUM_IPv4 | M_CSUM_TCPv4;
		} else if (ip->ip_p == IPPROTO_UDP) {
			if (m->m_len < sizeof(*eh) + sizeof(*ip) +
			    sizeof(*uh))
				return;
			uh = (const struct udphdr *)(l3 + sizeof(*ip));

			/* A zero UDP checksum means "no checksum". */
			if (uh->uh_sum == 0)
				return;
			flags = M_CSUM_IPv4 | M_CSUM_UDPv4;
		} else {
			return;
		}

		if (bad)
			flags |= M_CSUM_IPv4_BAD | M_CSUM_TCP_UDP_BAD;
	}
#ifdef INET6
	else if (etype == ETHERTYPE_IPV6) {
		const struct ip6_hdr *ip6 = (const struct ip6_hdr *)l3;

		if (m->m_len < sizeof(*eh) + sizeof(*ip6))
			return;

		/* No header checksum in IPv6, no flags for ext headers. */
		if (ip6->ip6_nxt == IPPROTO_TCP)
			flags = M_CSUM_TCPv6;
		else if (ip6->ip6_nxt == IPPROTO_UDP)
			flags = M_CSUM_UDPv6;
		else
			return;

		if (bad)
			flags |= M_CSUM_TCP_UDP_BAD;
	}
#endif
	else {
		return;
	}

#ifdef EMAC_EVENT_COUNTERS
	if (bad)
		EMAC_EVCNT_INCR(&sc->sc_ev_rxcsumbad);
	else
		EMAC_EVCNT_INCR(&sc->sc_ev_rxcsum);
#endif

	/* Claim only the enabled "good" flags; propagate BAD always. */
	m->m_pkthdr.csum_flags |= flags & (ifp->if_csum_flags_rx |
	    M_CSUM_IPv4_BAD | M_CSUM_TCP_UDP_BAD);
}
#endif /* EMAC_TAH */

/*
 * Transmit start routine.
 *
 * Each pass of the loop takes the head of the software-segmented chain
 * in sc_txpending (EMAC_TAH only) or, if that is empty, polls the
 * interface send queue.  The packet is DMA-mapped, described in the
 * transmit descriptor ring and handed to the chip.  The loop ends when
 * there is nothing left to send or when job entries, descriptors or DMA
 * resources run out.  The watchdog is armed if any descriptor was
 * consumed.
 */
static void
emac_start(struct ifnet *ifp)
{
	struct emac_softc *sc = ifp->if_softc;
	struct mbuf *m0;
	struct emac_txsoft *txs;
	bus_dmamap_t dmamap;
	int error, firsttx, nexttx, lasttx, ofree, seg;
#ifdef EMAC_TAH
	uint16_t txc;
	u_int tso_bytes;
	int tah_ssr, tah_cls;
	bool tah_pending;
#endif

	lasttx = 0;	/* XXX gcc */

	if ((ifp->if_flags & IFF_RUNNING) == 0)
		return;

	/*
	 * Remember the previous number of free descriptors.
	 */
	ofree = sc->sc_txfree;

	/*
	 * Loop through the send queue, setting up transmit descriptors
	 * until we drain the queue, or use up all available transmit
	 * descriptors.
	 */
	for (;;) {
#ifdef EMAC_TAH
		/* Per-packet TAH state; the defaults mean "no offload". */
		txc = 0;
		tso_bytes = 0;
		tah_ssr = -1;
		tah_pending = false;

		/* Software-segmented packets are sent before the queue. */
		if ((m0 = sc->sc_txpending) != NULL)
			tah_pending = true;
		else
#endif
		{
			/* Grab a packet off the queue. */
			IFQ_POLL(&ifp->if_snd, m0);
			if (m0 == NULL)
				break;
		}

		/*
		 * Get a work queue entry.  Reclaim used Tx descriptors if
		 * we are running low.
		 */
		if (sc->sc_txsfree < EMAC_TXQUEUE_GC) {
			emac_txreap(sc);
			if (sc->sc_txsfree == 0) {
				EMAC_EVCNT_INCR(&sc->sc_ev_txsstall);
				break;
			}
		}

#ifdef EMAC_TAH
		/*
		 * Decide what the TAH should do for this packet
		 */
		if (sc->sc_tah && m0->m_pkthdr.csum_flags != 0) {
			tah_cls = emac_tah_tx_classify(m0, &tso_bytes);

			/* TSO needs a segment size slot; else use software. */
			if (tah_cls == EMAC_TAHTX_TSO) {
				tah_ssr = emac_tso_ssr_claim(sc, tso_bytes);
				if (tah_ssr < 0)	/* all slots busy */
					tah_cls = EMAC_TAHTX_SW;
			}

			switch (tah_cls) {
			case EMAC_TAHTX_TSO:
				txc = EMAC_TXC_HAC_SSR(tah_ssr);
				EMAC_EVCNT_INCR(&sc->sc_ev_txtso);
				break;

			case EMAC_TAHTX_CSUM:
				txc = EMAC_TXC_HAC_CSUM;
				EMAC_EVCNT_INCR(&sc->sc_ev_txcsum);
				break;

			case EMAC_TAHTX_SW:
				if (m0->m_pkthdr.csum_flags &
				    (M_CSUM_TSOv4 | M_CSUM_TSOv6)) {
					/*
					 * Segment in software.
					 *
					 * NOTE(review): this dequeues from
					 * if_snd, so it assumes m0 was polled
					 * from there and not taken from
					 * sc_txpending; presumably segmented
					 * packets never carry TSO flags --
					 * confirm.
					 */
					IFQ_DEQUEUE(&ifp->if_snd, m0);
					if (emac_tso_sw_segment(sc, ifp,
					    m0) != 0)
						if_statinc(ifp, if_oerrors);
					continue;
				}

				/* Compute the checksums in software. */
				{
					const int cf4 =
					    m0->m_pkthdr.csum_flags &
					    (M_CSUM_IPv4 | M_CSUM_TCPv4 |
					    M_CSUM_UDPv4);
#ifdef INET6
					const int cf6 =
					    m0->m_pkthdr.csum_flags &
					    (M_CSUM_TCPv6 | M_CSUM_UDPv6);
#else
					const int cf6 = 0;
#endif
					if (cf4 != 0)
						in_undefer_cksum(m0,
						    ETHER_HDR_LEN, cf4);
#ifdef INET6
					if (cf6 != 0)
						in6_undefer_cksum(m0,
						    ETHER_HDR_LEN, cf6);
#endif
					/*
					 * Clear the handled flags so a
					 * packet that stays queued is not
					 * processed again on the next pass.
					 */
					m0->m_pkthdr.csum_flags &=
					    ~(cf4 | cf6);
				}
				break;
			}
		}
#endif

		txs = &sc->sc_txsoft[sc->sc_txsnext];
		dmamap = txs->txs_dmamap;

		/*
		 * Load the DMA map.  If this fails, the packet either
		 * didn't fit in the allotted number of segments, or we
		 * were short on resources.  An oversized TSO burst is
		 * segmented in software, any other oversized packet is
		 * dropped, and a resource shortage ends this run.
		 */
		error = bus_dmamap_load_mbuf(sc->sc_dmat, dmamap, m0,
		    BUS_DMA_WRITE | BUS_DMA_NOWAIT);
		if (error) {
			if (error == EFBIG) {
#ifdef EMAC_TAH
				if (tah_ssr >= 0) {
					/*
					 * A TSO burst with too many
					 * segments: segment it in
					 * software instead of dropping.
					 * The slot reference for this
					 * packet is given back first.
					 */
					sc->sc_ssr_refs[tah_ssr]--;
					IFQ_DEQUEUE(&ifp->if_snd, m0);
					if (emac_tso_sw_segment(sc, ifp,
					    m0) != 0)
						if_statinc(ifp, if_oerrors);
					continue;
				}
#endif
				EMAC_EVCNT_INCR(&sc->sc_ev_txdrop);
				aprint_error_ifnet(ifp,
				    "Tx packet consumes too many "
				    "DMA segments, dropping...\n");
				/* Unlink the packet from its source. */
#ifdef EMAC_TAH
				if (tah_pending) {
					sc->sc_txpending = m0->m_nextpkt;
					m0->m_nextpkt = NULL;
				} else
#endif
					IFQ_DEQUEUE(&ifp->if_snd, m0);
				m_freem(m0);
				continue;
			}
			/* Short on resources, just stop for now. */
#ifdef EMAC_TAH
			if (tah_ssr >= 0)
				sc->sc_ssr_refs[tah_ssr]--;
#endif
			break;
		}

		/*
		 * Ensure we have enough descriptors free to describe
		 * the packet.
		 */
		if (dmamap->dm_nsegs > sc->sc_txfree) {
			/*
			 * Not enough free descriptors to transmit this
			 * packet.  We haven't committed anything yet,
			 * so just unload the DMA map and punt.  The
			 * packet has not been unlinked from the send
			 * queue or sc_txpending at this point, so it is
			 * picked up again on the next call.
			 */
			bus_dmamap_unload(sc->sc_dmat, dmamap);
			EMAC_EVCNT_INCR(&sc->sc_ev_txdstall);
#ifdef EMAC_TAH
			if (tah_ssr >= 0)
				sc->sc_ssr_refs[tah_ssr]--;
#endif
			break;
		}

		/* Now really take the packet off its source. */
#ifdef EMAC_TAH
		if (tah_pending) {
			sc->sc_txpending = m0->m_nextpkt;
			m0->m_nextpkt = NULL;
		} else
#endif
			IFQ_DEQUEUE(&ifp->if_snd, m0);

		/*
		 * WE ARE NOW COMMITTED TO TRANSMITTING THE PACKET.
		 */

		/* Sync the DMA map. */
		bus_dmamap_sync(sc->sc_dmat, dmamap, 0, dmamap->dm_mapsize,
		    BUS_DMASYNC_PREWRITE);

		/*
		 * Store a pointer to the packet so that we can free it
		 * later.
		 */
		txs->txs_mbuf = m0;
		txs->txs_firstdesc = sc->sc_txnext;
		txs->txs_ndesc = dmamap->dm_nsegs;
#ifdef EMAC_TAH
		txs->txs_ssr = tah_ssr;
		if (tah_ssr >= 0)
			/*
			 * Wire packets this burst produces; tso_bytes -
			 * segsz is the IP + TCP header length.
			 */
			txs->txs_opackets = howmany(m0->m_pkthdr.len -
			    ETHER_HDR_LEN - (tso_bytes - m0->m_pkthdr.segsz),
			    m0->m_pkthdr.segsz);
		else
			txs->txs_opackets = 1;
#endif

		/*
		 * Initialize the transmit descriptor.
		 */
		firsttx = sc->sc_txnext;
		for (nexttx = sc->sc_txnext, seg = 0;
		     seg < dmamap->dm_nsegs;
		     seg++, nexttx = EMAC_NEXTTX(nexttx)) {
			struct mal_descriptor *txdesc =
			    &sc->sc_txdescs[nexttx];

			/*
			 * If this is the first descriptor we're
			 * enqueueing, don't set the TX_READY bit just
			 * yet.  That could cause a race condition.
			 * We'll do it below.  Only the WRAP bit of the
			 * old control word is carried over.
			 */
			txdesc->md_data = dmamap->dm_segs[seg].ds_addr;
			txdesc->md_data_len = dmamap->dm_segs[seg].ds_len;
			txdesc->md_stat_ctrl =
			    (txdesc->md_stat_ctrl & MAL_TX_WRAP) |
			    (nexttx == firsttx ? 0 : MAL_TX_READY) |
#ifdef EMAC_TAH
			    txc |
#endif
			    EMAC_TXC_GFCS | EMAC_TXC_GPAD;
			lasttx = nexttx;
		}

		/* Set the LAST bit on the last segment. */
		sc->sc_txdescs[lasttx].md_stat_ctrl |= MAL_TX_LAST;

		/*
		 * Request a Tx-complete interrupt for this packet.
		 */
		sc->sc_txdescs[lasttx].md_stat_ctrl |= MAL_TX_INTERRUPT;

		txs->txs_lastdesc = lasttx;

		/* Sync the descriptors we're using. */
		EMAC_CDTXSYNC(sc, sc->sc_txnext, dmamap->dm_nsegs,
		    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);

		/*
		 * The entire packet chain is set up.  Give the
		 * first descriptor to the chip now.
		 */
		sc->sc_txdescs[firsttx].md_stat_ctrl |= MAL_TX_READY;
		EMAC_CDTXSYNC(sc, firsttx, 1,
		    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
		/*
		 * Tell the EMAC that a new packet is available.
		 */
		EMAC_WRITE(sc, EMAC_TMR0, TMR0_GNP0 | TMR0_TFAE_2);

		/* Advance the tx pointer. */
		sc->sc_txfree -= txs->txs_ndesc;
		sc->sc_txnext = nexttx;

		sc->sc_txsfree--;
		sc->sc_txsnext = EMAC_NEXTTXS(sc->sc_txsnext);

		/*
		 * Pass the packet to any BPF listeners.
		 */
		bpf_mtap(ifp, m0, BPF_D_OUT);
	}

	if (sc->sc_txfree != ofree)
		/* Set a watchdog timer in case the chip flakes out. */
		ifp->if_timer = 5;
}

/*
 * Interface ioctl handler.
 *
 * SIOCSIFMTU is range-checked against the jumbo capability and, when
 * the common code asks for a reset, re-initialises an interface that is
 * up.  Every other request goes to ether_ioctl(); on ENETRESET a
 * running interface is either re-initialised (capability change, TAH
 * builds only) or has its multicast filter reloaded.  Transmission is
 * kicked before returning.  Runs at splnet.
 */
static int
emac_ioctl(struct ifnet *ifp, u_long cmd, void *data)
{
	struct emac_softc *sc = ifp->if_softc;
	int s, error;

	s = splnet();

	if (cmd == SIOCSIFMTU) {
		struct ifreq *ifr = (struct ifreq *)data;
		const int maxmtu =
		    (sc->sc_ethercom.ec_capabilities & ETHERCAP_JUMBO_MTU) ?
		    EMAC_MAX_MTU : ETHERMTU;

		if (ifr->ifr_mtu < ETHERMIN || ifr->ifr_mtu > maxmtu) {
			error = EINVAL;
		} else {
			error = ifioctl_common(ifp, cmd, data);
			if (error == ENETRESET) {
				/* Apply the new MTU only if we are up. */
				if (ifp->if_flags & IFF_UP)
					error = emac_init(ifp);
				else
					error = 0;
			}
		}
	} else {
		error = ether_ioctl(ifp, cmd, data);
		if (error == ENETRESET) {
			if ((ifp->if_flags & IFF_RUNNING) == 0) {
				/* Nothing to reprogram while stopped. */
				error = 0;
			}
#ifdef EMAC_TAH
			else if (cmd == SIOCSIFCAP) {
				/*
				 * Checksum offload changed, reinitialize
				 */
				error = emac_init(ifp);
			}
#endif
			else {
				/*
				 * Multicast list has changed; set the
				 * hardware filter accordingly.
				 */
				error = emac_set_filter(sc);
			}
		}
	}

	/* try to get more packets going */
	emac_start(ifp);

	splx(s);
	return error;
}

static int
emac_init(struct ifnet *ifp)
{
	struct emac_softc *sc = ifp->if_softc;
	struct emac_rxsoft *rxs;
	const uint8_t *enaddr = CLLADDR(ifp->if_sadl);
	int error, i;

	error = 0;

	/* Cancel any pending I/O. */
	emac_stop(ifp, 0);

	/* Reset the chip to a known state. */
	emac_soft_reset(sc);

#ifdef EMAC_TAH
	/*
	 * Re-establish a sane TAH config
	 */
	if (sc->sc_tah) {
		sc->sc_tah_cvr = (ifp->if_csum_flags_rx &
		    (M_CSUM_IPv4 | M_CSUM_TCPv4 | M_CSUM_UDPv4 |
		    M_CSUM_TCPv6 | M_CSUM_UDPv6)) != 0;
		emac_tah_reset(sc);
	}
#endif

	/*
	 * Initialise the transmit descriptor ring.
	 */
	memset(sc->sc_txdescs, 0, sizeof(sc->sc_txdescs));
	/* set wrap on last descriptor */
	sc->sc_txdescs[EMAC_NTXDESC - 1].md_stat_ctrl |= MAL_TX_WRAP;
	EMAC_CDTXSYNC(sc, 0, EMAC_NTXDESC,
	    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
	sc->sc_txfree = EMAC_NTXDESC;
	sc->sc_txnext = 0;

	/*
	 * Initialise the transmit job descriptors.
	 */
	for (i = 0; i < EMAC_TXQUEUELEN; i++)
		sc->sc_txsoft[i].txs_mbuf = NULL;
	sc->sc_txsfree = EMAC_TXQUEUELEN;
	sc->sc_txsnext = 0;
	sc->sc_txsdirty = 0;

	/*
	 * Initialise the receiver descriptor and receive job
	 * descriptor rings.
	 */
	for (i = 0; i < EMAC_NRXDESC; i++) {
		rxs = &sc->sc_rxsoft[i];
		if (rxs->rxs_mbuf == NULL) {
			if ((error = emac_add_rxbuf(sc, i)) != 0) {
				aprint_error_ifnet(ifp,
				    "unable to allocate or map rx buffer %d,"
				    " error = %d\n",
				    i, error);
				/*
				 * XXX Should attempt to run with fewer receive
				 * XXX buffers instead of just failing.
				 */
				emac_rxdrain(sc);
				goto out;
			}
		} else
			EMAC_INIT_RXDESC(sc, i);
	}
	sc->sc_rxptr = 0;

	/*
	 * Set the current media.
	 */
	if ((error = ether_mediachange(ifp)) != 0)
		goto out;

	/*
	 * Load the MAC address.
	 */
	EMAC_WRITE(sc, EMAC_IAHR, enaddr[0] << 8 | enaddr[1]);
	EMAC_WRITE(sc, EMAC_IALR,
	    enaddr[2] << 24 | enaddr[3] << 16 | enaddr[4] << 8 | enaddr[5]);

	/* Enable the transmit and receive channel on the MAL. */
	error = mal_start(sc->sc_instance,
	    EMAC_CDTXADDR(sc, 0), EMAC_CDRXADDR(sc, 0));
	if (error)
		goto out;

	sc->sc_mr1 &= ~MR1_JPSM;
	if (ifp->if_mtu > ETHERMTU)
		/* Enable Jumbo Packet Support Mode */
		sc->sc_mr1 |= MR1_JPSM;

	/* Set fifos, media modes. */
	EMAC_WRITE(sc, EMAC_MR1, sc->sc_mr1);

	/*
	 * Enable Individual and (possibly) Broadcast Address modes,
	 * runt packets, and strip padding.
	 */
	EMAC_WRITE(sc, EMAC_RMR, RMR_IAE | RMR_RRP | RMR_SP | RMR_TFAE_2 |
	    (ifp->if_flags & IFF_PROMISC ? RMR_PME : 0) |
	    (ifp->if_flags & IFF_BROADCAST ? RMR_BAE : 0));

	/*
	 * Set multicast filter.
	 */
	emac_set_filter(sc);

	/*
	 * Set low- and urgent-priority request thresholds.
	 */
	EMAC_WRITE(sc, EMAC_TMR1,
	    ((7 << TMR1_TLR_SHIFT) & TMR1_TLR_MASK) | /* 16 word burst */
	    ((15 << TMR1_TUR_SHIFT) & TMR1_TUR_MASK));
	/*
	 * Set Transmit Request Threshold Register.
	 */
	EMAC_WRITE(sc, EMAC_TRTR, TRTR_256);

	/*
	 * Set high and low receive watermarks.
	 */
#ifdef EMAC_TAH
	/*
	 * the fix probably is OK for all EMACs
	 */
	EMAC_WRITE(sc, EMAC_RWMR,
	    30 << RWMR_RLWM_SHIFT | 64 << RWMR_RHWM_SHIFT);
#else
	EMAC_WRITE(sc, EMAC_RWMR,
	    30 << RWMR_RLWM_SHIFT | 64 << RWMR_RLWM_SHIFT);
#endif

	/*
	 * Set frame gap.
	 */
	EMAC_WRITE(sc, EMAC_IPGVR, 8);

	/*
	 * Set interrupt status enable bits for EMAC.
	 */
	EMAC_WRITE(sc, EMAC_ISER,
	    ISR_TXPE |		/* TX Parity Error */
	    ISR_RXPE |		/* RX Parity Error */
	    ISR_TXUE |		/* TX Underrun Event */
#ifndef EMAC_TAH
	    /*
	     * With the TAH in the RX path, RXOE latches on most
	     * received packets without any actual packet loss
	     */
	    ISR_RXOE |		/* RX Overrun Event */
#endif
	    ISR_OVR  |		/* Overrun Error */
	    ISR_PP   |		/* Pause Packet */
	    ISR_BP   |		/* Bad Packet */
	    ISR_RP   |		/* Runt Packet */
	    ISR_SE   |		/* Short Event */
	    ISR_ALE  |		/* Alignment Error */
	    ISR_BFCS |		/* Bad FCS */
	    ISR_PTLE |		/* Packet Too Long Error */
	    ISR_ORE  |		/* Out of Range Error */
	    ISR_IRE  |		/* In Range Error */
	    ISR_SE0  |		/* Signal Quality Error 0 (SQE) */
	    ISR_TE0  |		/* Transmit Error 0 */
	    ISR_MOS  |		/* MMA Operation Succeeded */
	    ISR_MOF);		/* MMA Operation Failed */

	/*
	 * Enable the transmit and receive channel on the EMAC.
	 */
	EMAC_WRITE(sc, EMAC_MR0, MR0_TXE | MR0_RXE);

	/*
	 * Start the one second MII clock.
	 */
	callout_reset(&sc->sc_callout, hz, emac_mii_tick, sc);

	/*
	 * ... all done!
	 */
	ifp->if_flags |= IFF_RUNNING;

 out:
	if (error) {
		ifp->if_flags &= ~IFF_RUNNING;
		ifp->if_timer = 0;
		aprint_error_ifnet(ifp, "interface not running\n");
	}
	return error;
}

static void
emac_stop(struct ifnet *ifp, int disable)
{
	struct emac_softc *sc = ifp->if_softc;
	struct emac_txsoft *txs;
	int i;

	/* Stop the one second clock. */
	callout_stop(&sc->sc_callout);

	/* Down the MII */
	mii_down(&sc->sc_mii);

	/* Disable interrupts. */
	EMAC_WRITE(sc, EMAC_ISER, 0);

	/* Disable the receive and transmit channels. */
	mal_stop(sc->sc_instance);

	/* Disable the transmit enable and receive MACs. */
	EMAC_WRITE(sc, EMAC_MR0,
	    EMAC_READ(sc, EMAC_MR0) & ~(MR0_TXE | MR0_RXE));

	/* Release any queued transmit buffers. */
	for (i = 0; i < EMAC_TXQUEUELEN; i++) {
		txs = &sc->sc_txsoft[i];
		if (txs->txs_mbuf != NULL) {
			bus_dmamap_unload(sc->sc_dmat, txs->txs_dmamap);
			m_freem(txs->txs_mbuf);
			txs->txs_mbuf = NULL;
#ifdef EMAC_TAH
			emac_tso_ssr_release(sc, txs);
#endif
		}
	}

#ifdef EMAC_TAH
	/* Free any software-segmented packets awaiting transmission. */
	while (sc->sc_txpending != NULL) {
		struct mbuf *m = sc->sc_txpending;

		sc->sc_txpending = m->m_nextpkt;
		m->m_nextpkt = NULL;
		m_freem(m);
	}
#endif

	if (disable)
		emac_rxdrain(sc);

	/*
	 * Mark the interface down and cancel the watchdog timer.
	 */
	ifp->if_flags &= ~IFF_RUNNING;
	ifp->if_timer = 0;
}

/*
 * Watchdog timeout handler.  Completed transmissions are reaped first;
 * if descriptors are still outstanding afterwards the timeout is
 * reported and the interface is re-initialised.  Transmission is then
 * restarted in either case.
 */
static void
emac_watchdog(struct ifnet *ifp)
{
	struct emac_softc *sc = ifp->if_softc;

	/*
	 * Since we're not interrupting every packet, sweep
	 * up before we report an error.
	 */
	emac_txreap(sc);

	if (sc->sc_txfree == EMAC_NTXDESC) {
		/* The ring drained on its own; only mention it in debug. */
		if (ifp->if_flags & IFF_DEBUG)
			aprint_error_ifnet(ifp,
			    "recovered from device timeout\n");
	} else {
		aprint_error_ifnet(ifp,
		    "device timeout (txfree %d txsfree %d txnext %d)\n",
		    sc->sc_txfree, sc->sc_txsfree, sc->sc_txnext);
		if_statinc(ifp, if_oerrors);

		/* Reset the interface. */
		(void)emac_init(ifp);
	}

	/* try to get more packets going */
	emac_start(ifp);
}

/*
 * Attach a fresh mbuf cluster to receive slot "idx".
 *
 * Any buffer previously mapped for the slot is unloaded (not freed; the
 * caller owns it), the new cluster is mapped and synced for the device,
 * and the descriptor is re-armed.
 *
 * Returns 0 on success or ENOBUFS if no mbuf or cluster is available,
 * in which case the slot is left untouched.  A DMA map load failure
 * panics.
 */
static int
emac_add_rxbuf(struct emac_softc *sc, int idx)
{
	struct emac_rxsoft *rxs = &sc->sc_rxsoft[idx];
	struct mbuf *m;
	int error;

	MGETHDR(m, M_DONTWAIT, MT_DATA);
	if (m == NULL)
		return ENOBUFS;

	MCLGET(m, M_DONTWAIT);
	if (!(m->m_flags & M_EXT)) {
		/* No cluster was attached. */
		m_freem(m);
		return ENOBUFS;
	}

	/* Detach the old mapping before the map is reused. */
	if (rxs->rxs_mbuf != NULL)
		bus_dmamap_unload(sc->sc_dmat, rxs->rxs_dmamap);
	rxs->rxs_mbuf = m;

	error = bus_dmamap_load(sc->sc_dmat, rxs->rxs_dmamap,
	    m->m_ext.ext_buf, m->m_ext.ext_size, NULL, BUS_DMA_NOWAIT);
	if (error != 0) {
		aprint_error_dev(sc->sc_dev,
		    "can't load rx DMA map %d, error = %d\n", idx, error);
		panic("emac_add_rxbuf");		/* XXX */
	}

	bus_dmamap_sync(sc->sc_dmat, rxs->rxs_dmamap, 0,
	    rxs->rxs_dmamap->dm_mapsize, BUS_DMASYNC_PREREAD);

	EMAC_INIT_RXDESC(sc, idx);

	return 0;
}

/*
 * Unmap and free every buffer currently attached to the receive ring.
 */
static void
emac_rxdrain(struct emac_softc *sc)
{
	struct emac_rxsoft *rxs;
	int slot;

	for (slot = 0; slot < EMAC_NRXDESC; slot++) {
		rxs = &sc->sc_rxsoft[slot];
		if (rxs->rxs_mbuf == NULL)
			continue;

		bus_dmamap_unload(sc->sc_dmat, rxs->rxs_dmamap);
		m_freem(rxs->rxs_mbuf);
		rxs->rxs_mbuf = NULL;
	}
}

static int
emac_set_filter(struct emac_softc *sc)
{
	struct ethercom *ec = &sc->sc_ethercom;
	struct ether_multistep step;
	struct ether_multi *enm;
	struct ifnet *ifp = &sc->sc_ethercom.ec_if;
	uint32_t rmr, crc, mask, tmp, reg, gaht[8] = { 0, 0, 0, 0, 0, 0, 0, 0 };
	int regs, cnt = 0, i;

	if (sc->sc_htsize == 256) {
		reg = EMAC_GAHT256(0);
		regs = 8;
	} else {
		reg = EMAC_GAHT64(0);
		regs = 4;
	}
	mask = (1ULL << (sc->sc_htsize / regs)) - 1;

	rmr = EMAC_READ(sc, EMAC_RMR);
	rmr &= ~(RMR_PMME | RMR_MAE);
	ifp->if_flags &= ~IFF_ALLMULTI;

	ETHER_LOCK(ec);
	ETHER_FIRST_MULTI(step, ec, enm);
	while (enm != NULL) {
		if (memcmp(enm->enm_addrlo,
		    enm->enm_addrhi, ETHER_ADDR_LEN) != 0) {
			/*
			 * We must listen to a range of multicast addresses.
			 * For now, just accept all multicasts, rather than
			 * trying to set only those filter bits needed to match
			 * the range.  (At this time, the only use of address
			 * ranges is for IP multicast routing, for which the
			 * range is big enough to require all bits set.)
			 */
			gaht[0] = gaht[1] = gaht[2] = gaht[3] =
			    gaht[4] = gaht[5] = gaht[6] = gaht[7] = mask;
			break;
		}

		crc = ether_crc32_be(enm->enm_addrlo, ETHER_ADDR_LEN);

		if (sc->sc_htsize == 256)
			EMAC_SET_FILTER256(gaht, crc);
		else
			EMAC_SET_FILTER(gaht, crc);

		ETHER_NEXT_MULTI(step, enm);
		cnt++;
	}
	ETHER_UNLOCK(ec);

	for (i = 1, tmp = gaht[0]; i < regs; i++)
		tmp &= gaht[i];
	if (tmp == mask) {
		/* All categories are true. */
		ifp->if_flags |= IFF_ALLMULTI;
		rmr |= RMR_PMME;
	} else if (cnt != 0) {
		/* Some categories are true. */
		for (i = 0; i < regs; i++)
			EMAC_WRITE(sc, reg + (i << 2), gaht[i]);
		rmr |= RMR_MAE;
	}
	EMAC_WRITE(sc, EMAC_RMR, rmr);

	return 0;
}

/*
 * Reap completed Tx descriptors.
 *
 * Walks the transmit job list from sc_txsdirty and, for every job whose
 * last descriptor the chip has finished with, releases its segment size
 * slot (EMAC_TAH), updates the interface statistics, returns its
 * descriptors to the free pool and frees the mbuf.  Stops at the first
 * job that is still owned by the chip.
 *
 * Returns 1 if at least one job was completed, 0 otherwise.
 */
static int
emac_txreap(struct emac_softc *sc)
{
	struct ifnet *ifp = &sc->sc_ethercom.ec_if;
	struct emac_txsoft *txs;
	int handled, i;
	uint32_t txstat, count;

	EMAC_EVCNT_INCR(&sc->sc_ev_txreap);
	handled = 0;

	count = 0;
	/*
	 * Go through our Tx list and free mbufs for those
	 * frames that have been transmitted.
	 */
	for (i = sc->sc_txsdirty; sc->sc_txsfree != EMAC_TXQUEUELEN;
	    i = EMAC_NEXTTXS(i), sc->sc_txsfree++) {
		txs = &sc->sc_txsoft[i];

		/*
		 * NOTE(review): the sync starts at txs_lastdesc yet spans
		 * dm_nsegs descriptors, while only the last descriptor's
		 * status is read below -- confirm the range is intended.
		 */
		EMAC_CDTXSYNC(sc, txs->txs_lastdesc,
		    txs->txs_dmamap->dm_nsegs,
		    BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);

		/* READY still set: the chip has not finished this job. */
		txstat = sc->sc_txdescs[txs->txs_lastdesc].md_stat_ctrl;
		if (txstat & MAL_TX_READY)
			break;

		handled = 1;

#ifdef EMAC_TAH
		/* The TAH is done with this job's segment size slot. */
		emac_tso_ssr_release(sc, txs);
#endif

		/*
		 * Check for errors and collisions.
		 */
		if (txstat & (EMAC_TXS_UR | EMAC_TXS_ED))
			if_statinc(ifp, if_oerrors);

#ifdef EMAC_EVENT_COUNTERS
		if (txstat & EMAC_TXS_UR)
			EMAC_EVCNT_INCR(&sc->sc_ev_tu);
#endif /* EMAC_EVENT_COUNTERS */

#ifdef EMAC_TAH
		/*
		 * On TAH channels EMAC_TXS_TED (which overlays
		 * EMAC_TXS_BPP) means the TAH aborted the packet
		 */
		if (sc->sc_tah && (txstat & EMAC_TXS_TED)) {
			uint32_t tsr = TAH_READ(sc, TAH_TSR);

			EMAC_EVCNT_INCR(&sc->sc_ev_tahted);
			if_statinc(ifp, if_oerrors);
			if (ifp->if_flags & IFF_DEBUG)
				aprint_error_ifnet(ifp,
				    "TAH transmit error, TSR = 0x%08x\n", tsr);
		}
#endif

		/*
		 * Output packets are only counted for jobs that saw no
		 * collision of any kind.
		 */
		if (txstat &
		    (EMAC_TXS_EC | EMAC_TXS_MC | EMAC_TXS_SC | EMAC_TXS_LC)) {
			if (txstat & EMAC_TXS_EC)
				if_statadd(ifp, if_collisions, 16);
			else if (txstat & EMAC_TXS_MC)
				if_statadd(ifp, if_collisions, 2); /* XXX? */
			else if (txstat & EMAC_TXS_SC)
				if_statinc(ifp, if_collisions);
			if (txstat & EMAC_TXS_LC)
				if_statinc(ifp, if_collisions);
		} else {
#ifdef EMAC_TAH
			/* a TSO burst produces several wire packets */
			if_statadd(ifp, if_opackets, txs->txs_opackets);
#else
			if_statinc(ifp, if_opackets);
#endif
		}

		if (ifp->if_flags & IFF_DEBUG) {
			if (txstat & EMAC_TXS_ED)
				aprint_error_ifnet(ifp, "excessive deferral\n");
			if (txstat & EMAC_TXS_EC)
				aprint_error_ifnet(ifp,
				    "excessive collisions\n");
		}

		/* Give the descriptors back and drop the packet. */
		sc->sc_txfree += txs->txs_ndesc;
		bus_dmamap_sync(sc->sc_dmat, txs->txs_dmamap,
		    0, txs->txs_dmamap->dm_mapsize, BUS_DMASYNC_POSTWRITE);
		bus_dmamap_unload(sc->sc_dmat, txs->txs_dmamap);
		m_freem(txs->txs_mbuf);
		txs->txs_mbuf = NULL;

		count++;
	}

	/* Update the dirty transmit buffer pointer. */
	sc->sc_txsdirty = i;

	/*
	 * If there are no more pending transmissions, cancel the watchdog
	 * timer.
	 */
	if (sc->sc_txsfree == EMAC_TXQUEUELEN)
		ifp->if_timer = 0;

	/* Feed the number of reaped jobs to the entropy source. */
	if (count != 0)
		rnd_add_uint32(&sc->rnd_source, count);

	return handled;
}


/*
 * Reset functions
 */

/*
 * Soft-reset the EMAC and wait for the reset bit to clear.  A failure
 * to complete is reported but not returned to the caller.
 */
static void
emac_soft_reset(struct emac_softc *sc)
{
	uint32_t sdr;
	int spins;

	/*
	 * The PHY must provide a TX Clk in order to perform a soft reset
	 * of the EMAC.  If none is present, select the internal clock,
	 * SDR0_MFR[E0CS, E1CS].  After the soft reset, select the
	 * external clock.
	 */

	/* Switch this instance to the internal clock. */
	if (sc->sc_ethcfg_ecs) {
		sdr = mfsdr(DCR_SDR0_ETH_CFG);
		sdr = sdr | SDR0_ETH_CFG_ECS(sc->sc_instance);
		mtsdr(DCR_SDR0_ETH_CFG, sdr);
	} else {
		sdr = mfsdr(DCR_SDR0_MFR);
		sdr = sdr | SDR0_MFR_ECS(sc->sc_instance);
		mtsdr(DCR_SDR0_MFR, sdr);
	}

	EMAC_WRITE(sc, EMAC_MR0, MR0_SRST);

	/* Back to the external clock. */
	if (sc->sc_ethcfg_ecs) {
		sdr = mfsdr(DCR_SDR0_ETH_CFG);
		sdr = sdr & ~SDR0_ETH_CFG_ECS(sc->sc_instance);
		mtsdr(DCR_SDR0_ETH_CFG, sdr);
	} else {
		sdr = mfsdr(DCR_SDR0_MFR);
		sdr = sdr & ~SDR0_MFR_ECS(sc->sc_instance);
		mtsdr(DCR_SDR0_MFR, sdr);
	}

	delay(5);

	/* Wait for the reset to finish, giving up after 1000000 polls. */
	for (spins = 1; EMAC_READ(sc, EMAC_MR0) & MR0_SRST; spins++) {
		if (spins == 1000000 /* 1sec XXXXX */) {
			aprint_error_dev(sc->sc_dev, "Soft Reset failed\n");
			return;
		}
		delay(1);
	}
}

/*
 * Stop the transmitter and receiver, if either is enabled, and wait
 * for both to report idle.  A timeout is reported but not returned.
 */
static void
emac_smart_reset(struct emac_softc *sc)
{
	uint32_t mr0;
	int spins;

	mr0 = EMAC_READ(sc, EMAC_MR0);
	if ((mr0 & (MR0_TXE | MR0_RXE)) == 0)
		return;		/* already disabled */

	mr0 &= ~(MR0_TXE | MR0_RXE);
	EMAC_WRITE(sc, EMAC_MR0, mr0);

	/* Wait for the idle state, giving up after 1000000 polls. */
	for (spins = 1;
	    (EMAC_READ(sc, EMAC_MR0) & (MR0_TXI | MR0_RXI)) !=
	    (MR0_TXI | MR0_RXI);
	    spins++) {
		if (spins == 1000000 /* 1sec XXXXX */) {
			aprint_error_dev(sc->sc_dev,
			    "Smart Reset failed\n");
			return;
		}
		delay(1);
	}
}

#ifdef EMAC_TAH
static void
emac_tah_reset(struct emac_softc *sc)
{
	int t = 0;

	TAH_WRITE(sc, TAH_MR, TAH_MR_SR);

	while (TAH_READ(sc, TAH_MR) & TAH_MR_SR) {
		if (++t == 1000000 /* 1sec XXXXX */) {
			aprint_error_dev(sc->sc_dev,
			    "TAH Soft Reset failed\n");
			return;
		}
		delay(1);
	}

	TAH_WRITE(sc, TAH_MR,
	    TAH_MR_ST(768 / 256) | TAH_MR_TFS_10K | TAH_MR_DTFP |
	    TAH_MR_DIG | TAH_MR_IPV6 |
	    (sc->sc_tah_cvr ? TAH_MR_CVR : 0));

	memset(sc->sc_ssr_bytes, 0, sizeof(sc->sc_ssr_bytes));
	memset(sc->sc_ssr_refs, 0, sizeof(sc->sc_ssr_refs));
}
#endif /* EMAC_TAH */


/*
 * MII related functions
 */

/*
 * Read PHY register "reg" of PHY "phy" through the EMAC's STA control
 * register and store the result in *val.
 *
 * Returns 0 on success, the emac_mii_wait() error if the management
 * interface timed out, or -1 if the EMAC flagged a PHY error.  *val is
 * only written on success.  The optional RMII enable/disable hooks
 * bracket the access.
 */
static int
emac_mii_readreg(device_t self, int phy, int reg, uint16_t *val)
{
	struct emac_softc *sc = device_private(self);
	uint32_t stacr;
	int rv;

	if (sc->sc_rmii_enable)
		sc->sc_rmii_enable(device_parent(self), sc->sc_instance);

	/* wait for PHY data transfer to complete */
	rv = emac_mii_wait(sc);

	if (rv == 0) {
		/* Issue the read command, then wait for it to finish. */
		stacr = sc->sc_stacr_read | sc->sc_stacr_bits |
		    (phy << STACR_PCDA_SHIFT) |
		    (reg << STACR_PRA_SHIFT);
		EMAC_WRITE(sc, EMAC_STACR, stacr);

		rv = emac_mii_wait(sc);
	}

	if (rv == 0) {
		stacr = EMAC_READ(sc, EMAC_STACR);
		if (stacr & STACR_PHYE)
			rv = -1;
		else
			*val = stacr >> STACR_PHYD_SHIFT;
	}

	if (sc->sc_rmii_disable)
		sc->sc_rmii_disable(device_parent(self), sc->sc_instance);
	return rv;
}

/*
 * Write "val" to PHY register "reg" of PHY "phy" through the EMAC's
 * STA control register.
 *
 * Returns 0 on success, the emac_mii_wait() error if the management
 * interface timed out, or -1 if the EMAC flagged a PHY error.  The
 * optional RMII enable/disable hooks bracket the access.
 */
static int
emac_mii_writereg(device_t self, int phy, int reg, uint16_t val)
{
	struct emac_softc *sc = device_private(self);
	uint32_t sta_reg;
	int rv;

	if (sc->sc_rmii_enable)
		sc->sc_rmii_enable(device_parent(self), sc->sc_instance);

	/* wait for PHY data transfer to complete */
	if ((rv = emac_mii_wait(sc)) != 0)
		goto out;

	/*
	 * Widen val before shifting: as a uint16_t it promotes to int,
	 * and moving a value with its top bit set into the upper half
	 * of the register would overflow a signed int.
	 */
	sta_reg =
	    ((uint32_t)val << STACR_PHYD_SHIFT)	|
	    sc->sc_stacr_write		|
	    (reg << STACR_PRA_SHIFT)	|
	    (phy << STACR_PCDA_SHIFT)	|
	    sc->sc_stacr_bits;
	EMAC_WRITE(sc, EMAC_STACR, sta_reg);

	/* Wait for the write to finish, then check for a PHY error. */
	if ((rv = emac_mii_wait(sc)) != 0)
		goto out;
	if (EMAC_READ(sc, EMAC_STACR) & STACR_PHYE) {
		aprint_error_dev(sc->sc_dev, "MII PHY Error\n");
		rv = -1;
	}

out:
	if (sc->sc_rmii_disable)
		sc->sc_rmii_disable(device_parent(self), sc->sc_instance);

	return rv;
}

static void
emac_mii_statchg(struct ifnet *ifp)
{
	struct emac_softc *sc = ifp->if_softc;
	struct mii_data *mii = &sc->sc_mii;

	/*
	 * MR1 can only be written immediately after a reset...
	 */
	emac_smart_reset(sc);

	sc->sc_mr1 &= ~(MR1_FDE | MR1_ILE | MR1_EIFC | MR1_MF_MASK | MR1_IST);
	if (mii->mii_media_active & IFM_FDX)
		sc->sc_mr1 |= (MR1_FDE | MR1_EIFC | MR1_IST);
	if (mii->mii_media_active & IFM_FLOW)
		sc->sc_mr1 |= MR1_EIFC;
	if (mii->mii_media_active & IFM_LOOP)
		sc->sc_mr1 |= MR1_ILE;
	switch (IFM_SUBTYPE(mii->mii_media_active)) {
	case IFM_1000_T:
		sc->sc_mr1 |= (MR1_MF_1000MBS | MR1_IST);
		break;

	case IFM_100_TX:
		sc->sc_mr1 |= (MR1_MF_100MBS | MR1_IST);
		break;

	case IFM_10_T:
		sc->sc_mr1 |= MR1_MF_10MBS;
		break;

	case IFM_NONE:
		break;

	default:
		aprint_error_dev(sc->sc_dev, "unknown sub-type %d\n",
		    IFM_SUBTYPE(mii->mii_media_active));
		break;
	}
	if (sc->sc_rmii_speed)
		sc->sc_rmii_speed(device_parent(sc->sc_dev), sc->sc_instance,
		    IFM_SUBTYPE(mii->mii_media_active));

	EMAC_WRITE(sc, EMAC_MR1, sc->sc_mr1);

	/* Enable TX and RX if already RUNNING */
	if (ifp->if_flags & IFF_RUNNING)
		EMAC_WRITE(sc, EMAC_MR0, MR0_TXE | MR0_RXE);
}

/*
 * Wait for the STA control register to report that the current PHY
 * transfer is complete.  The STACR_OC bit is compared against
 * sc_stacr_completed, polling every 7us.
 *
 * Returns 0 once the transfer is complete, or ETIMEDOUT if it still is
 * not after seven waits.
 */
static uint32_t
emac_mii_wait(struct emac_softc *sc)
{
	uint32_t oc;
	int polls;

	for (polls = 0; ; polls++) {
		oc = EMAC_READ(sc, EMAC_STACR) & STACR_OC;
		if ((oc == STACR_OC) == sc->sc_stacr_completed)
			return 0;

		delay(7);
		if (polls > 5) {
			aprint_error_dev(sc->sc_dev, "MII timed out\n");
			return ETIMEDOUT;
		}
	}
}

/*
 * Once-a-second callout: run the MII tick at splnet and re-arm the
 * callout.  Does nothing, and does not re-arm, when the device is no
 * longer active.
 */
static void
emac_mii_tick(void *arg)
{
	struct emac_softc *sc = arg;
	int s;

	if (device_is_active(sc->sc_dev)) {
		s = splnet();
		mii_tick(&sc->sc_mii);
		splx(s);

		callout_reset(&sc->sc_callout, hz, emac_mii_tick, sc);
	}
}

/*
 * Transmit end-of-buffer interrupt: reap finished transmit jobs and
 * schedule the start routine.  Returns what emac_txreap() reported.
 */
int
emac_txeob_intr(void *arg)
{
	struct emac_softc *sc = arg;
	int handled;

	EMAC_EVCNT_INCR(&sc->sc_ev_txintr);
	handled = emac_txreap(sc);

	/* try to get more packets going */
	if_schedule_deferred_start(&sc->sc_ethercom.ec_if);

	return handled;
}

/*
 * Receive end-of-buffer interrupt.
 *
 * Walks the receive ring from sc_rxptr until an empty descriptor is
 * found.  Frames flagged with a receive error are counted and their
 * buffers recycled; good frames are passed up the stack, either copied
 * into a small header mbuf or by handing over the cluster and attaching
 * a new one to the slot.  With TAH receive verification enabled the
 * checksum verdict is translated into mbuf flags first.
 *
 * Always returns 1.
 */
int
emac_rxeob_intr(void *arg)
{
	struct emac_softc *sc = arg;
	struct ifnet *ifp = &sc->sc_ethercom.ec_if;
	struct emac_rxsoft *rxs;
	struct mbuf *m;
	uint32_t rxstat, count;
	int i, len;
#ifdef EMAC_TAH
	bool csum_bad;
#endif

	EMAC_EVCNT_INCR(&sc->sc_ev_rxintr);

	count = 0;
	for (i = sc->sc_rxptr; ; i = EMAC_NEXTRX(i)) {
		rxs = &sc->sc_rxsoft[i];

		EMAC_CDRXSYNC(sc, i,
		    BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);

		rxstat = sc->sc_rxdescs[i].md_stat_ctrl;

		if (rxstat & MAL_RX_EMPTY) {
			/*
			 * We have processed all of the receive buffers.
			 */
			/* Flush current empty descriptor */
			EMAC_CDRXSYNC(sc, i,
			    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
			break;
		}

#ifdef EMAC_TAH
		/*
		 * Pull a "bad checksum" verdict out of the status word so
		 * that those bits are not tested by the error check below.
		 */
		csum_bad = false;
		if (sc->sc_tah_cvr &&
		    (rxstat & EMAC_RXS_CSUM_MASK) == EMAC_RXS_CSUM_BAD) {
			rxstat &= ~EMAC_RXS_CSUM_MASK;
			csum_bad = true;
		}
#endif

		/*
		 * If an error occurred, update stats, clear the status
		 * word, and leave the packet buffer in place.  It will
		 * simply be reused the next time the ring comes around.
		 *
		 * EMAC_RXS_RP is reported below but is not part of the
		 * mask tested here, so a runt alone does not drop the
		 * frame.
		 */
		if (rxstat & (EMAC_RXS_OE | EMAC_RXS_BP | EMAC_RXS_SE |
		    EMAC_RXS_AE | EMAC_RXS_BFCS | EMAC_RXS_PTL | EMAC_RXS_ORE |
		    EMAC_RXS_IRE)) {
#define	PRINTERR(bit, str)					\
			if (rxstat & (bit))			\
				aprint_error_ifnet(ifp,		\
				    "receive error: %s\n", str)
			if_statinc(ifp, if_ierrors);
			PRINTERR(EMAC_RXS_OE, "overrun error");
			PRINTERR(EMAC_RXS_BP, "bad packet");
			PRINTERR(EMAC_RXS_RP, "runt packet");
			PRINTERR(EMAC_RXS_SE, "short event");
			PRINTERR(EMAC_RXS_AE, "alignment error");
			PRINTERR(EMAC_RXS_BFCS, "bad FCS");
			PRINTERR(EMAC_RXS_PTL, "packet too long");
			PRINTERR(EMAC_RXS_ORE, "out of range error");
			PRINTERR(EMAC_RXS_IRE, "in range error");
#undef PRINTERR
			EMAC_INIT_RXDESC(sc, i);
			continue;
		}

		bus_dmamap_sync(sc->sc_dmat, rxs->rxs_dmamap, 0,
		    rxs->rxs_dmamap->dm_mapsize, BUS_DMASYNC_POSTREAD);

		/*
		 * No errors; receive the packet.  Note, the 405GP emac
		 * includes the CRC with every packet.
		 */
		len = sc->sc_rxdescs[i].md_data_len - ETHER_CRC_LEN;

		/*
		 * If the packet is small enough to fit in a
		 * single header mbuf, allocate one and copy
		 * the data into it.  This greatly reduces
		 * memory consumption when we receive lots
		 * of small packets.
		 *
		 * Otherwise, we add a new buffer to the receive
		 * chain.  If this fails, we drop the packet and
		 * recycle the old buffer.
		 */
		if (emac_copy_small != 0 && len <= MHLEN) {
			MGETHDR(m, M_DONTWAIT, MT_DATA);
			if (m == NULL)
				goto dropit;
			memcpy(mtod(m, void *),
			    mtod(rxs->rxs_mbuf, void *), len);
			/* The cluster stays in the ring; re-arm it. */
			EMAC_INIT_RXDESC(sc, i);
			bus_dmamap_sync(sc->sc_dmat, rxs->rxs_dmamap, 0,
			    rxs->rxs_dmamap->dm_mapsize,
			    BUS_DMASYNC_PREREAD);
		} else {
			m = rxs->rxs_mbuf;
			if (emac_add_rxbuf(sc, i) != 0) {
				/*
				 * Also reached from the copy path above
				 * when no header mbuf is available.
				 */
 dropit:
				if_statinc(ifp, if_ierrors);
				EMAC_INIT_RXDESC(sc, i);
				bus_dmamap_sync(sc->sc_dmat,
				    rxs->rxs_dmamap, 0,
				    rxs->rxs_dmamap->dm_mapsize,
				    BUS_DMASYNC_PREREAD);
				continue;
			}
		}

		m_set_rcvif(m, ifp);
		m->m_pkthdr.len = m->m_len = len;

#ifdef EMAC_TAH
		if (sc->sc_tah_cvr)
			emac_rx_csum(sc, m, csum_bad);
#endif

		/* Pass it on. */
		if_percpuq_enqueue(ifp->if_percpuq, m);

		count++;
	}

	/* Update the receive pointer. */
	sc->sc_rxptr = i;

	/* Feed the number of delivered frames to the entropy source. */
	if (count != 0)
		rnd_add_uint32(&sc->rnd_source, count);

	return 1;
}

/*
 * Transmit descriptor error interrupt: count and report the event.
 * No recovery is attempted here.  Always returns 1.
 */
int
emac_txde_intr(void *arg)
{
	struct emac_softc * const sc = arg;

	EMAC_EVCNT_INCR(&sc->sc_ev_txde);
	aprint_error_dev(sc->sc_dev, "emac_txde_intr\n");

	return 1;
}

/*
 * Receive descriptor error interrupt: count and report the event, then
 * re-arm every receive descriptor whose length field no longer reads
 * MCLBYTES.  Always returns 1.
 */
int
emac_rxde_intr(void *arg)
{
	struct emac_softc * const sc = arg;
	int slot;

	EMAC_EVCNT_INCR(&sc->sc_ev_rxde);
	aprint_error_dev(sc->sc_dev, "emac_rxde_intr\n");

	/*
	 * XXX!
	 * This is a bit drastic; we just drop all descriptors that aren't
	 * "clean".  We should probably send any that are up the stack.
	 */
	for (slot = 0; slot < EMAC_NRXDESC; slot++) {
		EMAC_CDRXSYNC(sc, slot,
		    BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);

		if (sc->sc_rxdescs[slot].md_data_len == MCLBYTES)
			continue;	/* still clean */

		EMAC_INIT_RXDESC(sc, slot);
	}

	return 1;
}
