# Problem with Broadcom NIC (tg3)

## transacid

Hi,

I have a very strange problem. I have a Thinkpad R51e with a Broadcom NetXtreme BCM5751F, which uses the tg3 kernel module. The issue is that if I try to upload or download big files, e.g. 735 MB, with scp or similar, the transfer stalls after a while. Even unloading and reloading the tg3 module doesn't get the network running again; I have to reboot. I don't have a clue why this happens, because I tried it on Debian and it doesn't happen there. So I suppose it must be another kernel module which causes the hang.

lspci:

```
00:00.0 Host bridge: ATI Technologies Inc Unknown device 5a31 (rev 01)

00:01.0 PCI bridge: ATI Technologies Inc RS480 PCI Bridge

00:04.0 PCI bridge: ATI Technologies Inc RS480 PCI Bridge

00:06.0 PCI bridge: ATI Technologies Inc RS480 PCI Bridge

00:13.0 USB Controller: ATI Technologies Inc IXP SB400 USB Host Controller

00:13.1 USB Controller: ATI Technologies Inc IXP SB400 USB Host Controller

00:13.2 USB Controller: ATI Technologies Inc IXP SB400 USB2 Host Controller

00:14.0 SMBus: ATI Technologies Inc IXP SB400 SMBus Controller (rev 11)

00:14.1 IDE interface: ATI Technologies Inc Standard Dual Channel PCI IDE Controller ATI

00:14.3 ISA bridge: ATI Technologies Inc IXP SB400 PCI-ISA Bridge

00:14.4 PCI bridge: ATI Technologies Inc IXP SB400 PCI-PCI Bridge

00:14.5 Multimedia audio controller: ATI Technologies Inc IXP SB400 AC'97 Audio Controller (rev 02)

01:05.0 VGA compatible controller: ATI Technologies Inc RC410 [Radeon Xpress 200M]

02:00.0 Ethernet controller: Broadcom Corporation NetXtreme BCM5751F Fast Ethernet PCI Express (rev 21)

04:00.0 CardBus bridge: Texas Instruments PCI1510 PC card Cardbus Controller

04:02.0 Ethernet controller: Atheros Communications, Inc. AR5212 802.11abg NIC (rev 01)

```

All dmesg says is this (`dmesg | grep -i eth0`):

```
eth0: Tigon3 [partno(BCM95751F) rev 4201 PHY(5750)] (PCI Express) 10/100BaseT Ethernet 00:0a:e4:3d:c1:77

eth0: RXcsums[1] LinkChgREG[0] MIirq[0] ASF[0] Split[0] WireSpeed[1] TSOcap[1] 

eth0: dma_rwctrl[76180000] dma_mask[64-bit]

tg3: eth0: Link is up at 100 Mbps, full duplex.

tg3: eth0: Flow control is on for TX and on for RX.

```

Here is the end of the strace output (`strace -o scp scp backup.tbz 192.168.0.1:`):

```
....SNIP

read(3, "\2716\244\255d.K\352\31\255\2731>\201\371\213\4\241z\231"..., 4096) = 4096

write(6, "\2716\244\255d.K\352\31\255\2731>\201\371\213\4\241z\231"..., 4096) = 4096

read(3, "\330\n\231\210}\221c\312T7\253\17\v\2671\257V=\352}\231"..., 4096) = 4096

write(6, "\330\n\231\210}\221c\312T7\253\17\v\2671\257V=\352}\231"..., 4096) = 4096

read(3, "0\214\177\201\342]\251\231K?k\343^\16\241\272\n~\213$\304"..., 4096) = 4096

write(6, "0\214\177\201\342]\251\231K?k\343^\16\241\272\n~\213$\304"..., 4096) = 4096

read(3, ";\30*<\325mw\363\1\334\373\27\\\374\372\24\341\24\23f/"..., 4096) = 4096

write(6, ";\30*<\325mw\363\1\334\373\27\\\374\372\24\341\24\23f/"..., 4096) = 4096

read(3, "\374q\353p\330\336\211\205\214\0@9\331\360\372\332^\312"..., 4096) = 4096

write(6, "\374q\353p\330\336\211\205\214\0@9\331\360\372\332^\312"..., 4096) = 4096

read(3, "y\4b\3024\234I\253,_\362b\224\266\305\210\6d\275\301\345"..., 4096) = 4096

write(6, "y\4b\3024\234I\253,_\362b\224\266\305\210\6d\275\301\345"..., 4096) = 4096

read(3, "i\"\353\200Zt\26\304\217$`H\255q\'\276~\223X\35\373\247"..., 4096) = 4096

write(6, "i\"\353\200Zt\26\304\217$`H\255q\'\276~\223X\35\373\247"..., 4096) = ? ERESTARTSYS (To be restarted)

--- SIGALRM (Alarm clock) @ 0 (0) ---

getpgrp()                               = 5254

ioctl(1, TIOCGPGRP, [5254])             = 0

time(NULL)                              = 1170093172

write(1, "\rbackup.tbz                     "..., 179) = 179

rt_sigaction(SIGALRM, NULL, {0x804ce70, [], SA_INTERRUPT}, 8) = 0

alarm(1)                                = 0

sigreturn()                             = ? (mask now [])

write(6, "i\"\353\200Zt\26\304\217$`H\255q\'\276~\223X\35\373\247"..., 4096) = ? ERESTARTSYS (To be restarted)

--- SIGALRM (Alarm clock) @ 0 (0) ---

getpgrp()                               = 5254

ioctl(1, TIOCGPGRP, [5254])             = 0

time(NULL)                              = 1170093173

write(1, "\rbackup.tbz                     "..., 179) = 179

rt_sigaction(SIGALRM, NULL, {0x804ce70, [], SA_INTERRUPT}, 8) = 0

alarm(1)                                = 0

sigreturn()                             = ? (mask now [])

write(6, "i\"\353\200Zt\26\304\217$`H\255q\'\276~\223X\35\373\247"..., 4096) = ? ERESTARTSYS (To be restarted)

--- SIGALRM (Alarm clock) @ 0 (0) ---

getpgrp()                               = 5254

ioctl(1, TIOCGPGRP, [5254])             = 0

time(NULL)                              = 1170093174

write(1, "\rbackup.tbz                     "..., 179) = 179

rt_sigaction(SIGALRM, NULL, {0x804ce70, [], SA_INTERRUPT}, 8) = 0

alarm(1)                                = 0

sigreturn()                             = ? (mask now [])

write(6, "i\"\353\200Zt\26\304\217$`H\255q\'\276~\223X\35\373\247"..., 4096) = ? ERESTARTSYS (To be restarted)

--- SIGALRM (Alarm clock) @ 0 (0) ---

getpgrp()                               = 5254

ioctl(1, TIOCGPGRP, [5254])             = 0

time(NULL)                              = 1170093175

write(1, "\rbackup.tbz                     "..., 179) = 179

rt_sigaction(SIGALRM, NULL, {0x804ce70, [], SA_INTERRUPT}, 8) = 0

alarm(1)                                = 0

sigreturn()                             = ? (mask now [])

write(6, "i\"\353\200Zt\26\304\217$`H\255q\'\276~\223X\35\373\247"..., 4096) = ? ERESTARTSYS (To be restarted)

--- SIGALRM (Alarm clock) @ 0 (0) ---

getpgrp()                               = 5254

ioctl(1, TIOCGPGRP, [5254])             = 0

time(NULL)                              = 1170093176

write(1, "\rbackup.tbz                     "..., 179) = 179

rt_sigaction(SIGALRM, NULL, {0x804ce70, [], SA_INTERRUPT}, 8) = 0

alarm(1)                                = 0

sigreturn()                             = ? (mask now [])

write(6, "i\"\353\200Zt\26\304\217$`H\255q\'\276~\223X\35\373\247"..., 4096) = ? ERESTARTSYS (To be restarted)

--- SIGALRM (Alarm clock) @ 0 (0) ---

getpgrp()                               = 5254

ioctl(1, TIOCGPGRP, [5254])             = 0

time(NULL)                              = 1170093177

write(1, "\rbackup.tbz                     "..., 179) = 179

rt_sigaction(SIGALRM, NULL, {0x804ce70, [], SA_INTERRUPT}, 8) = 0

alarm(1)                                = 0

sigreturn()                             = ? (mask now [])

write(6, "i\"\353\200Zt\26\304\217$`H\255q\'\276~\223X\35\373\247"..., 4096) = ? ERESTARTSYS (To be restarted)

--- SIGALRM (Alarm clock) @ 0 (0) ---

getpgrp()                               = 5254

ioctl(1, TIOCGPGRP, [5254])             = 0

time(NULL)                              = 1170093178

write(1, "\rbackup.tbz                     "..., 179) = 179

rt_sigaction(SIGALRM, NULL, {0x804ce70, [], SA_INTERRUPT}, 8) = 0

alarm(1)                                = 0

sigreturn()                             = ? (mask now [])

write(6, "i\"\353\200Zt\26\304\217$`H\255q\'\276~\223X\35\373\247"..., 4096) = ? ERESTARTSYS (To be restarted)

--- SIGALRM (Alarm clock) @ 0 (0) ---

getpgrp()                               = 5254

ioctl(1, TIOCGPGRP, [5254])             = 0

time(NULL)                              = 1170093179

write(1, "\rbackup.tbz                     "..., 179) = 179

rt_sigaction(SIGALRM, NULL, {0x804ce70, [], SA_INTERRUPT}, 8) = 0

alarm(1)                                = 0

sigreturn()                             = ? (mask now [])

write(6, "i\"\353\200Zt\26\304\217$`H\255q\'\276~\223X\35\373\247"..., 4096) = ? ERESTARTSYS (To be restarted)

--- SIGALRM (Alarm clock) @ 0 (0) ---

getpgrp()                               = 5254

ioctl(1, TIOCGPGRP, [5254])             = 0

time(NULL)                              = 1170093180

write(1, "\rbackup.tbz                     "..., 179) = 179

rt_sigaction(SIGALRM, NULL, {0x804ce70, [], SA_INTERRUPT}, 8) = 0

alarm(1)                                = 0

sigreturn()                             = ? (mask now [])

write(6, "i\"\353\200Zt\26\304\217$`H\255q\'\276~\223X\35\373\247"..., 4096) = ? ERESTARTSYS (To be restarted)

--- SIGINT (Interrupt) @ 0 (0) ---

kill(5256, SIGINT)                      = 0

waitpid(5256, NULL, 0)                  = 5256

--- SIGCHLD (Child exited) @ 0 (0) ---

exit_group(1)                           = ?

```

In case you need it my emerge --info:

```
Portage 2.1.2-r3 (default-linux/x86/2006.1/desktop, gcc-4.1.1, glibc-2.5-r0, 2.6.19-gentoo-r4 i686)

=================================================================

System uname: 2.6.19-gentoo-r4 i686 Intel(R) Celeron(R) M processor         1.50GHz

Gentoo Base System version 1.12.9

Timestamp of tree: Wed, 24 Jan 2007 17:20:01 +0000

ccache version 2.4 [enabled]

dev-java/java-config: 1.3.7, 2.0.31-r3

dev-lang/python:     2.4.4

dev-python/pycrypto: 2.0.1-r5

dev-util/ccache:     2.4-r6

sys-apps/sandbox:    1.2.18.1

sys-devel/autoconf:  2.13, 2.61

sys-devel/automake:  1.4_p6, 1.5, 1.6.3, 1.7.9-r1, 1.8.5-r3, 1.9.6-r2, 1.10

sys-devel/binutils:  2.17

sys-devel/gcc-config: 1.3.14

sys-devel/libtool:   1.5.22

virtual/os-headers:  2.6.19.2-r1

ACCEPT_KEYWORDS="x86 ~x86"

AUTOCLEAN="yes"

CBUILD="i686-pc-linux-gnu"

CFLAGS="-O3 -march=pentium-m -pipe"

CHOST="i686-pc-linux-gnu"

CONFIG_PROTECT="/etc /usr/share/X11/xkb"

CONFIG_PROTECT_MASK="/etc/env.d /etc/env.d/java/ /etc/gconf /etc/init.d /etc/java-config/vms/ /etc/revdep-rebuild /etc/splash /etc/terminfo"

CXXFLAGS="-O3 -march=pentium-m -pipe"

DISTDIR="/usr/portage/distfiles"

FEATURES="autoconfig ccache distlocks metadata-transfer sandbox sfperms strict"

GENTOO_MIRRORS="http://linux.rz.ruhr-uni-bochum.de/download/gentoo-mirror/ http://ftp.uni-erlangen.de/pub/mirrors/gentoo http://mirrors.sec.informatik.tu-darmstadt.de/gentoo/ http://ftp-stud.fht-esslingen.de/pub/Mirrors/gentoo/ http://pandemonium.tiscali.de/pub/gentoo/ http://gentoo.intergenia.de "

LANG="de_DE@euro"

LC_ALL="de_DE@euro"

LINGUAS="de"

MAKEOPTS="-j2"

PKGDIR="/usr/portage/packages"

PORTAGE_RSYNC_OPTS="--recursive --links --safe-links --perms --times --compress --force --whole-file --delete --delete-after --stats --timeout=180 --exclude=/distfiles --exclude=/local --exclude=/packages"

PORTAGE_TMPDIR="/var/tmp"

PORTDIR="/usr/portage"

PORTDIR_OVERLAY="/usr/portage/local/layman/dma147 /usr/portage/local/layman/gentoo-de"

SYNC="rsync://rsync.de.gentoo.org/gentoo-portage"

USE="X X509 Xaw3d a52 aac acpi alsa amr apache2 arts asf audacious bash-completion berkdb bitmap-fonts bzip2 cairo cdb cddb cdparanoia cdr cdrom chardet cli cracklib crypt css curl curlwrappers dbus directfb dlloader dri dvd dvdread eds encode escreen esd extras fame fbcon ffmpeg firefox flac fortran fping gif glitz gnutls gpgme gpm gstreamer gtk gtk2 gtkhtml hddtemp iconv icq idn imagemagick imap imlib innkeywords isdnlog jabber java jpeg jpeg2k lame libcaca libg++ lzo mad maildir mbox mikmod mjpeg mmx mmxext mng modplug motif mp3 mp4live mpeg mpeg2 mplayer musepack nagios-ping nagios-ssh nas ncurses network nls no-suexec nptl nptlonly nsplugin offensive ogg openal opengl oss pam pcre pdf perl png pop ppds python qt3 qt4 quicktime rar readline real recode reflection rplay ruby sasl sdl session sftplogging smime speex spell spl sse sse2 ssl stroke svga symlink tcl tcltk theora tiff tk truetype truetype-fonts type1-fonts udev urandom userlocales vcd verbose vidix vim-pager vim-syntax vim-with-x vorbis win32codecs wma wmf x264 x86 xanim xine xml xorg xosd xpm xv xvid xvmc zlib" ALSA_CARDS="ali5451 als4000 atiixp atiixp-modem bt87x ca0106 cmipci emu10k1x ens1370 ens1371 es1938 es1968 fm801 hda-intel intel8x0 intel8x0m maestro3 trident usb-audio via82xx via82xx-modem ymfpci" ALSA_PCM_PLUGINS="adpcm alaw asym copy dmix dshare dsnoop empty extplug file hooks iec958 ioplug ladspa lfloat linear meter mulaw multi null plug rate route share shm softvol" ELIBC="glibc" INPUT_DEVICES="keyboard mouse" KERNEL="linux" LCD_DEVICES="bayrad cfontz cfontz633 glk hd44780 lb216 lcdm001 mtxorb ncurses text" LINGUAS="de" USERLAND="GNU" VIDEO_CARDS="radeon vga fbdev"

Unset:  CTARGET, EMERGE_DEFAULT_OPTS, INSTALL_MASK, LDFLAGS, PORTAGE_RSYNC_EXTRA_OPTS

```

Thanks for your suggestions

----------

## mbar

do you use reiserfs?

----------

## transacid

No, ext3.

----------

## firewrks

Give this a shot:

```
 echo 0 > /proc/sys/net/ipv4/tcp_window_scaling
```

----------

## transacid

 *firewrks wrote:*   

> Give this a shot:
> 
> ```
>  echo 0 > /proc/sys/net/ipv4/tcp_window_scaling
> ```
> ...

 

no, unfortunately the connection still stalls

----------

## klowe

Deleted

Last edited by klowe on Wed Apr 29, 2015 11:23 am; edited 2 times in total

----------

## transacid

 *klowe wrote:*   

>  *transacid wrote:*   
> 
> I have a very strange problem. I have a Thinkpad R51e with a Broadcom NetXtreme BCM5751F which is the kernel module tg3. The issue is that if i try to upload or download big files, e.g. 735mb, with scp or similar it gets stalled after a while. Even unloading an reloading the tg3 module doesn't help to get network back running, i have to reboot. I dun have a clue why it is this way because i tried it on debian and there it doesn't happen. So i suppose it must be another kernel module which causes the hangup.
> 
>  
> ...

 

Where exactly do you see the error?

----------

## tzimmer

 *transacid wrote:*   

> 
> 
> I have a very strange problem. I have a Thinkpad R51e with a Broadcom NetXtreme BCM5751F which is the kernel module tg3. The issue is that if i try to upload or download big files, e.g. 735mb, with scp or similar it gets stalled after a while. Even unloading an reloading the tg3 module doesn't help to get network back running, i have to reboot. I dun have a clue why it is this way because i tried it on debian and there it doesn't happen. So i suppose it must be another kernel module which causes the hangup.
> 
> 

 

I'm having exactly the same problem with a NetXtreme BCM5788 using the tg3 module. The connection also stalls after some traffic or some time - I haven't been able to work out which yet. In my case, too, the only way to get the link working again is to reboot. Were you able to solve the problem with your card?

I have to confess that I changed from gentoo to debian just recently but still there's nothing better than the good old gentoo forums.  :Wink:  So I'm using kernel 2.6.22-3-k7 there. Perhaps I just have to try another kernel but it seems to be no known issue cause I could only find this thread on the topic.

----------

## CDiMa

 *tzimmer wrote:*   

>  *transacid wrote:*   
> 
> I have a very strange problem. I have a Thinkpad R51e with a Broadcom NetXtreme BCM5751F which is the kernel module tg3. The issue is that if i try to upload or download big files, e.g. 735mb, with scp or similar it gets stalled after a while. Even unloading an reloading the tg3 module doesn't help to get network back running, i have to reboot. I dun have a clue why it is this way because i tried it on debian and there it doesn't happen. So i suppose it must be another kernel module which causes the hangup.
> 
>  
> ...

 

I'm having the same problem here on my DELL Latitude D620 with the integrated BCM5752. 

I've never experienced problems until I upgraded from 2.6.22-suspend2-r2 to 2.6.23-gentoo-r9. Since then, when I have heavy network traffic, I consistently experience loss of connectivity. I've changed several kernels, tried several patches but with no luck.

Actually I'm running on a 2.6.25-rc5-git3 that otherwise works flawlessly.

I was able to trigger the lock by doing an iperf test run. After the connection stalled ifconfig started showing dropped packets. With this latest kernel iperf doesn't  lock the network card but I still experience random locks during downloads.

I don't have to reboot to fix the network card, an ethtool -t eth0 suffices to fix it and so, if I'm vigilant enough, I'm able to complete the running download.

I'm convinced too that it must be some strange interference between the tg3 driver and some other kernel component or module. The several kernels I tested had different versions of the tg3 driver, but the problem was always the same. Unfortunately it's difficult for me to compare the differences in the config against the 2.6.22-r2 that worked. I made many changes to the config, so it's hard to track down which one interferes with the network module.

----------

## CDiMa

 *CDiMa wrote:*   

> 
> 
> I've never experienced problems until I upgraded from 2.6.22-suspend2-r2 to 2.6.23-gentoo-r9. Since then, when I have heavy network traffic, I consistently experience loss of connectivity. I've changed several kernels, tried several patches but with no luck.
> 
> Actually I'm running on a 2.6.25-rc5-git3 that otherwise works flawlessly.
> ...

 

Ok, I've been through it again and maybe now it's solved.

Some days ago I had found a thread on the fa.linux.kernel newsgroup about a tg3 data corruption regression in kernels 2.6.24/2.6.23.4

I had already tried one of the patches suggested and it didn't work. I've examined the thread again and noticed that I didn't apply all patches, so I decided to retry.

These are the steps I did:

1) Quoted from the newsgroup:

```
Perhaps when we get to the end of the data-stream, there is a

tiny frag that the chip cannot handle.  That's the only thing I can

think of.

Please try this patch to see if the problem goes away.  This will

disable SG on 5701 so we always get linear SKBs.

diff --git a/drivers/net/tg3.c b/drivers/net/tg3.c

index db606b6..bb37e76 100644

--- a/drivers/net/tg3.c

+++ b/drivers/net/tg3.c

@@ -12717,6 +12717,9 @@ static int __devinit tg3_init_one(struct pci_dev *pdev,

        } else

                tp->tg3_flags &= ~TG3_FLAG_RX_CHECKSUMS;

+       if (GET_ASIC_REV(tp->pci_chip_rev_id) == ASIC_REV_5701)

+               dev->features &= ~(NETIF_F_IP_CSUM | NETIF_F_SG);

+

        /* flow control autonegotiation is default behavior */

        tp->tg3_flags |= TG3_FLAG_PAUSE_AUTONEG;

        tp->link_config.flowctrl = TG3_FLOW_CTRL_TX | TG3_FLOW_CTRL_RX;

```

I manually added the three lines in tg3.c changing the revision of the asic to match mine (5752).

2) Then I added this other change:

```
Also, after some digging, I found that the 5701 can run into trouble if

a 64-bit DMA read terminates early and then completes as a 32-bit transfer.

The problem is reportedly very rare, but the failure mode looks like a

match.  Can you apply the following patch and see if it helps your

performance / corruption problems?

diff --git a/drivers/net/tg3.c b/drivers/net/tg3.c

index db606b6..7ad08ce 100644

--- a/drivers/net/tg3.c

+++ b/drivers/net/tg3.c

@@ -11409,6 +11409,8 @@ static int __devinit tg3_get_invariants(struct tg3 *tp)

                tp->tg3_flags |= TG3_FLAG_PCI_HIGH_SPEED;

        if ((pci_state_reg & PCISTATE_BUS_32BIT) != 0)

                tp->tg3_flags |= TG3_FLAG_PCI_32BIT;

+       else if (GET_ASIC_REV(tp->pci_chip_rev_id) == ASIC_REV_5701)

+               tp->grc_mode |= GRC_MODE_FORCE_PCI32BIT;

        /* Chip-specific fixup from Broadcom driver */

        if ((tp->pci_chip_rev_id == CHIPREV_ID_5704_A0) &&

```

Again I made the change to match rev 5752.

3) These two patches laid the groundwork for applying this third patch, which will hopefully fix the problem:

```
The following patch fixes the problem for me.  Do we want to accept this

patch and call it a day or continue investigating the source of the problem?

Patch applies to 2.6.24.2, but doesn't apply to 2.6.25-rc.  If everyone

agrees that this is the right solution, I will resubmit with a proper

subject line and description.

Tony

--- linux-2.6.24.2/include/net/sock.h.orig      2008-02-20 17:19:20.000000000 -0500

+++ linux-2.6.24.2/include/net/sock.h   2008-02-20 17:25:55.000000000 -0500

@@ -1236,8 +1236,10 @@ static inline struct sk_buff *sk_stream_

 {

        struct sk_buff *skb;

-       /* The TCP header must be at least 32-bit aligned.  */

-       size = ALIGN(size, 4);

+       /* The TCP header must be at least 32-bit aligned, but some chipsets

+        * such as Broadcom BCM5701 require at least 16-byte alignment.

+        */

+       size = ALIGN(size, 16);

        skb = alloc_skb_fclone(size + sk->sk_prot->max_header, gfp);

        if (skb) { 

```

I've kernel 2.6.25-rc5 so this patch didn't work out of the box. I had to edit net/ipv4/tcp.c instead. Mind you that this patch alone didn't work for me without the previous two.

Right now the minor testing I've done seems to indicate that the problem has gone...

Hope this helps!

Claudio

----------

## CDiMa

 *CDiMa wrote:*   

> Hope this helps!

 

No, it doesn't... I'm still having problems with the nic...

----------

