nbd umira

Dalibor Straka dast na panelnet.cz
Středa Listopad 23 06:59:47 CET 2005


Ahoj,

na opteronu mi umira nbd. Zkousel jsem debiani balik i nejnovejsi
verzi z nbd.sf.net. Strace na toho demona (bez -f) ceka ve stavu
" accept(3,   " a kdyz se pripoji klient objevi se zpracovani pozadavku
a vyforkovani na dite. Prvnich par klientu bezi v pohode:

---------------------------------------------------------------------
(par stat("....) jsem smazal)
accept(3, {sa_family=AF_INET, sin_port=htons(1024),
   sin_addr=inet_addr("147.32.193.223")}, [1019783849212641296]) = 4
getpeername(4, {sa_family=AF_INET, sin_port=htons(1024),
sin_addr=inet_addr("147.32.193.223")}, [22668081474043920]) = 0
sendto(5, "<30>Nov 22 16:21:30 nbd_server[2"..., 120, MSG_NOSIGNAL,
   NULL, 0) = 120
open(NULL, O_RDONLY)                    = -1 EFAULT (Bad address)
sendto(5, "<30>Nov 22 16:21:30 nbd_server[2"..., 90, MSG_NOSIGNAL, NULL,
   0) = 90
sendto(5, "<30>Nov 22 16:21:30 nbd_server[2"..., 56, MSG_NOSIGNAL, NULL,
   0) = 56
clone(child_stack=0,
   flags=CLONE_CHILD_CLEARTID|CLONE_CHILD_SETTID|SIGCHLD,
   child_tidptr=0x2aaaaaf90d10) = 28994
close(4)                                = 0
---------------------------------------------------------------------

jenze po tvrdem rebootu nejakeho klienta mu zrejme umre dite a demon to
nerozdycha. Strace opet bez -f vypada takto:

---------------------------------------------------------------------
accept(3, 0x7fffffa5db40, [1019783849212641296]) = ? ERESTARTSYS (To be
    restarted)
--- SIGCHLD (Child exited) @ 0 (0) ---
wait4(-1, NULL, 0, NULL)                = 28994
stat("/etc/localtime", {st_mode=S_IFREG|0644, st_size=806, ...}) = 0
stat("/etc/localtime", {st_mode=S_IFREG|0644, st_size=806, ...}) = 0
stat("/etc/localtime", {st_mode=S_IFREG|0644, st_size=806, ...}) = 0
sendto(5, "<30>Nov 22 16:37:18 nbd_server[2"..., 58, MSG_NOSIGNAL, NULL,
    0) = 58
wait4(-1,
---------------------------------------------------------------------

a demon ceka ve stavu "wait4(-1, ", pripojit se na nej da (telnetem),
ale nereaguje a neakceptuje nic, je proste mrtvy. Koukal jsem na ten kus
kodu, ale neni mi jasne, proc umre tata, kdyz mu nejake dite zkolabuje.
Kod z pietnich duvodu prikladam:

---------------------------------------------------------------------
/**
 * Accept loop of the server.
 *
 * Waits for connections on serve->socket, fills in a CLIENT for each one,
 * checks it with authorized_client() and, unless NOFORK is defined, forks a
 * child per accepted client. The child closes the listening socket and runs
 * serveconnection(); the parent records the child's pid in the `children`
 * hash table and goes back to accepting.
 *
 * @param serve server description; serve->socket is the listening socket
 * @return declared int, but the loop has no exit path in this function
 */
int serveloop(SERVER* serve) {
    struct sockaddr_in addrin;
    for(;;) {
        CLIENT *client;
        int net;
        socklen_t addrinlen;
#ifndef NOFORK
        pid_t child;
#endif

        DEBUG("accept, ");
        /*
         * addrinlen is a value-result argument, so it is reset before
         * every call. An accept() interrupted by a signal (for example
         * SIGCHLD when a child exits) is retried instead of being
         * reported as an error.
         */
        do {
            addrinlen = sizeof(addrin);
            net = accept(serve->socket, (struct sockaddr *) &addrin, &addrinlen);
        } while (net < 0 && errno == EINTR);
        if (net < 0)
            err("accept: %m");

        client = g_malloc(sizeof(CLIENT));
        client->server=serve;
        client->exportsize=OFFT_MAX;
        client->net=net;
        set_peername(net, client);
        if (!authorized_client(client)) {
            msg2(LOG_INFO,"Unauthorized client") ;
            close(net) ;
            /* The rejected client is not used again in this function. */
            /* NOTE(review): if set_peername() attaches allocated memory
             * to the client, that memory is not released here - confirm. */
            g_free(client);
            continue ;
        }
        msg2(LOG_INFO,"Authorized client") ;
#ifndef NOFORK
        child = fork();
        if (child < 0) {
            msg3(LOG_INFO,"Could not fork (%s)",strerror(errno)) ;
            close(net) ;
            g_free(client);
            continue ;
        }
        if (child > 0) { /* parent */
            /*
             * The hash-table key is allocated only once a child really
             * exists, so a failed fork() no longer leaks it.
             */
            pid_t *pid = g_malloc(sizeof(pid_t));
            *pid = child;
            close(net);
            /* Only the child serves this client; drop the parent's copy. */
            g_free(client);
            g_hash_table_insert(children, pid, pid);
            continue;
        }
        /* child */
        g_hash_table_destroy(children);
        close(serve->socket) ;
#endif // NOFORK
        msg2(LOG_INFO,"Starting to serve") ;
        serveconnection(client);
    }
}


-- Dalibor Straka


Další informace o konferenci Linux