Date: prev next · Thread: first prev next last
2012 Archives by date, by thread · List index


On Monday 19 of March 2012, Michael Meeks wrote:
On Mon, 2012-03-19 at 07:33 +0100, Lubos Lunak wrote:
 Oh, I see. I've already noticed this myself, and that's a good
explanation for Voreppe's (lack of) builds. That's a rather bad bug for
tinderbox builds, and we really could use a tinderbox watching over our
commits breaking the MSVC build (I think I fixed 5 MSVC regressions last
week at the very least).

      Yep - perhaps a re-boot-box-and-restart-build-after-4-hours of no
watchdog ping or something ? :-)

 Nah, so crude :). I've written a make watchdog, it's currently being tested 
on the Win-x86_6-fast tinderbox to see how it works in practice.

      Anyhow - glad to see you Windows-ised :-)

 Did anyone say I was staying 8-O ?

-- 
 Lubos Lunak
 l.lunak@suse.cz
# Build and self-test rules for makewatchdog.

# Number of seconds each fake build step below sleeps before writing its output.
# The "test" target passes 20 in the watchdog's timeout position, so with this
# value the steps stay silent for much longer than that.
TIMEOUT=1000

CXX=g++
CXXFLAGS=-O2

.PHONY: clean test_clean all test perform_test

# Compile the watchdog binary.
all:
        $(CXX) -Wall $(CXXFLAGS) makewatchdog.cpp -o makewatchdog

# Run the watchdog on a parallel sub-make of perform_test, with "log" as the
# output file, 20 as the timeout argument and 2 as the maxAttempts argument.
test: all test_clean
        ./makewatchdog log 20 2 $(MAKE) -j2 perform_test

# The fake build: two independent chains (output -> first, output2 -> second).
perform_test: first second

first: output
        cp output first

second: output2
        cp output2 second

# Slow steps: sleep for $(TIMEOUT) seconds, then write "kuk" to the target file.
output:
        ( sleep $(TIMEOUT); echo kuk ) >output

output2:
        ( sleep $(TIMEOUT); echo kuk ) >output2

# Remove everything the test produces, including the watchdog's log file.
test_clean:
        rm -f first second output output2 log

# Remove the test products and the compiled watchdog.
clean: test_clean
        rm -f makewatchdog
/*

  Copyright (c) 2012 Lubos Lunak <l.lunak@suse.cz>

  Permission is hereby granted, free of charge, to any person obtaining a
  copy of this software and associated documentation files (the "Software"),
  to deal in the Software without restriction, including without limitation
  the rights to use, copy, modify, merge, publish, distribute, sublicense,
  and/or sell copies of the Software, and to permit persons to whom the
  Software is furnished to do so, subject to the following conditions:

  The above copyright notice and this permission notice shall be included in
  all copies or substantial portions of the Software.

  THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
  THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
  LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
  FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
  DEALINGS IN THE SOFTWARE.

*/

#include <algorithm>
#include <dirent.h>
#include <errno.h>
#include <fcntl.h>
#include <signal.h>
#include <stdio.h>
#include <stdlib.h>
#include <string>
#include <string.h>
#include <sys/wait.h>
#include <time.h>
#include <unistd.h>
#include <vector>

//#define DEBUG

using namespace std;

// Writes the one-line command synopsis to standard output.
// argv0 is the program name to show in the synopsis.
// Always returns 0.
static int usage( const char* argv0 )
    {
    fprintf( stdout, "Usage: %s [outputFile] [timeout] [maxAttempts] [make command...]\n", argv0 );
    return 0;
    }

// Exit code returned when the watchdog itself fails (as opposed to passing on
// the exit code of the make command).
const int FAILURE = 3; // do not use 1 or 2 (check exit values make uses)
// Prefix put in front of the watchdog's own messages.
#define NAME "Make watchdog: "

// One process as read from /proc by findAllProcesses().
struct ProcInfo
    {
    pid_t pid; // process id, taken from the name of the /proc/<pid> directory
    pid_t parent; // parent process id, parsed from /proc/<pid>/stat
    string cmdline; // text read from /proc/<pid>/cmdline; empty if that file could not be opened
    };

// A list of processes; used for the whole process table and for the set of processes to kill.
typedef vector< ProcInfo > ProcInfoList;

// Scans /proc and returns one ProcInfo per process whose parent pid could be read.
//
// Entries are skipped when the directory name is not a pid, when /proc/<pid>/stat
// cannot be opened or read, or when the parsed parent pid is 0 or equal to the pid.
// An unreadable /proc/<pid>/cmdline is not an error, cmdline is just left empty.
// If /proc itself cannot be opened, a message is printed and the list is empty.
static ProcInfoList findAllProcesses()
    {
    ProcInfoList procInfos;
    DIR* dir = opendir( "/proc/" );
    if( dir == NULL )
        {
        fprintf( stderr, NAME "Cannot read /proc.\n" );
        return procInfos;
        }
    while( dirent* entry = readdir( dir ))
        {
        char buf[ 16384 ];
        ProcInfo procInfo;
        procInfo.pid = atoi( entry->d_name );
        if( procInfo.pid == 0 )
            continue; // not a process directory
        procInfo.parent = 0;
        const string procDir = string( "/proc/" ) + entry->d_name;
        if( FILE* f = fopen(( procDir + "/stat" ).c_str(), "r" ))
            {
            size_t size = fread( buf, 1, sizeof( buf ) - 1, f );
            const bool readFailed = ferror( f ) != 0;
            fclose( f );
            if( readFailed )
                {
#ifdef DEBUG
                fprintf( stderr, "Cannot read stat for %s\n", entry->d_name );
#endif
                continue;
                }
            buf[ size ] = '\0';
            // The command name in parentheses may itself contain ')' or spaces,
            // so look for the last ')' and parse the fields that follow it.
            if( const char* lparen = strchr( buf, '(' ))
                if( const char* rparen = strrchr( lparen, ')' ))
                    if( rparen[ 1 ] != '\0' ) // otherwise rparen + 2 would be past the terminator
                        {
                        int parent = 0; // %d needs an int*, pid_t is not guaranteed to be int
                        if( sscanf( rparen + 2, "%*c %d", &parent ) == 1 )
                            procInfo.parent = parent;
                        }
            if( procInfo.parent == 0 )
                continue;
            if( procInfo.pid == procInfo.parent )
                continue; // just in case
            }
        else
            {
#ifdef DEBUG
            fprintf( stderr, "Cannot open stat for %s\n", entry->d_name );
#endif
            continue;
            }
        if( FILE* f = fopen(( procDir + "/cmdline" ).c_str(), "r" ))
            {
            // Bounded read: an unbounded "%s" scan could overflow buf for a long command line.
            // The arguments in cmdline are NUL-separated, so the string assignment below
            // keeps only the first one (the program name).
            size_t size = fread( buf, 1, sizeof( buf ) - 1, f );
            buf[ size ] = '\0';
            fclose( f );
            procInfo.cmdline = buf;
            }
        else
            { // not an error
#ifdef DEBUG
            fprintf( stderr, "Cannot read cmdline for %s\n", entry->d_name );
#endif
            }
        // ok
        procInfos.push_back( procInfo );
        }
    closedir( dir );
    return procInfos;
    }

static void findToKillRecursive( pid_t parent, const ProcInfoList& allProcesses, ProcInfoList* 
toKill )
    {
    for( unsigned int i = 0;
         i < allProcesses.size();
         ++i )
        if( allProcesses[ i ].parent == parent )
            {
            findToKillRecursive( allProcesses[ i ].pid, allProcesses, toKill );
            toKill->push_back( allProcesses[ i ] );
            }
    }

// Builds the list of processes to kill for the process tree rooted at topParent:
// all descendants first (children before their parents), then topParent itself
// as the last element, provided it was found in the process table.
static vector< ProcInfo > findToKill( pid_t topParent )
    {
    const ProcInfoList everything = findAllProcesses();
    ProcInfoList victims;
    findToKillRecursive( topParent, everything, &victims );
    // locate the entry of the top process itself
    ProcInfoList::const_iterator top = everything.begin();
    while( top != everything.end() && top->pid != topParent )
        ++top;
    if( top != everything.end())
        victims.push_back( *top );
#ifdef DEBUG
    else
        fprintf( stderr, "Top parent process info not found.\n" );
#endif
    return victims;
    }

// Converts a wait status (as filled in by waitpid()) to an exit code for the watchdog:
// the child's own exit status if it exited, 128 plus the signal number if it
// was terminated by a signal, and FAILURE for anything else.
static int makeExitCode( int status )
    {
    if( WIFEXITED( status ))
        return WEXITSTATUS( status );
    return WIFSIGNALED( status ) ? 128 + WTERMSIG( status ) : FAILURE;
    }

// How a make invocation ended. killMake() reports the two Killed* values through
// its 'killed' argument; main() starts every attempt with SuccessfullExit, so that
// value is what remains when make was never killed.
enum KillStatus
    {
    SuccessfullExit, // exited cleanly
    KilledInterrupted, // was interrupted (cleanly)
    KilledForced       // force killed (not clean)
    };

static int killMake( pid_t pid, KillStatus* killed )
    {
#ifdef DEBUG
    fprintf( stderr, "Going to kill pid %d.\n", pid );
#endif
    ProcInfoList toKill = findToKill( pid );
    // SIGINT first
    for( unsigned i = 0;
         i < toKill.size();
         ++i )
        kill( toKill[ i ].pid, SIGINT );
    time_t t = time( NULL );
    while( t + 10 > time( NULL ))
        sleep( 2 ); // may get interrupted by a signal
    int status;
    bool pidHasFinished = false;
    // need to clean up the top parent
    if( waitpid( pid, &status, WNOHANG ) >= 0 )
        {
        pidHasFinished = true;
        *killed = KilledInterrupted;
        }
    // now forcibly
    for( unsigned i = 0;
         i < toKill.size();
         ++i )
        {
        if( kill( toKill[ i ].pid, 0 ) == 0 ) // still alive?
            {
            *killed = KilledForced; // unclear cleanup
            fprintf( stderr, NAME "Process %d not interrupted, forcibly killing.\n", toKill[ i 
].pid );
            fprintf( stderr, NAME "Cmdline: %s\n", toKill[ i ].cmdline.c_str());
            kill( toKill[ i ].pid, SIGKILL );
            }
        }
    if( !pidHasFinished )
        waitpid( pid, &status, 0 );
    return makeExitCode( status );
    }

// Switches fd to non-blocking mode and marks it close-on-exec.
// Returns true on success; on failure prints the failing fcntl() call and returns false.
bool makeNonBlocking( int fd )
    {
    int options = fcntl( fd, F_GETFL );
    if( options < 0 )
        {
        perror( NAME "fcntl( F_GETFL )" );
        return false;
        }
    // keep the existing status flags, only add O_NONBLOCK
    if( fcntl( fd, F_SETFL, options | O_NONBLOCK ) < 0 )
        {
        perror( NAME "fcntl( F_SETFL )" );
        return false;
        }
    // Close-on-exec is a descriptor flag, not a status flag: it has to be set
    // with F_SETFD/FD_CLOEXEC, passing O_CLOEXEC to F_SETFL does nothing.
    int fdFlags = fcntl( fd, F_GETFD );
    if( fdFlags < 0 )
        {
        perror( NAME "fcntl( F_GETFD )" );
        return false;
        }
    if( fcntl( fd, F_SETFD, fdFlags | FD_CLOEXEC ) < 0 )
        {
        perror( NAME "fcntl( F_SETFD )" );
        return false;
        }
    return true;
    }

// Write end of the notification pipe; assigned by watchMake() before the handler is installed.
static int childPipeWrite;

// SIGCHLD handler: writes a single byte to childPipeWrite so that the main loop
// can notice the signal by reading the other end of the pipe.
// errno is saved and restored, because the handler may interrupt code that is
// just about to inspect errno (e.g. an EINTR check after a system call).
static void childHandler( int signum )
    {
    (void)signum;
    const int savedErrno = errno;
    char c = '\0';
    if( write( childPipeWrite, &c, 1 ) < 0 )
        {
        // nothing can safely be done about a failed write inside a signal handler
        }
    errno = savedErrno;
    }

// Undoes the setup done by watchMake(): restores default SIGCHLD handling first
// (so the handler can no longer write to the pipe) and then closes both pipe ends.
static void stopWatching( int childPipeRead )
    {
    signal( SIGCHLD, SIG_DFL );
    close( childPipeRead );
    close( childPipeWrite );
    }

// Watches the running make process 'pid' until it exits or stops producing output.
//
// watchFd is a read descriptor for the file make writes its output to; whenever
// new data can be read from it, the inactivity timer is reset. If nothing new
// appears for more than 'timeout' seconds, the process tree is killed with
// killMake(), which also fills in *killed.
//
// Returns the exit code of make (see makeExitCode()), the result of killMake()
// after a timeout, or FAILURE if the watching could not be set up or the child's
// status could not be obtained.
static int watchMake( pid_t pid, KillStatus* killed, int timeout, int watchFd )
    {
    int pipeFd[ 2 ];
    if( pipe( pipeFd ) < 0 )
        {
        perror( NAME "pipe()" );
        return FAILURE;
        }
    childPipeWrite = pipeFd[ 1 ];
    int childPipeRead = pipeFd[ 0 ];
    if( !makeNonBlocking( childPipeRead ))
        {
        close( pipeFd[ 0 ] );
        close( pipeFd[ 1 ] );
        return FAILURE;
        }
    struct sigaction act;
    act.sa_handler = childHandler;
    sigemptyset( &act.sa_mask );
    act.sa_flags = SA_NOCLDSTOP;
#ifdef SA_RESTART
    act.sa_flags |= SA_RESTART;
#endif
    sigaction( SIGCHLD, &act, NULL );
    time_t lastActivity = time( NULL );
    for(;;)
        {
        // select() cannot be used to watch for more data in watchFd, because it is a file
        // and our reading position will be at its end for most of the time, meaning that
        // select() will signal the fd is ready to read (i.e. eof)
#ifdef DEBUG
        sleep( 2 );
#else
        sleep( 10 );
#endif
        char buf[ 1024 ];
        const bool notified = read( childPipeRead, buf, 1 ) > 0;
        // Even without a notification poll the child: it may have exited before
        // the SIGCHLD handler was installed, in which case no byte ever arrives.
        int status = 0;
        pid_t waited;
        do
            waited = waitpid( pid, &status, notified ? 0 : WNOHANG );
        while( waited < 0 && errno == EINTR );
        if( waited == pid )
            {
#ifdef DEBUG
            fprintf( stderr, "Child exited\n" );
#endif
            stopWatching( childPipeRead );
            return makeExitCode( status );
            }
        if( waited < 0 )
            { // the child cannot be waited for, there is no status to report
            perror( NAME "waitpid()" );
            stopWatching( childPipeRead );
            return FAILURE;
            }
        if( read( watchFd, buf, sizeof( buf )) > 0 )
            {
#ifdef DEBUG
            fprintf( stderr, "Activity in output file.\n" );
#endif
            // skip to the end of the file, so that only new data counts next time
            while( read( watchFd, buf, sizeof( buf )) > 0 )
                    ;
            lastActivity = time( NULL );
            }
        else if( lastActivity + timeout < time( NULL ))
            { // timeout
#ifdef DEBUG
            fprintf( stderr, "Activity timeout.\n" );
#endif
            stopWatching( childPipeRead );
            return killMake( pid, killed );
            }
        }
    }

static int runMake( int argc, char** argv, KillStatus* killed, int timeout, int watchFd )
    {
    pid_t pid = fork();
    switch( pid )
        {
        default: // parent
            return watchMake( pid, killed, timeout, watchFd );
        case 0: // child
            close( watchFd );
            execvp( argv[ 0 ], argv );
            break;
        case -1: // failure
            perror( NAME "fork()" );
            break;
        }
    fprintf( stderr, NAME "Make command invocation failed.\n" );
    return FAILURE;
    }

// Redirects stdout and stderr of this process (and thus of everything it starts)
// to outputFileName, which is created or truncated, and opens the same file a
// second time for reading.
//
// Returns the non-blocking read descriptor used to watch the file for new
// output, or -1 on failure (after printing a message). No descriptor opened
// here is left open on failure.
static int setupOutputFile( const char* outputFileName )
    {
    int outputFd = open( outputFileName, O_APPEND | O_CREAT | O_TRUNC | O_WRONLY, 0644 );
    if( outputFd < 0 )
        {
        fprintf( stderr, NAME "Opening output file %s for writing failed: %s\n", outputFileName,
            strerror( errno ));
        return -1;
        }
    if( dup2( outputFd, STDOUT_FILENO ) < 0 || dup2( outputFd, STDERR_FILENO ) < 0 )
        {
        perror( NAME "dup2()" ); // report first, close() may change errno
        close( outputFd );
        return -1;
        }
    // If stdout or stderr was closed on entry, open() may have returned that very
    // descriptor; closing it would undo the redirection.
    if( outputFd != STDOUT_FILENO && outputFd != STDERR_FILENO )
        close( outputFd );
    int watchFd = open( outputFileName, O_RDONLY );
    if( watchFd < 0 )
        {
        perror( NAME "open()" );
        return -1;
        }
    if( !makeNonBlocking( watchFd ))
        {
        close( watchFd );
        return -1;
        }
    return watchFd;
    }

int main( int argc, char** argv )
    {
    if( argc < 4 )
        return usage( argv[ 0 ] );
    const char* outputFileName = argv[ 1 ];
    int timeout = atoi( argv[ 2 ] );
    int attempts = atoi( argv[ 3 ] );
    int watchFd = setupOutputFile( outputFileName );
    if( watchFd < 0 )
        return FAILURE;
    int exitcode = 0;
    for( int attempt = 1;
         attempt <= attempts;
         ++attempt )
        {
        KillStatus killed = SuccessfullExit;
        exitcode = runMake( argc - 4, argv + 4, &killed, timeout, watchFd );
        switch( killed )
            {
            case SuccessfullExit:
                break;
            case KilledInterrupted:
                if( attempt == attempts )
                    fprintf( stderr, NAME "Error: Make command timed out, maximum number of 
attempts reached,"
                        " failing, exit code %d.\n", exitcode );
                else
                    fprintf( stderr, NAME "Error: Make command timed out, attempt %d/%d, 
interrupting"
                        " and retrying.\n", attempt, attempts );
                break;
            case KilledForced:
                fprintf( stderr, NAME "Error: Make command timed out, force killed, failing,"
                    " exit code %d\n", exitcode );
                attempt = attempts + 1; // break out of the loop
                break;
            }
        }
    return exitcode;
    }

Attachment: windows.sh
Description: application/shellscript


Context


Privacy Policy | Impressum (Legal Info) | Copyright information: Unless otherwise specified, all text and images on this website are licensed under the Creative Commons Attribution-Share Alike 3.0 License. This does not include the source code of LibreOffice, which is licensed under the Mozilla Public License (MPLv2). "LibreOffice" and "The Document Foundation" are registered trademarks of their corresponding registered owners or are in actual use as trademarks in one or more countries. Their respective logos and icons are also subject to international copyright laws. Use thereof is explained in our trademark policy.