From 11617d8ab5192c12e3b33cd9c08ac32f1d334a85 Mon Sep 17 00:00:00 2001 From: Rainer Gerhards Date: Thu, 30 Sep 2010 14:17:34 +0200 Subject: omhdfs: first shot at this new module (very rough PoC code) --- Makefile.am | 4 + configure.ac | 19 +++ plugins/omhdfs/Makefile.am | 6 + plugins/omhdfs/omhdfs.c | 296 +++++++++++++++++++++++++++++++++++++++++++++ 4 files changed, 325 insertions(+) create mode 100644 plugins/omhdfs/Makefile.am create mode 100644 plugins/omhdfs/omhdfs.c diff --git a/Makefile.am b/Makefile.am index bc6d8dd0..c7646e3d 100644 --- a/Makefile.am +++ b/Makefile.am @@ -127,6 +127,10 @@ if ENABLE_OMUXSOCK SUBDIRS += plugins/omuxsock endif +if ENABLE_OMHDFS +SUBDIRS += plugins/omhdfs +endif + if ENABLE_OMTEMPLATE SUBDIRS += plugins/omtemplate endif diff --git a/configure.ac b/configure.ac index 1050981b..069a8fd8 100644 --- a/configure.ac +++ b/configure.ac @@ -1003,6 +1003,23 @@ AM_CONDITIONAL(ENABLE_OMTEMPLATE, test x$enable_omtemplate = xyes) # end of copy template - be sure to search for omtemplate to find everything! +# settings for the omhdfs; +AC_ARG_ENABLE(omhdfs, + [AS_HELP_STRING([--enable-omhdfs],[Compiles omhdfs template module @<:@default=no@:>@])], + [case "${enableval}" in + yes) enable_omhdfs="yes" ;; + no) enable_omhdfs="no" ;; + *) AC_MSG_ERROR(bad value ${enableval} for --enable-omhdfs) ;; + esac], + [enable_omhdfs=no] +) +# +# you may want to do some library checks here - see snmp, mysql, pgsql modules +# for samples +# +AM_CONDITIONAL(ENABLE_OMHDFS, test x$enable_omhdfs = xyes) + + AC_CONFIG_FILES([Makefile \ runtime/Makefile \ tools/Makefile \ @@ -1016,6 +1033,7 @@ AC_CONFIG_FILES([Makefile \ plugins/imklog/Makefile \ plugins/imtemplate/Makefile \ plugins/omtemplate/Makefile \ + plugins/omhdfs/Makefile \ plugins/omprog/Makefile \ plugins/omstdout/Makefile \ plugins/pmrfc3164sd/Makefile \ @@ -1070,6 +1088,7 @@ echo "---{ output plugins }---" echo " Mail support enabled: $enable_mail" echo " omprog module will be compiled: $enable_omprog" echo " omstdout module will be compiled: $enable_omstdout" +echo " omhdfs module will be compiled: $enable_omhdfs" echo " omruleset module will be compiled: $enable_omruleset" echo " omdbalerting module will be compiled: $enable_omdbalerting" echo " omudpspoof module will be compiled: $enable_omudpspoof" diff --git a/plugins/omhdfs/Makefile.am b/plugins/omhdfs/Makefile.am new file mode 100644 index 00000000..329e6816 --- /dev/null +++ b/plugins/omhdfs/Makefile.am @@ -0,0 +1,6 @@ +pkglib_LTLIBRARIES = omhdfs.la + +omhdfs_la_SOURCES = omhdfs.c +omhdfs_la_CPPFLAGS = -I$(top_srcdir) $(PTHREADS_CFLAGS) $(RSRT_CFLAGS) +omhdfs_la_LDFLAGS = -module -avoid-version +omhdfs_la_LIBADD = diff --git a/plugins/omhdfs/omhdfs.c b/plugins/omhdfs/omhdfs.c new file mode 100644 index 00000000..42e5dc8a --- /dev/null +++ b/plugins/omhdfs/omhdfs.c @@ -0,0 +1,296 @@ +/* omhdfs.c + * This is the implementation of the build-in file output module. + * + * NOTE: read comments in module-template.h to understand how this file + * works! + * + * Copyright 2010 Rainer Gerhards and Adiscon GmbH. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version 2 + * of the License, or (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. + * + * A copy of the GPL can be found in the file "COPYING" in this distribution. + */ + +/* this is kind of a hack, if defined, it instructs omhdfs to use + * the regular (non-hdfs) file system calls. This eases development + * (and hopefully troubleshooting) especially in cases when no + * hdfs environment is available. + */ +#define USE_REGULAR_FS 1 + +#include "config.h" +#include "rsyslog.h" +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "syslogd-types.h" +#include "srUtils.h" +#include "template.h" +#include "conf.h" +#include "cfsysline.h" +#include "module-template.h" +#include "unicode-helper.h" +#ifndef USE_REGULAR_FS +# include "hdfs.h" +#endif + +MODULE_TYPE_OUTPUT + +/* internal structures + */ +DEF_OMOD_STATIC_DATA + +/* globals for default values */ +static uchar *fileName = NULL; +/* end globals for default values */ + +typedef struct _instanceData { + uchar *fileName; +# ifdef USE_REGULAR_FS + short fd; /* file descriptor for (current) file */ +# else + hdfsFS fs; + hdfsFile fd; +# endif +} instanceData; + + +BEGINisCompatibleWithFeature +CODESTARTisCompatibleWithFeature + if(eFeat == sFEATURERepeatedMsgReduction) + iRet = RS_RET_OK; +ENDisCompatibleWithFeature + + +BEGINdbgPrintInstInfo +CODESTARTdbgPrintInstInfo + printf("omhdfs: file:%s", pData->fileName); + if (pData->fd == -1) + printf(" (unused)"); +ENDdbgPrintInstInfo + + + +#if 0 +static void prepareFile(instanceData *pData, uchar *newFileName) +{ + if(access((char*)newFileName, F_OK) == 0) { + /* file already exists */ + pData->fd = open((char*) newFileName, O_WRONLY|O_APPEND|O_CREAT|O_NOCTTY, + pData->fCreateMode); + } else { + pData->fd = -1; + /* file does not exist, create it (and eventually parent directories */ + if(pData->bCreateDirs) { + /* we fist need to create parent dirs if they are missing + * We do not report any errors here ourselfs but let the code + * fall through to error handler below. + */ + if(makeFileParentDirs(newFileName, strlen((char*)newFileName), + pData->fDirCreateMode, pData->dirUID, + pData->dirGID, pData->bFailOnChown) != 0) { + return; /* we give up */ + } + } + /* no matter if we needed to create directories or not, we now try to create + * the file. -- rgerhards, 2008-12-18 (based on patch from William Tisater) + */ + pData->fd = open((char*) newFileName, O_WRONLY|O_APPEND|O_CREAT|O_NOCTTY, + pData->fCreateMode); + if(pData->fd != -1) { + /* check and set uid/gid */ + if(pData->fileUID != (uid_t)-1 || pData->fileGID != (gid_t) -1) { + /* we need to set owner/group */ + if(fchown(pData->fd, pData->fileUID, + pData->fileGID) != 0) { + if(pData->bFailOnChown) { + int eSave = errno; + close(pData->fd); + pData->fd = -1; + errno = eSave; + } + /* we will silently ignore the chown() failure + * if configured to do so. 
+ */ + } + } + } + } +} +#endif + +static void prepareFile(instanceData *pData, uchar *newFileName) +{ +# if USE_REGULAR_FS + pData->fd = open((char*) newFileName, O_WRONLY|O_APPEND|O_CREAT|O_NOCTTY, 0666); +# else + pData->fs = hdfsConnect("default", 0); + pData->fd = hdfsOpenFile(fs, newFileName, O_WRONLY|O_CREAT, 0, 0, 0); + if(!pData->fd) { + dbgprintf(stderr, "Failed to open %s for writing!\n", newFileName); + // TODO: suspend/error report + } + +# endif +} + +static rsRetVal writeFile(uchar **ppString, unsigned iMsgOpts, instanceData *pData) +{ + size_t lenWrite; + DEFiRet; + +# if USE_REGULAR_FS + if (write(pData->fd, ppString[0], strlen((char*)ppString[0])) < 0) { + int e = errno; + dbgprintf("omhdfs write error!\n"); + + /* If the filesystem is filled up, just ignore + * it for now and continue writing when possible + */ + //if(pData->fileType == eTypeFILE && e == ENOSPC) + //return RS_RET_OK; + + //(void) close(pData->fd); + iRet = RS_RET_DISABLE_ACTION; + errno = e; + //logerror((char*) pData->f_fname); + } +# else + lenWrite = strlen(char*ppString[0]); + tSize num_written_bytes = hdfsWrite(pData->fs, pData->fd, ppString[0], lenWrite); + if(num_written_bytes != lenWrite) { + dbgprintf("Failed to write %s, expected %lu bytes, written %lu\n", pData->fileName, + lenWrite, num_written_bytes); + // TODO: suspend/error report + } +# endif +else { dbgprintf("omhdfs has successfully written to file\n"); } + + RETiRet; +} + + +BEGINcreateInstance +CODESTARTcreateInstance + pData->fd = -1; +ENDcreateInstance + + +BEGINfreeInstance +CODESTARTfreeInstance +# ifdef USE_REGULAR_FS + if(pData->fd != -1) + close(pData->fd); +# else + hdfsCloseFile(pData->fs, pData->fd); +# endif +ENDfreeInstance + + +BEGINtryResume +CODESTARTtryResume +ENDtryResume + +BEGINdoAction +CODESTARTdoAction + dbgprintf(" (%s)\n", pData->fileName); + iRet = writeFile(ppString, iMsgOpts, pData); +ENDdoAction + + +BEGINparseSelectorAct +CODESTARTparseSelectorAct + + /* first check if this config line is actually for us */ + if(strncmp((char*) p, ":omhdfs:", sizeof(":omhdfs:") - 1)) { + ABORT_FINALIZE(RS_RET_CONFLINE_UNPROCESSED); + } + + /* ok, if we reach this point, we have something for us */ + p += sizeof(":omhdfs:") - 1; /* eat indicator sequence (-1 because of '\0'!) */ + CHKiRet(createInstance(&pData)); + + CODE_STD_STRING_REQUESTparseSelectorAct(1) + /* rgerhards 2004-11-17: from now, we need to have different + * processing, because after the first comma, the template name + * to use is specified. So we need to scan for the first coma first + * and then look at the rest of the line. + */ + CHKiRet(cflineParseTemplateName(&p, *ppOMSR, 0, 0, (uchar*) "RSYSLOG_FileFormat")); + //(pszFileDfltTplName == NULL) ? (uchar*)"RSYSLOG_FileFormat" : pszFileDfltTplName)); + + // TODO: check for NULL filename + CHKmalloc(pData->fileName = ustrdup(fileName)); + + prepareFile(pData, pData->fileName); + +#if 0 + if ( pData->fd < 0 ){ + pData->fd = -1; + dbgprintf("Error opening log file: %s\n", pData->f_fname); + logerror((char*) pData->f_fname); + } +#endif +CODE_STD_FINALIZERparseSelectorAct +ENDparseSelectorAct + + +/* Reset config variables for this module to default values. 
+ * rgerhards, 2007-07-17 + */ +static rsRetVal resetConfigVariables(uchar __attribute__((unused)) *pp, void __attribute__((unused)) *pVal) +{ +/* + fileUID = -1; + fileGID = -1; + dirUID = -1; + dirGID = -1; + bFailOnChown = 1; + iDynaFileCacheSize = 10; + fCreateMode = 0644; + fDirCreateMode = 0700; + bCreateDirs = 1; +*/ + return RS_RET_OK; +} + + +BEGINmodExit +CODESTARTmodExit +ENDmodExit + + +BEGINqueryEtryPt +CODESTARTqueryEtryPt +CODEqueryEtryPt_STD_OMOD_QUERIES +ENDqueryEtryPt + + +BEGINmodInit() +CODESTARTmodInit + *ipIFVersProvided = CURR_MOD_IF_VERSION; +CODEmodInit_QueryRegCFSLineHdlr + CHKiRet(omsdRegCFSLineHdlr((uchar *)"omhdfsfilename", 0, eCmdHdlrGetWord, NULL, &fileName, NULL)); + CHKiRet(omsdRegCFSLineHdlr((uchar *)"resetconfigvariables", 1, eCmdHdlrCustomHandler, resetConfigVariables, NULL, STD_LOADABLE_MODULE_ID)); +CODEmodInit_QueryRegCFSLineHdlr +ENDmodInit -- cgit From 7ac7ad166b82034eea4a37c1937ca5ddd618ec45 Mon Sep 17 00:00:00 2001 From: Rainer Gerhards Date: Thu, 30 Sep 2010 13:55:26 +0000 Subject: omhdfs: added "real" libhdfs code, now actually works on hdfs very crude implementation, but probably good enough to gather some early performance data and experience with the module. No real error handling done, if something breaks, the whole thing will be blown up ;) --- plugins/omhdfs/Makefile.am | 4 +-- plugins/omhdfs/omhdfs.c | 62 +++++++++++++++++++++++++++++++++++----------- 2 files changed, 49 insertions(+), 17 deletions(-) diff --git a/plugins/omhdfs/Makefile.am b/plugins/omhdfs/Makefile.am index 329e6816..23fcf75a 100644 --- a/plugins/omhdfs/Makefile.am +++ b/plugins/omhdfs/Makefile.am @@ -1,6 +1,6 @@ pkglib_LTLIBRARIES = omhdfs.la omhdfs_la_SOURCES = omhdfs.c -omhdfs_la_CPPFLAGS = -I$(top_srcdir) $(PTHREADS_CFLAGS) $(RSRT_CFLAGS) -omhdfs_la_LDFLAGS = -module -avoid-version +omhdfs_la_CPPFLAGS = -I$(top_srcdir) $(PTHREADS_CFLAGS) $(RSRT_CFLAGS) -I/usr/lib/jvm/java-6-sun/include -I/usr/lib/jvm/java-6-sun/include/linux +omhdfs_la_LDFLAGS = -module -avoid-version -lhdfs -L/usr/lib/jvm/java-6-sun-1.6.0.20/jre/lib/amd64 -L/usr/lib/jvm/java-6-sun-1.6.0.20/jre/lib/amd64/server -ljava -ljvm -lverify omhdfs_la_LIBADD = diff --git a/plugins/omhdfs/omhdfs.c b/plugins/omhdfs/omhdfs.c index 42e5dc8a..fcdff6e5 100644 --- a/plugins/omhdfs/omhdfs.c +++ b/plugins/omhdfs/omhdfs.c @@ -28,7 +28,7 @@ * (and hopefully troubleshooting) especially in cases when no * hdfs environment is available. 
*/ -#define USE_REGULAR_FS 1 +//#define USE_REGULAR_FS 1 #include "config.h" #include "rsyslog.h" @@ -62,6 +62,8 @@ DEF_OMOD_STATIC_DATA /* globals for default values */ static uchar *fileName = NULL; +static uchar *hdfsHost = NULL; +int hdfsPort = 0; /* end globals for default values */ typedef struct _instanceData { @@ -71,6 +73,8 @@ typedef struct _instanceData { # else hdfsFS fs; hdfsFile fd; + const char *hdfsHost; + tPort hdfsPort; # endif } instanceData; @@ -85,8 +89,8 @@ ENDisCompatibleWithFeature BEGINdbgPrintInstInfo CODESTARTdbgPrintInstInfo printf("omhdfs: file:%s", pData->fileName); - if (pData->fd == -1) - printf(" (unused)"); + //if (pData->fd == -1) + //printf(" (unused)"); ENDdbgPrintInstInfo @@ -139,22 +143,41 @@ static void prepareFile(instanceData *pData, uchar *newFileName) } #endif -static void prepareFile(instanceData *pData, uchar *newFileName) +static void +prepareFile(instanceData *pData, uchar *newFileName) { # if USE_REGULAR_FS pData->fd = open((char*) newFileName, O_WRONLY|O_APPEND|O_CREAT|O_NOCTTY, 0666); # else - pData->fs = hdfsConnect("default", 0); - pData->fd = hdfsOpenFile(fs, newFileName, O_WRONLY|O_CREAT, 0, 0, 0); + dbgprintf("omhdfs: try to connect to host '%s' at port %d\n", + pData->hdfsHost, pData->hdfsPort); + pData->fs = hdfsConnect(pData->hdfsHost, pData->hdfsPort); + if(pData->fs == NULL) { + dbgprintf("omhdfs: error can not connect to hdfs\n"); + } + pData->fd = hdfsOpenFile(pData->fs, (char*)newFileName, O_WRONLY|O_APPEND, 0, 0, 0); + if(pData->fd == NULL) { + /* maybe the file does not exist, so we try to create it now. + * Note that we can not use hdfsExists() because of a deficit in + * it: https://issues.apache.org/jira/browse/HDFS-1154 + * As of my testing, libhdfs at least seems to return ENOENT if + * the file does not exist. 
+ */ + if(errno == ENOENT) { + dbgprintf("omhdfs: ENOENT trying to append to '%s', now trying create\n", + newFileName); + pData->fd = hdfsOpenFile(pData->fs, (char*)newFileName, O_WRONLY|O_CREAT, 0, 0, 0); + } + } if(!pData->fd) { - dbgprintf(stderr, "Failed to open %s for writing!\n", newFileName); + dbgprintf("omhdfs: failed to open %s for writing!\n", newFileName); // TODO: suspend/error report } # endif } -static rsRetVal writeFile(uchar **ppString, unsigned iMsgOpts, instanceData *pData) +static rsRetVal writeFile(uchar **ppString, instanceData *pData) { size_t lenWrite; DEFiRet; @@ -176,15 +199,14 @@ static rsRetVal writeFile(uchar **ppString, unsigned iMsgOpts, instanceData *pDa //logerror((char*) pData->f_fname); } # else - lenWrite = strlen(char*ppString[0]); + lenWrite = strlen((char*) ppString[0]); tSize num_written_bytes = hdfsWrite(pData->fs, pData->fd, ppString[0], lenWrite); - if(num_written_bytes != lenWrite) { - dbgprintf("Failed to write %s, expected %lu bytes, written %lu\n", pData->fileName, - lenWrite, num_written_bytes); + if((unsigned) num_written_bytes != lenWrite) { + dbgprintf("omhdfs: failed to write %s, expected %lu bytes, written %lu\n", pData->fileName, + lenWrite, (unsigned long) num_written_bytes); // TODO: suspend/error report } # endif -else { dbgprintf("omhdfs has successfully written to file\n"); } RETiRet; } @@ -192,7 +214,7 @@ else { dbgprintf("omhdfs has successfully written to file\n"); } BEGINcreateInstance CODESTARTcreateInstance - pData->fd = -1; + //pData->fd = -1; ENDcreateInstance @@ -214,7 +236,7 @@ ENDtryResume BEGINdoAction CODESTARTdoAction dbgprintf(" (%s)\n", pData->fileName); - iRet = writeFile(ppString, iMsgOpts, pData); + iRet = writeFile(ppString, pData); ENDdoAction @@ -241,6 +263,12 @@ CODESTARTparseSelectorAct // TODO: check for NULL filename CHKmalloc(pData->fileName = ustrdup(fileName)); + if(hdfsHost == NULL) { + pData->hdfsHost = "default"; + } else { + CHKmalloc(pData->hdfsHost = strdup((char*)hdfsHost)); + } + pData->hdfsPort = hdfsPort; prepareFile(pData, pData->fileName); @@ -271,6 +299,8 @@ static rsRetVal resetConfigVariables(uchar __attribute__((unused)) *pp, void __a fDirCreateMode = 0700; bCreateDirs = 1; */ + hdfsHost = NULL; + hdfsPort = 0; return RS_RET_OK; } @@ -291,6 +321,8 @@ CODESTARTmodInit *ipIFVersProvided = CURR_MOD_IF_VERSION; CODEmodInit_QueryRegCFSLineHdlr CHKiRet(omsdRegCFSLineHdlr((uchar *)"omhdfsfilename", 0, eCmdHdlrGetWord, NULL, &fileName, NULL)); + CHKiRet(omsdRegCFSLineHdlr((uchar *)"omhdfshost", 0, eCmdHdlrGetWord, NULL, &hdfsHost, NULL)); + CHKiRet(omsdRegCFSLineHdlr((uchar *)"omhdfsport", 0, eCmdHdlrInt, NULL, &hdfsPort, NULL)); CHKiRet(omsdRegCFSLineHdlr((uchar *)"resetconfigvariables", 1, eCmdHdlrCustomHandler, resetConfigVariables, NULL, STD_LOADABLE_MODULE_ID)); CODEmodInit_QueryRegCFSLineHdlr ENDmodInit -- cgit From 1d609fc5ab9b5d1c89d5d7d8f321c4e8493b4437 Mon Sep 17 00:00:00 2001 From: Rainer Gerhards Date: Fri, 1 Oct 2010 10:27:44 +0200 Subject: omhdfs: some cleanup of build system but still pretty ugly. Any java folks out there to help clean it up? --- doc/omhdfs.html | 49 +++++++++++++++++++++++++++++++++++++++++++ doc/rsyslog_conf_modules.html | 1 + plugins/omhdfs/Makefile.am | 5 +++-- 3 files changed, 53 insertions(+), 2 deletions(-) create mode 100644 doc/omhdfs.html diff --git a/doc/omhdfs.html b/doc/omhdfs.html new file mode 100644 index 00000000..827697b6 --- /dev/null +++ b/doc/omhdfs.html @@ -0,0 +1,49 @@ + +rsyslog output module for HDFS (omhdfs) +back + + +

HDFS Output Module (omhdfs)

Module Name:    omhdfs

Available since:    5.7.2

Author: Rainer Gerhards <rgerhards@adiscon.com>

Description:

This module supports writing messages to files on Hadoop's HDFS
file system.
Configuration Directives:

  • $...
    option...

Caveats/Known Bugs:
Building omhdfs is a challenge because we could not yet find out how
to integrate Java properly into the autotools build process. The issue is
that HDFS is written in Java and libhdfs uses JNI to talk to it. That requires
that various system-specific environment options and paths be set correctly. At
this point, we leave this to the user. If someone knows how to do it better,
please drop us a line!

In order to build, you need to set these environment variables BEFORE running
./configure:

  • JAVA_INCLUDES - must have all include paths that are needed to build
    JNI C programs, including the -I options necessary for gcc. An example is
    # export JAVA_INCLUDES="-I/usr/lib/jvm/java-1.6.0-openjdk-1.6.0.0.x86_64/include -I/usr/lib/jvm/java-1.6.0-openjdk-1.6.0.0.x86_64/include/linux"
  • JAVA_LIBS - must have all library paths that are needed to build
    JNI C programs, including the -l/-L options necessary for gcc. An example is
    # export JAVA_LIBS="-L/usr/java/jdk1.6.0_21/jre/lib/amd64 -L/usr/java/jdk1.6.0_21/jre/lib/amd64/server -ljava -ljvm -lverify"
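
A full build sequence, sketched only as an illustration: the JDK paths are the
example values from above and differ per system, and --enable-omhdfs is the
configure switch this patch adds to configure.ac.

  # sketch only - adjust the JDK paths to the local installation
  export JAVA_INCLUDES="-I/usr/lib/jvm/java-1.6.0-openjdk-1.6.0.0.x86_64/include -I/usr/lib/jvm/java-1.6.0-openjdk-1.6.0.0.x86_64/include/linux"
  export JAVA_LIBS="-L/usr/java/jdk1.6.0_21/jre/lib/amd64 -L/usr/java/jdk1.6.0_21/jre/lib/amd64/server -ljava -ljvm -lverify"
  ./configure --enable-omhdfs
  make && make install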
Sample:
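
A minimal configuration sketch, added here for illustration and not part of the
patch itself: only the $OMHDFSFileName directive and the :omhdfs: action selector
come from the module code; the module load statement and the file name value are
assumptions.

  # sketch only - the file name value is just an example
  $ModLoad omhdfs
  $OMHDFSFileName /rsyslog/logs/syslog.log
  *.* :omhdfs: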

[manual index] [rsyslog site]

This documentation is part of the rsyslog project.
Copyright © 2010 by Rainer Gerhards and Adiscon.
Released under the GNU GPL version 3 or higher.

+ + diff --git a/doc/rsyslog_conf_modules.html b/doc/rsyslog_conf_modules.html index 85899954..74aa319c 100644 --- a/doc/rsyslog_conf_modules.html +++ b/doc/rsyslog_conf_modules.html @@ -66,6 +66,7 @@ permits rsyslog to alert folks by mail if something important happens
  • omoracle - output module for Oracle (native OCI interface)
  • omudpspoof - output module sending UDP syslog messages with a spoofed address
  • omuxsock - output module Unix domain sockets
  • +
  • omhdfs - output module for Hadoop's HDFS file system
  • Parser Modules

    diff --git a/plugins/omhdfs/Makefile.am b/plugins/omhdfs/Makefile.am index 23fcf75a..2e7ef8ea 100644 --- a/plugins/omhdfs/Makefile.am +++ b/plugins/omhdfs/Makefile.am @@ -1,6 +1,7 @@ pkglib_LTLIBRARIES = omhdfs.la omhdfs_la_SOURCES = omhdfs.c -omhdfs_la_CPPFLAGS = -I$(top_srcdir) $(PTHREADS_CFLAGS) $(RSRT_CFLAGS) -I/usr/lib/jvm/java-6-sun/include -I/usr/lib/jvm/java-6-sun/include/linux -omhdfs_la_LDFLAGS = -module -avoid-version -lhdfs -L/usr/lib/jvm/java-6-sun-1.6.0.20/jre/lib/amd64 -L/usr/lib/jvm/java-6-sun-1.6.0.20/jre/lib/amd64/server -ljava -ljvm -lverify +#omhdfs_la_CPPFLAGS = -I$(top_srcdir) $(PTHREADS_CFLAGS) $(RSRT_CFLAGS) -I/usr/lib/jvm/java-6-sun/include -I/usr/lib/jvm/java-6-sun/include/linux +omhdfs_la_CPPFLAGS = -I$(top_srcdir) $(PTHREADS_CFLAGS) $(RSRT_CFLAGS) $(JAVA_INCLUDES) +omhdfs_la_LDFLAGS = -module -avoid-version -lhdfs $(JAVA_LIBS) omhdfs_la_LIBADD = -- cgit From d8a1489f545179591abced679ba24831d85ca224 Mon Sep 17 00:00:00 2001 From: Rainer Gerhards Date: Fri, 1 Oct 2010 11:29:50 +0200 Subject: omhdfs: cleanup and lots of improvement now things look much better, also done some prep in order to support a file cache (we need this for multiple selectors writing to the same file). --- plugins/omhdfs/omhdfs.c | 247 +++++++++++++++++++++++++++++------------------- runtime/rsyslog.h | 3 + 2 files changed, 153 insertions(+), 97 deletions(-) diff --git a/plugins/omhdfs/omhdfs.c b/plugins/omhdfs/omhdfs.c index fcdff6e5..71080585 100644 --- a/plugins/omhdfs/omhdfs.c +++ b/plugins/omhdfs/omhdfs.c @@ -23,13 +23,6 @@ * A copy of the GPL can be found in the file "COPYING" in this distribution. */ -/* this is kind of a hack, if defined, it instructs omhdfs to use - * the regular (non-hdfs) file system calls. This eases development - * (and hopefully troubleshooting) especially in cases when no - * hdfs environment is available. 
- */ -//#define USE_REGULAR_FS 1 - #include "config.h" #include "rsyslog.h" #include @@ -42,6 +35,8 @@ #include #include #include +#include +#include #include "syslogd-types.h" #include "srUtils.h" @@ -50,15 +45,14 @@ #include "cfsysline.h" #include "module-template.h" #include "unicode-helper.h" -#ifndef USE_REGULAR_FS -# include "hdfs.h" -#endif +#include "errmsg.h" MODULE_TYPE_OUTPUT /* internal structures */ DEF_OMOD_STATIC_DATA +DEFobjCurrIf(errmsg) /* globals for default values */ static uchar *fileName = NULL; @@ -66,18 +60,23 @@ static uchar *hdfsHost = NULL; int hdfsPort = 0; /* end globals for default values */ -typedef struct _instanceData { - uchar *fileName; -# ifdef USE_REGULAR_FS - short fd; /* file descriptor for (current) file */ -# else +typedef struct { + uchar *name; hdfsFS fs; - hdfsFile fd; + hdfsFile fh; const char *hdfsHost; tPort hdfsPort; -# endif + int nUsers; + pthread_mutex_t mut; +} file_t; + + +typedef struct _instanceData { + file_t *pFile; } instanceData; +/* forward definitions (down here, need data types) */ +static inline rsRetVal fileClose(file_t *pFile); BEGINisCompatibleWithFeature CODESTARTisCompatibleWithFeature @@ -88,9 +87,7 @@ ENDisCompatibleWithFeature BEGINdbgPrintInstInfo CODESTARTdbgPrintInstInfo - printf("omhdfs: file:%s", pData->fileName); - //if (pData->fd == -1) - //printf(" (unused)"); + printf("omhdfs: file:%s", pData->pFile->name); ENDdbgPrintInstInfo @@ -100,10 +97,10 @@ static void prepareFile(instanceData *pData, uchar *newFileName) { if(access((char*)newFileName, F_OK) == 0) { /* file already exists */ - pData->fd = open((char*) newFileName, O_WRONLY|O_APPEND|O_CREAT|O_NOCTTY, + pData->fh = open((char*) newFileName, O_WRONLY|O_APPEND|O_CREAT|O_NOCTTY, pData->fCreateMode); } else { - pData->fd = -1; + pData->fh = -1; /* file does not exist, create it (and eventually parent directories */ if(pData->bCreateDirs) { /* we fist need to create parent dirs if they are missing @@ -119,18 +116,18 @@ static void prepareFile(instanceData *pData, uchar *newFileName) /* no matter if we needed to create directories or not, we now try to create * the file. -- rgerhards, 2008-12-18 (based on patch from William Tisater) */ - pData->fd = open((char*) newFileName, O_WRONLY|O_APPEND|O_CREAT|O_NOCTTY, + pData->fh = open((char*) newFileName, O_WRONLY|O_APPEND|O_CREAT|O_NOCTTY, pData->fCreateMode); - if(pData->fd != -1) { + if(pData->fh != -1) { /* check and set uid/gid */ if(pData->fileUID != (uid_t)-1 || pData->fileGID != (gid_t) -1) { /* we need to set owner/group */ - if(fchown(pData->fd, pData->fileUID, + if(fchown(pData->fh, pData->fileUID, pData->fileGID) != 0) { if(pData->bFailOnChown) { int eSave = errno; - close(pData->fd); - pData->fd = -1; + close(pData->fh); + pData->fh = -1; errno = eSave; } /* we will silently ignore the chown() failure @@ -143,20 +140,71 @@ static void prepareFile(instanceData *pData, uchar *newFileName) } #endif -static void -prepareFile(instanceData *pData, uchar *newFileName) +/* ---BEGIN FILE OBJECT---------------------------------------------------- */ +/* This code handles the "file object". This is split from the actual + * instance data, because several instances may write into the same file. + * If so, we need to use a single object, and also synchronize their writes. + * So we keep the file object separately, and just stick a reference into + * the instance data. 
+ */ + +static inline rsRetVal +fileObjConstruct(file_t **ppFile) +{ + file_t *pFile; + DEFiRet; + + CHKmalloc(pFile = malloc(sizeof(file_t))); + pFile->name = NULL; + pFile->hdfsHost = NULL; + pFile->fh = NULL; + pFile->nUsers = 0; + + *ppFile = pFile; +finalize_it: + RETiRet; +} + +static inline void +fileObjAddUser(file_t *pFile) +{ + /* init mutex only when second user is added */ + ++pFile->nUsers; + if(pFile->nUsers == 2) + pthread_mutex_init(&pFile->mut, NULL); +} + +static inline rsRetVal +fileObjDestruct(file_t **ppFile) +{ + file_t *pFile = *ppFile; + if(pFile->nUsers > 1) + pthread_mutex_destroy(&pFile->mut); + fileClose(pFile); + free(pFile->name); + free((char*)pFile->hdfsHost); + free(pFile->fh); + + return RS_RET_OK; +} + +static inline rsRetVal +fileOpen(file_t *pFile) { -# if USE_REGULAR_FS - pData->fd = open((char*) newFileName, O_WRONLY|O_APPEND|O_CREAT|O_NOCTTY, 0666); -# else - dbgprintf("omhdfs: try to connect to host '%s' at port %d\n", - pData->hdfsHost, pData->hdfsPort); - pData->fs = hdfsConnect(pData->hdfsHost, pData->hdfsPort); - if(pData->fs == NULL) { - dbgprintf("omhdfs: error can not connect to hdfs\n"); + DEFiRet; + + assert(pFile->fh == NULL); + if(pFile->nUsers > 1) + d_pthread_mutex_lock(&pFile->mut); + DBGPRINTF("omhdfs: try to connect to HDFS at host '%s', port %d\n", + pFile->hdfsHost, pFile->hdfsPort); + pFile->fs = hdfsConnect(pFile->hdfsHost, pFile->hdfsPort); + if(pFile->fs == NULL) { + DBGPRINTF("omhdfs: error can not connect to hdfs\n"); + ABORT_FINALIZE(RS_RET_SUSPENDED); } - pData->fd = hdfsOpenFile(pData->fs, (char*)newFileName, O_WRONLY|O_APPEND, 0, 0, 0); - if(pData->fd == NULL) { + pFile->fh = hdfsOpenFile(pFile->fs, (char*)pFile->name, O_WRONLY|O_APPEND, 0, 0, 0); + if(pFile->fh == NULL) { /* maybe the file does not exist, so we try to create it now. * Note that we can not use hdfsExists() because of a deficit in * it: https://issues.apache.org/jira/browse/HDFS-1154 @@ -164,68 +212,75 @@ prepareFile(instanceData *pData, uchar *newFileName) * the file does not exist. 
*/ if(errno == ENOENT) { - dbgprintf("omhdfs: ENOENT trying to append to '%s', now trying create\n", - newFileName); - pData->fd = hdfsOpenFile(pData->fs, (char*)newFileName, O_WRONLY|O_CREAT, 0, 0, 0); + DBGPRINTF("omhdfs: ENOENT trying to append to '%s', now trying create\n", + pFile->name); + pFile->fh = hdfsOpenFile(pFile->fs, (char*)pFile->name, O_WRONLY|O_CREAT, 0, 0, 0); } } - if(!pData->fd) { - dbgprintf("omhdfs: failed to open %s for writing!\n", newFileName); - // TODO: suspend/error report + if(pFile->fh == NULL) { + DBGPRINTF("omhdfs: failed to open %s for writing!\n", pFile->name); + ABORT_FINALIZE(RS_RET_SUSPENDED); } -# endif +finalize_it: + if(pFile->nUsers > 1) + d_pthread_mutex_unlock(&pFile->mut); + RETiRet; } -static rsRetVal writeFile(uchar **ppString, instanceData *pData) + +static inline rsRetVal +fileWrite(file_t *pFile, uchar *buf) { size_t lenWrite; DEFiRet; -# if USE_REGULAR_FS - if (write(pData->fd, ppString[0], strlen((char*)ppString[0])) < 0) { - int e = errno; - dbgprintf("omhdfs write error!\n"); - - /* If the filesystem is filled up, just ignore - * it for now and continue writing when possible - */ - //if(pData->fileType == eTypeFILE && e == ENOSPC) - //return RS_RET_OK; - - //(void) close(pData->fd); - iRet = RS_RET_DISABLE_ACTION; - errno = e; - //logerror((char*) pData->f_fname); - } -# else - lenWrite = strlen((char*) ppString[0]); - tSize num_written_bytes = hdfsWrite(pData->fs, pData->fd, ppString[0], lenWrite); + assert(pFile->fh != NULL); + if(pFile->nUsers > 1) + d_pthread_mutex_lock(&pFile->mut); + lenWrite = strlen((char*) buf); + tSize num_written_bytes = hdfsWrite(pFile->fs, pFile->fh, buf, lenWrite); if((unsigned) num_written_bytes != lenWrite) { - dbgprintf("omhdfs: failed to write %s, expected %lu bytes, written %lu\n", pData->fileName, - lenWrite, (unsigned long) num_written_bytes); - // TODO: suspend/error report + errmsg.LogError(errno, RS_RET_ERR_HDFS_WRITE, "omhdfs: failed to write %s, expected %lu bytes, " + "written %lu\n", pFile->name, lenWrite, (unsigned long) num_written_bytes); + ABORT_FINALIZE(RS_RET_SUSPENDED); } -# endif +finalize_it: + if(pFile->nUsers > 1) + d_pthread_mutex_unlock(&pFile->mut); RETiRet; } +static inline rsRetVal +fileClose(file_t *pFile) +{ + if(pFile->nUsers > 1) + d_pthread_mutex_lock(&pFile->mut); + if(pFile->fh != NULL) { + hdfsCloseFile(pFile->fs, pFile->fh); + pFile->fh = NULL; + } + + if(pFile->nUsers > 1) + d_pthread_mutex_unlock(&pFile->mut); + + return RS_RET_OK; +} + +/* ---END FILE OBJECT---------------------------------------------------- */ + + BEGINcreateInstance CODESTARTcreateInstance - //pData->fd = -1; + pData->pFile = NULL; ENDcreateInstance BEGINfreeInstance CODESTARTfreeInstance -# ifdef USE_REGULAR_FS - if(pData->fd != -1) - close(pData->fd); -# else - hdfsCloseFile(pData->fs, pData->fd); -# endif + fileObjDestruct(&pData->pFile); ENDfreeInstance @@ -235,8 +290,8 @@ ENDtryResume BEGINdoAction CODESTARTdoAction - dbgprintf(" (%s)\n", pData->fileName); - iRet = writeFile(ppString, pData); + DBGPRINTF(" (%s)\n", pData->pFile->name); + iRet = fileWrite(pData->pFile, ppString[0]); ENDdoAction @@ -251,34 +306,29 @@ CODESTARTparseSelectorAct /* ok, if we reach this point, we have something for us */ p += sizeof(":omhdfs:") - 1; /* eat indicator sequence (-1 because of '\0'!) 
*/ CHKiRet(createInstance(&pData)); - CODE_STD_STRING_REQUESTparseSelectorAct(1) - /* rgerhards 2004-11-17: from now, we need to have different - * processing, because after the first comma, the template name - * to use is specified. So we need to scan for the first coma first - * and then look at the rest of the line. - */ CHKiRet(cflineParseTemplateName(&p, *ppOMSR, 0, 0, (uchar*) "RSYSLOG_FileFormat")); //(pszFileDfltTplName == NULL) ? (uchar*)"RSYSLOG_FileFormat" : pszFileDfltTplName)); - // TODO: check for NULL filename - CHKmalloc(pData->fileName = ustrdup(fileName)); + if(fileName == NULL) { + errmsg.LogError(0, RS_RET_ERR_HDFS_OPEN, "omhdfs: no file name specified, can not continue"); + ABORT_FINALIZE(RS_RET_FILE_NOT_SPECIFIED); + } + + CHKiRet(fileObjConstruct(&pData->pFile)); if(hdfsHost == NULL) { - pData->hdfsHost = "default"; + CHKmalloc(pData->pFile->hdfsHost = strdup("default")); } else { - CHKmalloc(pData->hdfsHost = strdup((char*)hdfsHost)); + CHKmalloc(pData->pFile->hdfsHost = strdup((char*)hdfsHost)); } - pData->hdfsPort = hdfsPort; + pData->pFile->hdfsPort = hdfsPort; - prepareFile(pData, pData->fileName); + fileOpen(pData->pFile); -#if 0 - if ( pData->fd < 0 ){ - pData->fd = -1; - dbgprintf("Error opening log file: %s\n", pData->f_fname); - logerror((char*) pData->f_fname); + if(pData->pFile->fh == NULL){ + errmsg.LogError(0, RS_RET_ERR_HDFS_OPEN, "omhdfs: failed to open %s - retrying later", pData->pFile->name); + iRet = RS_RET_SUSPENDED; } -#endif CODE_STD_FINALIZERparseSelectorAct ENDparseSelectorAct @@ -307,6 +357,7 @@ static rsRetVal resetConfigVariables(uchar __attribute__((unused)) *pp, void __a BEGINmodExit CODESTARTmodExit + objRelease(errmsg, CORE_COMPONENT); ENDmodExit @@ -320,6 +371,8 @@ BEGINmodInit() CODESTARTmodInit *ipIFVersProvided = CURR_MOD_IF_VERSION; CODEmodInit_QueryRegCFSLineHdlr + CHKiRet(objUse(errmsg, CORE_COMPONENT)); + CHKiRet(omsdRegCFSLineHdlr((uchar *)"omhdfsfilename", 0, eCmdHdlrGetWord, NULL, &fileName, NULL)); CHKiRet(omsdRegCFSLineHdlr((uchar *)"omhdfshost", 0, eCmdHdlrGetWord, NULL, &hdfsHost, NULL)); CHKiRet(omsdRegCFSLineHdlr((uchar *)"omhdfsport", 0, eCmdHdlrInt, NULL, &hdfsPort, NULL)); diff --git a/runtime/rsyslog.h b/runtime/rsyslog.h index 43203378..7ccc9cb8 100644 --- a/runtime/rsyslog.h +++ b/runtime/rsyslog.h @@ -458,6 +458,9 @@ enum rsRetVal_ /** return value. All methods return this if not specified oth RS_RET_INTERNAL_ERROR = -2175, /**< rsyslogd internal error, unexpected code path reached */ RS_RET_ERR_CRE_AFUX = -2176, /**< error creating AF_UNIX socket (and binding it) */ RS_RET_RATE_LIMITED = -2177, /**< some messages discarded due to exceeding a rate limit */ + RS_RET_ERR_HDFS_WRITE = -2178, /**< error writing to HDFS */ + RS_RET_ERR_HDFS_OPEN = -2179, /**< error during hdfsOpen (e.g. file does not exist) */ + RS_RET_FILE_NOT_SPECIFIED = -2180, /**< file name not configured where this was required */ /* RainerScript error messages (range 1000.. 1999) */ RS_RET_SYSVAR_NOT_FOUND = 1001, /**< system variable could not be found (maybe misspelled) */ -- cgit From 0ee524c391d017225049542fffe572d7de7d1512 Mon Sep 17 00:00:00 2001 From: Rainer Gerhards Date: Fri, 1 Oct 2010 09:55:21 +0000 Subject: omhdfs: fixed small bug ... that I could only see on my execution environment (I right now have two envs, a local one where I can compile, but not run and a remote one where I can do both, but this is a bit less convenient to use). 
--- plugins/omhdfs/omhdfs.c | 1 + 1 file changed, 1 insertion(+) diff --git a/plugins/omhdfs/omhdfs.c b/plugins/omhdfs/omhdfs.c index 71080585..5d28d5dd 100644 --- a/plugins/omhdfs/omhdfs.c +++ b/plugins/omhdfs/omhdfs.c @@ -316,6 +316,7 @@ CODESTARTparseSelectorAct } CHKiRet(fileObjConstruct(&pData->pFile)); + CHKmalloc(pData->pFile->name = (uchar*)strdup((char*)fileName)); if(hdfsHost == NULL) { CHKmalloc(pData->pFile->hdfsHost = strdup("default")); } else { -- cgit From 9696cdef34f5d033564138fb9d4afb87daa6b1be Mon Sep 17 00:00:00 2001 From: Rainer Gerhards Date: Fri, 1 Oct 2010 12:32:01 +0200 Subject: omhdfs: files now kept inside a hashtable for use by multiple actions Note:compiles, but not yet tested --- plugins/omhdfs/omhdfs.c | 40 +++++++++++++++++++++++++++++----------- runtime/hashtable.h | 4 +++- runtime/hashtable/hashtable.c | 27 +++++++++++++++++++++++++++ 3 files changed, 59 insertions(+), 12 deletions(-) diff --git a/plugins/omhdfs/omhdfs.c b/plugins/omhdfs/omhdfs.c index 5d28d5dd..eaca90e4 100644 --- a/plugins/omhdfs/omhdfs.c +++ b/plugins/omhdfs/omhdfs.c @@ -46,6 +46,7 @@ #include "module-template.h" #include "unicode-helper.h" #include "errmsg.h" +#include "hashtable.h" MODULE_TYPE_OUTPUT @@ -54,6 +55,9 @@ MODULE_TYPE_OUTPUT DEF_OMOD_STATIC_DATA DEFobjCurrIf(errmsg) +/* global data */ +static struct hashtable *files; /* holds all file objects that we know */ + /* globals for default values */ static uchar *fileName = NULL; static uchar *hdfsHost = NULL; @@ -172,6 +176,7 @@ fileObjAddUser(file_t *pFile) ++pFile->nUsers; if(pFile->nUsers == 2) pthread_mutex_init(&pFile->mut, NULL); + dbgprintf("omhdfs: file %s now being used by %d actions\n", pFile->name, pFile->nUsers); } static inline rsRetVal @@ -296,6 +301,9 @@ ENDdoAction BEGINparseSelectorAct + file_t *pFile; + int r; + uchar *keybuf; CODESTARTparseSelectorAct /* first check if this config line is actually for us */ @@ -315,21 +323,28 @@ CODESTARTparseSelectorAct ABORT_FINALIZE(RS_RET_FILE_NOT_SPECIFIED); } - CHKiRet(fileObjConstruct(&pData->pFile)); - CHKmalloc(pData->pFile->name = (uchar*)strdup((char*)fileName)); - if(hdfsHost == NULL) { - CHKmalloc(pData->pFile->hdfsHost = strdup("default")); - } else { - CHKmalloc(pData->pFile->hdfsHost = strdup((char*)hdfsHost)); + pFile = hashtable_search(files, fileName); + if(pFile == NULL) { + /* we need a new file object, this one not seen before */ + CHKiRet(fileObjConstruct(&pFile)); + CHKmalloc(pFile->name = (uchar*)strdup((char*)fileName)); + CHKmalloc(keybuf = ustrdup(fileName)); + r = hashtable_insert(files, keybuf, pFile); + if(r == 0) + ABORT_FINALIZE(RS_RET_OUT_OF_MEMORY); } - pData->pFile->hdfsPort = hdfsPort; + fileObjAddUser(pFile); - fileOpen(pData->pFile); - - if(pData->pFile->fh == NULL){ - errmsg.LogError(0, RS_RET_ERR_HDFS_OPEN, "omhdfs: failed to open %s - retrying later", pData->pFile->name); + CHKmalloc(pFile->hdfsHost = strdup((hdfsHost == NULL) ? 
"default" : (char*) hdfsHost)); + pFile->hdfsPort = hdfsPort; + fileOpen(pFile); + if(pFile->fh == NULL){ + errmsg.LogError(0, RS_RET_ERR_HDFS_OPEN, "omhdfs: failed to open %s - retrying later", pFile->name); iRet = RS_RET_SUSPENDED; } + + pData->pFile = pFile; + CODE_STD_FINALIZERparseSelectorAct ENDparseSelectorAct @@ -359,6 +374,8 @@ static rsRetVal resetConfigVariables(uchar __attribute__((unused)) *pp, void __a BEGINmodExit CODESTARTmodExit objRelease(errmsg, CORE_COMPONENT); + if(files != NULL) + hashtable_destroy(files, 1); /* 1 => free all values automatically */ ENDmodExit @@ -373,6 +390,7 @@ CODESTARTmodInit *ipIFVersProvided = CURR_MOD_IF_VERSION; CODEmodInit_QueryRegCFSLineHdlr CHKiRet(objUse(errmsg, CORE_COMPONENT)); + CHKmalloc(files = create_hashtable(20, hash_from_string, key_equals_string)); CHKiRet(omsdRegCFSLineHdlr((uchar *)"omhdfsfilename", 0, eCmdHdlrGetWord, NULL, &fileName, NULL)); CHKiRet(omsdRegCFSLineHdlr((uchar *)"omhdfshost", 0, eCmdHdlrGetWord, NULL, &hdfsHost, NULL)); diff --git a/runtime/hashtable.h b/runtime/hashtable.h index b90781ab..0f980127 100644 --- a/runtime/hashtable.h +++ b/runtime/hashtable.h @@ -196,4 +196,6 @@ hashtable_destroy(struct hashtable *h, int free_values); * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -*/ + */ +unsigned int hash_from_string(void *k) ; +int key_equals_string(void *key1, void *key2); diff --git a/runtime/hashtable/hashtable.c b/runtime/hashtable/hashtable.c index a10e3bc6..e2a2b3f4 100644 --- a/runtime/hashtable/hashtable.c +++ b/runtime/hashtable/hashtable.c @@ -241,6 +241,33 @@ hashtable_destroy(struct hashtable *h, int free_values) free(h); } +/* some generic hash functions */ + +/* one provided by Aaaron Wiebe based on perl's hashng algorithm + * (so probably pretty generic). Not for excessively large strings! + */ +unsigned int +hash_from_string(void *k) +{ + int len; + char *rkey = (char*) k; + unsigned hashval = 1; + + len = (int) strlen(rkey); + while (len--) + hashval = hashval * 33 + *rkey++; + + return hashval; +} + + +int +key_equals_string(void *key1, void *key2) +{ + return strcmp(key1, key2); +} + + /* * Copyright (c) 2002, Christopher Clark * All rights reserved. 
-- cgit From 255895a58b3f2a54fecf971da700caf265b4e1f0 Mon Sep 17 00:00:00 2001 From: Rainer Gerhards Date: Fri, 1 Oct 2010 13:59:48 +0000 Subject: omhdfs: more improvements finally this looks almost production ready for files where no directory path needs to be created --- plugins/imuxsock/imuxsock.c | 4 +-- plugins/omhdfs/Makefile.am | 3 +-- plugins/omhdfs/omhdfs.c | 47 ++++++++++++++++++++++++----------- runtime/hashtable.h | 3 ++- runtime/hashtable/hashtable.c | 17 ++++++++++--- runtime/hashtable/hashtable_private.h | 1 + 6 files changed, 53 insertions(+), 22 deletions(-) diff --git a/plugins/imuxsock/imuxsock.c b/plugins/imuxsock/imuxsock.c index 24bcebb7..ad2d61c8 100644 --- a/plugins/imuxsock/imuxsock.c +++ b/plugins/imuxsock/imuxsock.c @@ -278,7 +278,7 @@ addLstnSocketName(void __attribute__((unused)) *pVal, uchar *pNewVal) } CHKiRet(prop.ConstructFinalize(listeners[nfd].hostName)); if(ratelimitInterval > 0) { - if((listeners[nfd].ht = create_hashtable(1000, hash_from_key_fn, key_equals_fn)) == NULL) { + if((listeners[nfd].ht = create_hashtable(1000, hash_from_key_fn, key_equals_fn, NULL)) == NULL) { /* in this case, we simply turn of rate-limiting */ dbgprintf("imuxsock: turning off rate limiting because we could not " "create hash table\n"); @@ -755,7 +755,7 @@ CODESTARTwillRun if(pLogSockName != NULL) listeners[0].sockName = pLogSockName; if(ratelimitIntervalSysSock > 0) { - if((listeners[0].ht = create_hashtable(1000, hash_from_key_fn, key_equals_fn)) == NULL) { + if((listeners[0].ht = create_hashtable(1000, hash_from_key_fn, key_equals_fn, NULL)) == NULL) { /* in this case, we simply turn of rate-limiting */ dbgprintf("imuxsock: turning off rate limiting because we could not " "create hash table\n"); diff --git a/plugins/omhdfs/Makefile.am b/plugins/omhdfs/Makefile.am index 2e7ef8ea..95e6b102 100644 --- a/plugins/omhdfs/Makefile.am +++ b/plugins/omhdfs/Makefile.am @@ -1,7 +1,6 @@ pkglib_LTLIBRARIES = omhdfs.la omhdfs_la_SOURCES = omhdfs.c -#omhdfs_la_CPPFLAGS = -I$(top_srcdir) $(PTHREADS_CFLAGS) $(RSRT_CFLAGS) -I/usr/lib/jvm/java-6-sun/include -I/usr/lib/jvm/java-6-sun/include/linux omhdfs_la_CPPFLAGS = -I$(top_srcdir) $(PTHREADS_CFLAGS) $(RSRT_CFLAGS) $(JAVA_INCLUDES) omhdfs_la_LDFLAGS = -module -avoid-version -lhdfs $(JAVA_LIBS) -omhdfs_la_LIBADD = +omhdfs_la_LIBADD = $(RSRT_LIBS) diff --git a/plugins/omhdfs/omhdfs.c b/plugins/omhdfs/omhdfs.c index eaca90e4..734c28cd 100644 --- a/plugins/omhdfs/omhdfs.c +++ b/plugins/omhdfs/omhdfs.c @@ -61,6 +61,7 @@ static struct hashtable *files; /* holds all file objects that we know */ /* globals for default values */ static uchar *fileName = NULL; static uchar *hdfsHost = NULL; +static uchar *dfltTplName = NULL; /* default template name to use */ int hdfsPort = 0; /* end globals for default values */ @@ -176,10 +177,10 @@ fileObjAddUser(file_t *pFile) ++pFile->nUsers; if(pFile->nUsers == 2) pthread_mutex_init(&pFile->mut, NULL); - dbgprintf("omhdfs: file %s now being used by %d actions\n", pFile->name, pFile->nUsers); + DBGPRINTF("omhdfs: file %s now being used by %d actions\n", pFile->name, pFile->nUsers); } -static inline rsRetVal +static rsRetVal fileObjDestruct(file_t **ppFile) { file_t *pFile = *ppFile; @@ -193,6 +194,18 @@ fileObjDestruct(file_t **ppFile) return RS_RET_OK; } +/* this function is to be used as destructor for the + * hash table code. 
+ */ +static void +fileObjDestruct4Hashtable(void *ptr) +{ + dbgprintf("omfile: fileObjDestruct4Hashtable called\n"); + file_t *pFile = (file_t*) ptr; + fileObjDestruct(&pFile); +} + + static inline rsRetVal fileOpen(file_t *pFile) { @@ -261,6 +274,8 @@ finalize_it: static inline rsRetVal fileClose(file_t *pFile) { + DEFiRet; + if(pFile->nUsers > 1) d_pthread_mutex_lock(&pFile->mut); if(pFile->fh != NULL) { @@ -271,7 +286,7 @@ fileClose(file_t *pFile) if(pFile->nUsers > 1) d_pthread_mutex_unlock(&pFile->mut); - return RS_RET_OK; + RETiRet; } /* ---END FILE OBJECT---------------------------------------------------- */ @@ -285,7 +300,8 @@ ENDcreateInstance BEGINfreeInstance CODESTARTfreeInstance - fileObjDestruct(&pData->pFile); + if(pData->pFile != NULL) + fileObjDestruct(&pData->pFile); ENDfreeInstance @@ -315,8 +331,8 @@ CODESTARTparseSelectorAct p += sizeof(":omhdfs:") - 1; /* eat indicator sequence (-1 because of '\0'!) */ CHKiRet(createInstance(&pData)); CODE_STD_STRING_REQUESTparseSelectorAct(1) - CHKiRet(cflineParseTemplateName(&p, *ppOMSR, 0, 0, (uchar*) "RSYSLOG_FileFormat")); - //(pszFileDfltTplName == NULL) ? (uchar*)"RSYSLOG_FileFormat" : pszFileDfltTplName)); + CHKiRet(cflineParseTemplateName(&p, *ppOMSR, 0, 0, + (dfltTplName == NULL) ? (uchar*)"RSYSLOG_FileFormat" : dfltTplName)); if(fileName == NULL) { errmsg.LogError(0, RS_RET_ERR_HDFS_OPEN, "omhdfs: no file name specified, can not continue"); @@ -327,17 +343,18 @@ CODESTARTparseSelectorAct if(pFile == NULL) { /* we need a new file object, this one not seen before */ CHKiRet(fileObjConstruct(&pFile)); - CHKmalloc(pFile->name = (uchar*)strdup((char*)fileName)); + CHKmalloc(pFile->name = fileName); CHKmalloc(keybuf = ustrdup(fileName)); + fileName = NULL; /* re-set, data passed to file object */ + CHKmalloc(pFile->hdfsHost = strdup((hdfsHost == NULL) ? "default" : (char*) hdfsHost)); + pFile->hdfsPort = hdfsPort; + fileOpen(pFile); r = hashtable_insert(files, keybuf, pFile); if(r == 0) ABORT_FINALIZE(RS_RET_OUT_OF_MEMORY); } fileObjAddUser(pFile); - CHKmalloc(pFile->hdfsHost = strdup((hdfsHost == NULL) ? 
"default" : (char*) hdfsHost)); - pFile->hdfsPort = hdfsPort; - fileOpen(pFile); if(pFile->fh == NULL){ errmsg.LogError(0, RS_RET_ERR_HDFS_OPEN, "omhdfs: failed to open %s - retrying later", pFile->name); iRet = RS_RET_SUSPENDED; @@ -390,11 +407,13 @@ CODESTARTmodInit *ipIFVersProvided = CURR_MOD_IF_VERSION; CODEmodInit_QueryRegCFSLineHdlr CHKiRet(objUse(errmsg, CORE_COMPONENT)); - CHKmalloc(files = create_hashtable(20, hash_from_string, key_equals_string)); + CHKmalloc(files = create_hashtable(20, hash_from_string, key_equals_string, + fileObjDestruct4Hashtable)); - CHKiRet(omsdRegCFSLineHdlr((uchar *)"omhdfsfilename", 0, eCmdHdlrGetWord, NULL, &fileName, NULL)); - CHKiRet(omsdRegCFSLineHdlr((uchar *)"omhdfshost", 0, eCmdHdlrGetWord, NULL, &hdfsHost, NULL)); - CHKiRet(omsdRegCFSLineHdlr((uchar *)"omhdfsport", 0, eCmdHdlrInt, NULL, &hdfsPort, NULL)); + CHKiRet(regCfSysLineHdlr((uchar *)"omhdfsfilename", 0, eCmdHdlrGetWord, NULL, &fileName, NULL)); + CHKiRet(regCfSysLineHdlr((uchar *)"omhdfshost", 0, eCmdHdlrGetWord, NULL, &hdfsHost, NULL)); + CHKiRet(regCfSysLineHdlr((uchar *)"omhdfsport", 0, eCmdHdlrInt, NULL, &hdfsPort, NULL)); + CHKiRet(regCfSysLineHdlr((uchar *)"omhdfsdefaulttemplate", 0, eCmdHdlrGetWord, NULL, &dfltTplName, NULL)); CHKiRet(omsdRegCFSLineHdlr((uchar *)"resetconfigvariables", 1, eCmdHdlrCustomHandler, resetConfigVariables, NULL, STD_LOADABLE_MODULE_ID)); CODEmodInit_QueryRegCFSLineHdlr ENDmodInit diff --git a/runtime/hashtable.h b/runtime/hashtable.h index 0f980127..f777ad0b 100644 --- a/runtime/hashtable.h +++ b/runtime/hashtable.h @@ -68,13 +68,14 @@ struct hashtable; * @param minsize minimum initial size of hashtable * @param hashfunction function for hashing keys * @param key_eq_fn function for determining key equality + * @param dest destructor for value entries (NULL -> use free()) * @return newly created hashtable or NULL on failure */ struct hashtable * create_hashtable(unsigned int minsize, unsigned int (*hashfunction) (void*), - int (*key_eq_fn) (void*,void*)); + int (*key_eq_fn) (void*,void*), void (*dest) (void*)); /***************************************************************************** * hashtable_insert diff --git a/runtime/hashtable/hashtable.c b/runtime/hashtable/hashtable.c index e2a2b3f4..41fc60fe 100644 --- a/runtime/hashtable/hashtable.c +++ b/runtime/hashtable/hashtable.c @@ -29,7 +29,7 @@ const float max_load_factor = 0.65; struct hashtable * create_hashtable(unsigned int minsize, unsigned int (*hashf) (void*), - int (*eqf) (void*,void*)) + int (*eqf) (void*,void*), void (*dest)(void*)) { struct hashtable *h; unsigned int pindex, size = primes[0]; @@ -49,6 +49,7 @@ create_hashtable(unsigned int minsize, h->entrycount = 0; h->hashfn = hashf; h->eqfn = eqf; + h->dest = dest; h->loadlimit = (unsigned int) ceil(size * max_load_factor); return h; } @@ -225,7 +226,16 @@ hashtable_destroy(struct hashtable *h, int free_values) { e = table[i]; while (NULL != e) - { f = e; e = e->next; freekey(f->k); free(f->v); free(f); } + { + f = e; + e = e->next; + freekey(f->k); + if(h->dest == NULL) + free(f->v); + else + h->dest(f->v); + free(f); + } } } else @@ -264,7 +274,8 @@ hash_from_string(void *k) int key_equals_string(void *key1, void *key2) { - return strcmp(key1, key2); + /* we must return true IF the keys are equal! 
*/ + return !strcmp(key1, key2); } diff --git a/runtime/hashtable/hashtable_private.h b/runtime/hashtable/hashtable_private.h index 3e95f600..10b82da4 100644 --- a/runtime/hashtable/hashtable_private.h +++ b/runtime/hashtable/hashtable_private.h @@ -21,6 +21,7 @@ struct hashtable { unsigned int primeindex; unsigned int (*hashfn) (void *k); int (*eqfn) (void *k1, void *k2); + void (*dest) (void *v); /* destructor for values, if NULL use free() */ }; /*****************************************************************************/ -- cgit From 670e81c9a8275a5509efd71dae66d9a267ec1574 Mon Sep 17 00:00:00 2001 From: Rainer Gerhards Date: Fri, 1 Oct 2010 14:36:39 +0000 Subject: omhdfs: made action suspend/resume working --- plugins/omhdfs/omhdfs.c | 36 ++++++++++++++++++++++++++---------- 1 file changed, 26 insertions(+), 10 deletions(-) diff --git a/plugins/omhdfs/omhdfs.c b/plugins/omhdfs/omhdfs.c index 734c28cd..4fbf2ef4 100644 --- a/plugins/omhdfs/omhdfs.c +++ b/plugins/omhdfs/omhdfs.c @@ -276,16 +276,19 @@ fileClose(file_t *pFile) { DEFiRet; + if(pFile->fh == NULL) + FINALIZE; + if(pFile->nUsers > 1) d_pthread_mutex_lock(&pFile->mut); - if(pFile->fh != NULL) { - hdfsCloseFile(pFile->fs, pFile->fh); - pFile->fh = NULL; - } + + hdfsCloseFile(pFile->fs, pFile->fh); + pFile->fh = NULL; if(pFile->nUsers > 1) d_pthread_mutex_unlock(&pFile->mut); +finalize_it: RETiRet; } @@ -307,12 +310,26 @@ ENDfreeInstance BEGINtryResume CODESTARTtryResume + fileClose(pData->pFile); + fileOpen(pData->pFile); + if(pData->pFile->fh == NULL){ + dbgprintf("omhdfs: tried to resume file %s, but still no luck...\n", + pData->pFile->name); + iRet = RS_RET_SUSPENDED; + } ENDtryResume BEGINdoAction CODESTARTdoAction DBGPRINTF(" (%s)\n", pData->pFile->name); + if(pData->pFile->fh == NULL) { + fileOpen(pData->pFile); + if(pData->pFile->fh == NULL) { + ABORT_FINALIZE(RS_RET_SUSPENDED); + } + } iRet = fileWrite(pData->pFile, ppString[0]); +finalize_it: ENDdoAction @@ -349,17 +366,16 @@ CODESTARTparseSelectorAct CHKmalloc(pFile->hdfsHost = strdup((hdfsHost == NULL) ? "default" : (char*) hdfsHost)); pFile->hdfsPort = hdfsPort; fileOpen(pFile); + if(pFile->fh == NULL){ + errmsg.LogError(0, RS_RET_ERR_HDFS_OPEN, "omhdfs: failed to open %s - " + "retrying later", pFile->name); + iRet = RS_RET_SUSPENDED; + } r = hashtable_insert(files, keybuf, pFile); if(r == 0) ABORT_FINALIZE(RS_RET_OUT_OF_MEMORY); } fileObjAddUser(pFile); - - if(pFile->fh == NULL){ - errmsg.LogError(0, RS_RET_ERR_HDFS_OPEN, "omhdfs: failed to open %s - retrying later", pFile->name); - iRet = RS_RET_SUSPENDED; - } - pData->pFile = pFile; CODE_STD_FINALIZERparseSelectorAct -- cgit From d5f16404f93d54afddebb9fb683469fc712d2335 Mon Sep 17 00:00:00 2001 From: Rainer Gerhards Date: Fri, 1 Oct 2010 17:26:14 +0200 Subject: omhdfs: added doc --- ChangeLog | 3 +++ doc/omhdfs.html | 28 ++++++++++++++++++++++++---- plugins/omhdfs/omhdfs.c | 2 +- 3 files changed, 28 insertions(+), 5 deletions(-) diff --git a/ChangeLog b/ChangeLog index f9b79392..8129f634 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,4 +1,7 @@ --------------------------------------------------------------------------- +Version 5.7.2 [V5-DEVEL] (rgerhards), 2010-09-?? +- support for Hadoop's HDFS added (via omhdfs) +--------------------------------------------------------------------------- Version 5.7.1 [V5-DEVEL] (rgerhards), 2010-09-?? 
- imuxsock now optionally use SCM_CREDENTIALS to pull the pid from the log socket itself diff --git a/doc/omhdfs.html b/doc/omhdfs.html index 827697b6..3849f167 100644 --- a/doc/omhdfs.html +++ b/doc/omhdfs.html @@ -12,8 +12,19 @@ file system.

Configuration Directives:

-  • $...
-    option...
+  • $OMHDFSFileName [name]
+    The name of the file to which the output data shall be written.
+  • $OMHDFSHost [name]
+    Name or IP address of the HDFS host to connect to.
+  • $OMHDFSPort [name]
+    Port on which to connect to the HDFS host.
+  • $OMHDFSDefaultTemplate [name]
+    Default template to be used when none is specified. This saves the work of
+    specifying the same template over and over again. Of course, the default
+    template can be overwritten via the usual method.
Caveats/Known Bugs:
@@ -23,7 +34,8 @@
 that HDFS is written in Java and libhdfs uses JNI to talk to it. That requires
 that various system-specific environment options and paths be set correctly. At
 this point, we leave this to the user. If someone knows how to do it better,
 please drop us a line!
-In order to build, you need to set these environment variables BEFORE running
+  • In order to build, you need to set these environment variables BEFORE running
 ./configure:
   • JAVA_INCLUDES - must have all include paths that are needed to build
     JNI C programs, including the -I options necessary for gcc. An example is
   • JAVA_LIBS - must have all library paths that are needed to build
     JNI C programs, including the -l/-L options necessary for gcc. An example is
     # export JAVA_LIBS="-L/usr/java/jdk1.6.0_21/jre/lib/amd64 -L/usr/java/jdk1.6.0_21/jre/lib/amd64/server -ljava -ljvm -lverify"
+  • Because of the HDFS architecture, you must make sure that all relevant environment
+    variables (the usual Java stuff and HADOOP's home directory) are properly set.
+  • As it looks, libhdfs makes Java throw exceptions to stdout. There is no
+    known work-around for this (and it usually should not cause any trouble).

    Sample:
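
A fuller configuration sketch using the directives documented above, added for
illustration and not part of the patch: host, port and file name are example
values, and RSYSLOG_FileFormat is only the built-in template the module falls
back to anyway.

  # sketch only - adjust host, port and file name to the local setup
  $ModLoad omhdfs
  $OMHDFSHost namenode.example.com
  $OMHDFSPort 8020
  $OMHDFSFileName /rsyslog/logs/syslog.log
  $OMHDFSDefaultTemplate RSYSLOG_FileFormat
  *.* :omhdfs: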

    - [manual index] [rsyslog site]

    This documentation is part of the rsyslog diff --git a/plugins/omhdfs/omhdfs.c b/plugins/omhdfs/omhdfs.c index 4fbf2ef4..42ed834f 100644 --- a/plugins/omhdfs/omhdfs.c +++ b/plugins/omhdfs/omhdfs.c @@ -1,5 +1,5 @@ /* omhdfs.c - * This is the implementation of the build-in file output module. + * This is an output module to support Hadoop's HDFS. * * NOTE: read comments in module-template.h to understand how this file * works! -- cgit From e40cb595a2da943483124fae8f215a397add9fca Mon Sep 17 00:00:00 2001 From: Rainer Gerhards Date: Mon, 4 Oct 2010 13:58:41 +0200 Subject: omhdfs: support for HUP added --- plugins/omhdfs/omhdfs.c | 42 ++++- runtime/Makefile.am | 6 +- runtime/hashtable.c | 313 ++++++++++++++++++++++++++++++++++ runtime/hashtable/hashtable.c | 313 ---------------------------------- runtime/hashtable/hashtable_itr.c | 188 -------------------- runtime/hashtable/hashtable_itr.h | 112 ------------ runtime/hashtable/hashtable_private.h | 86 ---------- runtime/hashtable_itr.c | 190 +++++++++++++++++++++ runtime/hashtable_itr.h | 112 ++++++++++++ runtime/hashtable_private.h | 86 ++++++++++ 10 files changed, 738 insertions(+), 710 deletions(-) create mode 100644 runtime/hashtable.c delete mode 100644 runtime/hashtable/hashtable.c delete mode 100644 runtime/hashtable/hashtable_itr.c delete mode 100644 runtime/hashtable/hashtable_itr.h delete mode 100644 runtime/hashtable/hashtable_private.h create mode 100644 runtime/hashtable_itr.c create mode 100644 runtime/hashtable_itr.h create mode 100644 runtime/hashtable_private.h diff --git a/plugins/omhdfs/omhdfs.c b/plugins/omhdfs/omhdfs.c index 42ed834f..eefea722 100644 --- a/plugins/omhdfs/omhdfs.c +++ b/plugins/omhdfs/omhdfs.c @@ -47,6 +47,7 @@ #include "unicode-helper.h" #include "errmsg.h" #include "hashtable.h" +#include "hashtable_itr.h" MODULE_TYPE_OUTPUT @@ -200,7 +201,6 @@ fileObjDestruct(file_t **ppFile) static void fileObjDestruct4Hashtable(void *ptr) { - dbgprintf("omfile: fileObjDestruct4Hashtable called\n"); file_t *pFile = (file_t*) ptr; fileObjDestruct(&pFile); } @@ -214,6 +214,7 @@ fileOpen(file_t *pFile) assert(pFile->fh == NULL); if(pFile->nUsers > 1) d_pthread_mutex_lock(&pFile->mut); + DBGPRINTF("omhdfs: try to connect to HDFS at host '%s', port %d\n", pFile->hdfsHost, pFile->hdfsPort); pFile->fs = hdfsConnect(pFile->hdfsHost, pFile->hdfsPort); @@ -256,6 +257,17 @@ fileWrite(file_t *pFile, uchar *buf) assert(pFile->fh != NULL); if(pFile->nUsers > 1) d_pthread_mutex_lock(&pFile->mut); + + /* open file if not open. This must be done *here* and while mutex-protected + * because of HUP handling (which is async to normal processing!). 
+ */ + if(pFile->fh == NULL) { + fileOpen(pFile); + if(pFile->fh == NULL) { + ABORT_FINALIZE(RS_RET_SUSPENDED); + } + } + lenWrite = strlen((char*) buf); tSize num_written_bytes = hdfsWrite(pFile->fs, pFile->fh, buf, lenWrite); if((unsigned) num_written_bytes != lenWrite) { @@ -321,15 +333,8 @@ ENDtryResume BEGINdoAction CODESTARTdoAction - DBGPRINTF(" (%s)\n", pData->pFile->name); - if(pData->pFile->fh == NULL) { - fileOpen(pData->pFile); - if(pData->pFile->fh == NULL) { - ABORT_FINALIZE(RS_RET_SUSPENDED); - } - } + DBGPRINTF("omuxsock: action to to write to %s\n", pData->pFile->name); iRet = fileWrite(pData->pFile, ppString[0]); -finalize_it: ENDdoAction @@ -382,6 +387,24 @@ CODE_STD_FINALIZERparseSelectorAct ENDparseSelectorAct +BEGINdoHUP + file_t *pFile; + struct hashtable_itr *itr; +CODESTARTdoHUP + /* Iterator constructor only returns a valid iterator if + * the hashtable is not empty */ + itr = hashtable_iterator(files); + if(hashtable_count(files) > 0) + { + do { + pFile = (file_t *) hashtable_iterator_value(itr); + fileClose(pFile); + DBGPRINTF("imuxsock: HUP, closing file %s\n", pFile->name); + } while (hashtable_iterator_advance(itr)); + } +ENDdoHUP + + /* Reset config variables for this module to default values. * rgerhards, 2007-07-17 */ @@ -415,6 +438,7 @@ ENDmodExit BEGINqueryEtryPt CODESTARTqueryEtryPt CODEqueryEtryPt_STD_OMOD_QUERIES +CODEqueryEtryPt_doHUP ENDqueryEtryPt diff --git a/runtime/Makefile.am b/runtime/Makefile.am index 5a0c4437..93817e75 100644 --- a/runtime/Makefile.am +++ b/runtime/Makefile.am @@ -95,9 +95,11 @@ librsyslog_la_SOURCES = \ ../parse.c \ ../parse.h \ \ - hashtable/hashtable.c \ + hashtable.c \ hashtable.h \ - hashtable/hashtable_private.h \ + hashtable_itr.c \ + hashtable_itr.h \ + hashtable_private.h \ \ ../outchannel.c \ ../outchannel.h \ diff --git a/runtime/hashtable.c b/runtime/hashtable.c new file mode 100644 index 00000000..41fc60fe --- /dev/null +++ b/runtime/hashtable.c @@ -0,0 +1,313 @@ +/* Copyright (C) 2004 Christopher Clark */ +/* taken from http://www.cl.cam.ac.uk/~cwc22/hashtable/ */ + +#include "hashtable.h" +#include "hashtable_private.h" +#include +#include +#include +#include + +/* +Credit for primes table: Aaron Krowne + http://br.endernet.org/~akrowne/ + http://planetmath.org/encyclopedia/GoodHashTablePrimes.html +*/ +static const unsigned int primes[] = { +53, 97, 193, 389, +769, 1543, 3079, 6151, +12289, 24593, 49157, 98317, +196613, 393241, 786433, 1572869, +3145739, 6291469, 12582917, 25165843, +50331653, 100663319, 201326611, 402653189, +805306457, 1610612741 +}; +const unsigned int prime_table_length = sizeof(primes)/sizeof(primes[0]); +const float max_load_factor = 0.65; + +/*****************************************************************************/ +struct hashtable * +create_hashtable(unsigned int minsize, + unsigned int (*hashf) (void*), + int (*eqf) (void*,void*), void (*dest)(void*)) +{ + struct hashtable *h; + unsigned int pindex, size = primes[0]; + /* Check requested hashtable isn't too large */ + if (minsize > (1u << 30)) return NULL; + /* Enforce size as prime */ + for (pindex=0; pindex < prime_table_length; pindex++) { + if (primes[pindex] > minsize) { size = primes[pindex]; break; } + } + h = (struct hashtable *)malloc(sizeof(struct hashtable)); + if (NULL == h) return NULL; /*oom*/ + h->table = (struct entry **)malloc(sizeof(struct entry*) * size); + if (NULL == h->table) { free(h); return NULL; } /*oom*/ + memset(h->table, 0, size * sizeof(struct entry *)); + h->tablelength = size; + h->primeindex = 
pindex; + h->entrycount = 0; + h->hashfn = hashf; + h->eqfn = eqf; + h->dest = dest; + h->loadlimit = (unsigned int) ceil(size * max_load_factor); + return h; +} + +/*****************************************************************************/ +unsigned int +hash(struct hashtable *h, void *k) +{ + /* Aim to protect against poor hash functions by adding logic here + * - logic taken from java 1.4 hashtable source */ + unsigned int i = h->hashfn(k); + i += ~(i << 9); + i ^= ((i >> 14) | (i << 18)); /* >>> */ + i += (i << 4); + i ^= ((i >> 10) | (i << 22)); /* >>> */ + return i; +} + +/*****************************************************************************/ +static int +hashtable_expand(struct hashtable *h) +{ + /* Double the size of the table to accomodate more entries */ + struct entry **newtable; + struct entry *e; + struct entry **pE; + unsigned int newsize, i, idx; + /* Check we're not hitting max capacity */ + if (h->primeindex == (prime_table_length - 1)) return 0; + newsize = primes[++(h->primeindex)]; + + newtable = (struct entry **)malloc(sizeof(struct entry*) * newsize); + if (NULL != newtable) + { + memset(newtable, 0, newsize * sizeof(struct entry *)); + /* This algorithm is not 'stable'. ie. it reverses the list + * when it transfers entries between the tables */ + for (i = 0; i < h->tablelength; i++) { + while (NULL != (e = h->table[i])) { + h->table[i] = e->next; + idx = indexFor(newsize,e->h); + e->next = newtable[idx]; + newtable[idx] = e; + } + } + free(h->table); + h->table = newtable; + } + /* Plan B: realloc instead */ + else + { + newtable = (struct entry **) + realloc(h->table, newsize * sizeof(struct entry *)); + if (NULL == newtable) { (h->primeindex)--; return 0; } + h->table = newtable; + memset(newtable[h->tablelength], 0, newsize - h->tablelength); + for (i = 0; i < h->tablelength; i++) { + for (pE = &(newtable[i]), e = *pE; e != NULL; e = *pE) { + idx = indexFor(newsize,e->h); + if (idx == i) + { + pE = &(e->next); + } + else + { + *pE = e->next; + e->next = newtable[idx]; + newtable[idx] = e; + } + } + } + } + h->tablelength = newsize; + h->loadlimit = (unsigned int) ceil(newsize * max_load_factor); + return -1; +} + +/*****************************************************************************/ +unsigned int +hashtable_count(struct hashtable *h) +{ + return h->entrycount; +} + +/*****************************************************************************/ +int +hashtable_insert(struct hashtable *h, void *k, void *v) +{ + /* This method allows duplicate keys - but they shouldn't be used */ + unsigned int idx; + struct entry *e; + if (++(h->entrycount) > h->loadlimit) + { + /* Ignore the return value. If expand fails, we should + * still try cramming just this value into the existing table + * -- we may not have memory for a larger table, but one more + * element may be ok. 
Next time we insert, we'll try expanding again.*/ + hashtable_expand(h); + } + e = (struct entry *)malloc(sizeof(struct entry)); + if (NULL == e) { --(h->entrycount); return 0; } /*oom*/ + e->h = hash(h,k); + idx = indexFor(h->tablelength,e->h); + e->k = k; + e->v = v; + e->next = h->table[idx]; + h->table[idx] = e; + return -1; +} + +/*****************************************************************************/ +void * /* returns value associated with key */ +hashtable_search(struct hashtable *h, void *k) +{ + struct entry *e; + unsigned int hashvalue, idx; + hashvalue = hash(h,k); + idx = indexFor(h->tablelength,hashvalue); + e = h->table[idx]; + while (NULL != e) + { + /* Check hash value to short circuit heavier comparison */ + if ((hashvalue == e->h) && (h->eqfn(k, e->k))) return e->v; + e = e->next; + } + return NULL; +} + +/*****************************************************************************/ +void * /* returns value associated with key */ +hashtable_remove(struct hashtable *h, void *k) +{ + /* TODO: consider compacting the table when the load factor drops enough, + * or provide a 'compact' method. */ + + struct entry *e; + struct entry **pE; + void *v; + unsigned int hashvalue, idx; + + hashvalue = hash(h,k); + idx = indexFor(h->tablelength,hash(h,k)); + pE = &(h->table[idx]); + e = *pE; + while (NULL != e) + { + /* Check hash value to short circuit heavier comparison */ + if ((hashvalue == e->h) && (h->eqfn(k, e->k))) + { + *pE = e->next; + h->entrycount--; + v = e->v; + freekey(e->k); + free(e); + return v; + } + pE = &(e->next); + e = e->next; + } + return NULL; +} + +/*****************************************************************************/ +/* destroy */ +void +hashtable_destroy(struct hashtable *h, int free_values) +{ + unsigned int i; + struct entry *e, *f; + struct entry **table = h->table; + if (free_values) + { + for (i = 0; i < h->tablelength; i++) + { + e = table[i]; + while (NULL != e) + { + f = e; + e = e->next; + freekey(f->k); + if(h->dest == NULL) + free(f->v); + else + h->dest(f->v); + free(f); + } + } + } + else + { + for (i = 0; i < h->tablelength; i++) + { + e = table[i]; + while (NULL != e) + { f = e; e = e->next; freekey(f->k); free(f); } + } + } + free(h->table); + free(h); +} + +/* some generic hash functions */ + +/* one provided by Aaaron Wiebe based on perl's hashng algorithm + * (so probably pretty generic). Not for excessively large strings! + */ +unsigned int +hash_from_string(void *k) +{ + int len; + char *rkey = (char*) k; + unsigned hashval = 1; + + len = (int) strlen(rkey); + while (len--) + hashval = hashval * 33 + *rkey++; + + return hashval; +} + + +int +key_equals_string(void *key1, void *key2) +{ + /* we must return true IF the keys are equal! */ + return !strcmp(key1, key2); +} + + +/* + * Copyright (c) 2002, Christopher Clark + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. 
+ * + * * Neither the name of the original author; nor the names of any contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER + * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, + * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, + * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR + * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF + * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING + * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +*/ diff --git a/runtime/hashtable/hashtable.c b/runtime/hashtable/hashtable.c deleted file mode 100644 index 41fc60fe..00000000 --- a/runtime/hashtable/hashtable.c +++ /dev/null @@ -1,313 +0,0 @@ -/* Copyright (C) 2004 Christopher Clark */ -/* taken from http://www.cl.cam.ac.uk/~cwc22/hashtable/ */ - -#include "hashtable.h" -#include "hashtable_private.h" -#include -#include -#include -#include - -/* -Credit for primes table: Aaron Krowne - http://br.endernet.org/~akrowne/ - http://planetmath.org/encyclopedia/GoodHashTablePrimes.html -*/ -static const unsigned int primes[] = { -53, 97, 193, 389, -769, 1543, 3079, 6151, -12289, 24593, 49157, 98317, -196613, 393241, 786433, 1572869, -3145739, 6291469, 12582917, 25165843, -50331653, 100663319, 201326611, 402653189, -805306457, 1610612741 -}; -const unsigned int prime_table_length = sizeof(primes)/sizeof(primes[0]); -const float max_load_factor = 0.65; - -/*****************************************************************************/ -struct hashtable * -create_hashtable(unsigned int minsize, - unsigned int (*hashf) (void*), - int (*eqf) (void*,void*), void (*dest)(void*)) -{ - struct hashtable *h; - unsigned int pindex, size = primes[0]; - /* Check requested hashtable isn't too large */ - if (minsize > (1u << 30)) return NULL; - /* Enforce size as prime */ - for (pindex=0; pindex < prime_table_length; pindex++) { - if (primes[pindex] > minsize) { size = primes[pindex]; break; } - } - h = (struct hashtable *)malloc(sizeof(struct hashtable)); - if (NULL == h) return NULL; /*oom*/ - h->table = (struct entry **)malloc(sizeof(struct entry*) * size); - if (NULL == h->table) { free(h); return NULL; } /*oom*/ - memset(h->table, 0, size * sizeof(struct entry *)); - h->tablelength = size; - h->primeindex = pindex; - h->entrycount = 0; - h->hashfn = hashf; - h->eqfn = eqf; - h->dest = dest; - h->loadlimit = (unsigned int) ceil(size * max_load_factor); - return h; -} - -/*****************************************************************************/ -unsigned int -hash(struct hashtable *h, void *k) -{ - /* Aim to protect against poor hash functions by adding logic here - * - logic taken from java 1.4 hashtable source */ - unsigned int i = h->hashfn(k); - i += ~(i << 9); - i ^= ((i >> 14) | (i << 18)); /* >>> */ - i += (i << 4); - i ^= ((i >> 10) | (i << 22)); /* >>> */ - return i; -} - -/*****************************************************************************/ -static int -hashtable_expand(struct hashtable *h) -{ - /* Double the size of the table to 
accomodate more entries */ - struct entry **newtable; - struct entry *e; - struct entry **pE; - unsigned int newsize, i, idx; - /* Check we're not hitting max capacity */ - if (h->primeindex == (prime_table_length - 1)) return 0; - newsize = primes[++(h->primeindex)]; - - newtable = (struct entry **)malloc(sizeof(struct entry*) * newsize); - if (NULL != newtable) - { - memset(newtable, 0, newsize * sizeof(struct entry *)); - /* This algorithm is not 'stable'. ie. it reverses the list - * when it transfers entries between the tables */ - for (i = 0; i < h->tablelength; i++) { - while (NULL != (e = h->table[i])) { - h->table[i] = e->next; - idx = indexFor(newsize,e->h); - e->next = newtable[idx]; - newtable[idx] = e; - } - } - free(h->table); - h->table = newtable; - } - /* Plan B: realloc instead */ - else - { - newtable = (struct entry **) - realloc(h->table, newsize * sizeof(struct entry *)); - if (NULL == newtable) { (h->primeindex)--; return 0; } - h->table = newtable; - memset(newtable[h->tablelength], 0, newsize - h->tablelength); - for (i = 0; i < h->tablelength; i++) { - for (pE = &(newtable[i]), e = *pE; e != NULL; e = *pE) { - idx = indexFor(newsize,e->h); - if (idx == i) - { - pE = &(e->next); - } - else - { - *pE = e->next; - e->next = newtable[idx]; - newtable[idx] = e; - } - } - } - } - h->tablelength = newsize; - h->loadlimit = (unsigned int) ceil(newsize * max_load_factor); - return -1; -} - -/*****************************************************************************/ -unsigned int -hashtable_count(struct hashtable *h) -{ - return h->entrycount; -} - -/*****************************************************************************/ -int -hashtable_insert(struct hashtable *h, void *k, void *v) -{ - /* This method allows duplicate keys - but they shouldn't be used */ - unsigned int idx; - struct entry *e; - if (++(h->entrycount) > h->loadlimit) - { - /* Ignore the return value. If expand fails, we should - * still try cramming just this value into the existing table - * -- we may not have memory for a larger table, but one more - * element may be ok. Next time we insert, we'll try expanding again.*/ - hashtable_expand(h); - } - e = (struct entry *)malloc(sizeof(struct entry)); - if (NULL == e) { --(h->entrycount); return 0; } /*oom*/ - e->h = hash(h,k); - idx = indexFor(h->tablelength,e->h); - e->k = k; - e->v = v; - e->next = h->table[idx]; - h->table[idx] = e; - return -1; -} - -/*****************************************************************************/ -void * /* returns value associated with key */ -hashtable_search(struct hashtable *h, void *k) -{ - struct entry *e; - unsigned int hashvalue, idx; - hashvalue = hash(h,k); - idx = indexFor(h->tablelength,hashvalue); - e = h->table[idx]; - while (NULL != e) - { - /* Check hash value to short circuit heavier comparison */ - if ((hashvalue == e->h) && (h->eqfn(k, e->k))) return e->v; - e = e->next; - } - return NULL; -} - -/*****************************************************************************/ -void * /* returns value associated with key */ -hashtable_remove(struct hashtable *h, void *k) -{ - /* TODO: consider compacting the table when the load factor drops enough, - * or provide a 'compact' method. 
*/ - - struct entry *e; - struct entry **pE; - void *v; - unsigned int hashvalue, idx; - - hashvalue = hash(h,k); - idx = indexFor(h->tablelength,hash(h,k)); - pE = &(h->table[idx]); - e = *pE; - while (NULL != e) - { - /* Check hash value to short circuit heavier comparison */ - if ((hashvalue == e->h) && (h->eqfn(k, e->k))) - { - *pE = e->next; - h->entrycount--; - v = e->v; - freekey(e->k); - free(e); - return v; - } - pE = &(e->next); - e = e->next; - } - return NULL; -} - -/*****************************************************************************/ -/* destroy */ -void -hashtable_destroy(struct hashtable *h, int free_values) -{ - unsigned int i; - struct entry *e, *f; - struct entry **table = h->table; - if (free_values) - { - for (i = 0; i < h->tablelength; i++) - { - e = table[i]; - while (NULL != e) - { - f = e; - e = e->next; - freekey(f->k); - if(h->dest == NULL) - free(f->v); - else - h->dest(f->v); - free(f); - } - } - } - else - { - for (i = 0; i < h->tablelength; i++) - { - e = table[i]; - while (NULL != e) - { f = e; e = e->next; freekey(f->k); free(f); } - } - } - free(h->table); - free(h); -} - -/* some generic hash functions */ - -/* one provided by Aaaron Wiebe based on perl's hashng algorithm - * (so probably pretty generic). Not for excessively large strings! - */ -unsigned int -hash_from_string(void *k) -{ - int len; - char *rkey = (char*) k; - unsigned hashval = 1; - - len = (int) strlen(rkey); - while (len--) - hashval = hashval * 33 + *rkey++; - - return hashval; -} - - -int -key_equals_string(void *key1, void *key2) -{ - /* we must return true IF the keys are equal! */ - return !strcmp(key1, key2); -} - - -/* - * Copyright (c) 2002, Christopher Clark - * All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * - * * Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * - * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * - * * Neither the name of the original author; nor the names of any contributors - * may be used to endorse or promote products derived from this software - * without specific prior written permission. - * - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER - * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, - * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, - * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR - * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF - * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING - * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
-*/ diff --git a/runtime/hashtable/hashtable_itr.c b/runtime/hashtable/hashtable_itr.c deleted file mode 100644 index 5dced841..00000000 --- a/runtime/hashtable/hashtable_itr.c +++ /dev/null @@ -1,188 +0,0 @@ -/* Copyright (C) 2002, 2004 Christopher Clark */ - -#include "hashtable.h" -#include "hashtable_private.h" -#include "hashtable_itr.h" -#include /* defines NULL */ - -/*****************************************************************************/ -/* hashtable_iterator - iterator constructor */ - -struct hashtable_itr * -hashtable_iterator(struct hashtable *h) -{ - unsigned int i, tablelength; - struct hashtable_itr *itr = (struct hashtable_itr *) - malloc(sizeof(struct hashtable_itr)); - if (NULL == itr) return NULL; - itr->h = h; - itr->e = NULL; - itr->parent = NULL; - tablelength = h->tablelength; - itr->index = tablelength; - if (0 == h->entrycount) return itr; - - for (i = 0; i < tablelength; i++) - { - if (NULL != h->table[i]) - { - itr->e = h->table[i]; - itr->index = i; - break; - } - } - return itr; -} - -/*****************************************************************************/ -/* key - return the key of the (key,value) pair at the current position */ -/* value - return the value of the (key,value) pair at the current position */ - -void * -hashtable_iterator_key(struct hashtable_itr *i) -{ return i->e->k; } - -void * -hashtable_iterator_value(struct hashtable_itr *i) -{ return i->e->v; } - -/*****************************************************************************/ -/* advance - advance the iterator to the next element - * returns zero if advanced to end of table */ - -int -hashtable_iterator_advance(struct hashtable_itr *itr) -{ - unsigned int j,tablelength; - struct entry **table; - struct entry *next; - if (NULL == itr->e) return 0; /* stupidity check */ - - next = itr->e->next; - if (NULL != next) - { - itr->parent = itr->e; - itr->e = next; - return -1; - } - tablelength = itr->h->tablelength; - itr->parent = NULL; - if (tablelength <= (j = ++(itr->index))) - { - itr->e = NULL; - return 0; - } - table = itr->h->table; - while (NULL == (next = table[j])) - { - if (++j >= tablelength) - { - itr->index = tablelength; - itr->e = NULL; - return 0; - } - } - itr->index = j; - itr->e = next; - return -1; -} - -/*****************************************************************************/ -/* remove - remove the entry at the current iterator position - * and advance the iterator, if there is a successive - * element. - * If you want the value, read it before you remove: - * beware memory leaks if you don't. - * Returns zero if end of iteration. 
*/ - -int -hashtable_iterator_remove(struct hashtable_itr *itr) -{ - struct entry *remember_e, *remember_parent; - int ret; - - /* Do the removal */ - if (NULL == (itr->parent)) - { - /* element is head of a chain */ - itr->h->table[itr->index] = itr->e->next; - } else { - /* element is mid-chain */ - itr->parent->next = itr->e->next; - } - /* itr->e is now outside the hashtable */ - remember_e = itr->e; - itr->h->entrycount--; - freekey(remember_e->k); - - /* Advance the iterator, correcting the parent */ - remember_parent = itr->parent; - ret = hashtable_iterator_advance(itr); - if (itr->parent == remember_e) { itr->parent = remember_parent; } - free(remember_e); - return ret; -} - -/*****************************************************************************/ -int /* returns zero if not found */ -hashtable_iterator_search(struct hashtable_itr *itr, - struct hashtable *h, void *k) -{ - struct entry *e, *parent; - unsigned int hashvalue, index; - - hashvalue = hash(h,k); - index = indexFor(h->tablelength,hashvalue); - - e = h->table[index]; - parent = NULL; - while (NULL != e) - { - /* Check hash value to short circuit heavier comparison */ - if ((hashvalue == e->h) && (h->eqfn(k, e->k))) - { - itr->index = index; - itr->e = e; - itr->parent = parent; - itr->h = h; - return -1; - } - parent = e; - e = e->next; - } - return 0; -} - - -/* - * Copyright (c) 2002, 2004, Christopher Clark - * All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * - * * Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * - * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * - * * Neither the name of the original author; nor the names of any contributors - * may be used to endorse or promote products derived from this software - * without specific prior written permission. - * - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER - * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, - * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, - * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR - * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF - * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING - * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
-*/ diff --git a/runtime/hashtable/hashtable_itr.h b/runtime/hashtable/hashtable_itr.h deleted file mode 100644 index eea699a7..00000000 --- a/runtime/hashtable/hashtable_itr.h +++ /dev/null @@ -1,112 +0,0 @@ -/* Copyright (C) 2002, 2004 Christopher Clark */ - -#ifndef __HASHTABLE_ITR_CWC22__ -#define __HASHTABLE_ITR_CWC22__ -#include "hashtable.h" -#include "hashtable_private.h" /* needed to enable inlining */ - -/*****************************************************************************/ -/* This struct is only concrete here to allow the inlining of two of the - * accessor functions. */ -struct hashtable_itr -{ - struct hashtable *h; - struct entry *e; - struct entry *parent; - unsigned int index; -}; - - -/*****************************************************************************/ -/* hashtable_iterator - */ - -struct hashtable_itr * -hashtable_iterator(struct hashtable *h); - -/*****************************************************************************/ -/* hashtable_iterator_key - * - return the value of the (key,value) pair at the current position */ - -extern inline void * -hashtable_iterator_key(struct hashtable_itr *i) -{ - return i->e->k; -} - -/*****************************************************************************/ -/* value - return the value of the (key,value) pair at the current position */ - -extern inline void * -hashtable_iterator_value(struct hashtable_itr *i) -{ - return i->e->v; -} - -/*****************************************************************************/ -/* advance - advance the iterator to the next element - * returns zero if advanced to end of table */ - -int -hashtable_iterator_advance(struct hashtable_itr *itr); - -/*****************************************************************************/ -/* remove - remove current element and advance the iterator to the next element - * NB: if you need the value to free it, read it before - * removing. ie: beware memory leaks! - * returns zero if advanced to end of table */ - -int -hashtable_iterator_remove(struct hashtable_itr *itr); - -/*****************************************************************************/ -/* search - overwrite the supplied iterator, to point to the entry - * matching the supplied key. - h points to the hashtable to be searched. - * returns zero if not found. */ -int -hashtable_iterator_search(struct hashtable_itr *itr, - struct hashtable *h, void *k); - -#define DEFINE_HASHTABLE_ITERATOR_SEARCH(fnname, keytype) \ -int fnname (struct hashtable_itr *i, struct hashtable *h, keytype *k) \ -{ \ - return (hashtable_iterator_search(i,h,k)); \ -} - - - -#endif /* __HASHTABLE_ITR_CWC22__*/ - -/* - * Copyright (c) 2002, 2004, Christopher Clark - * All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * - * * Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * - * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * - * * Neither the name of the original author; nor the names of any contributors - * may be used to endorse or promote products derived from this software - * without specific prior written permission. 
- * - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER - * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, - * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, - * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR - * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF - * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING - * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -*/ diff --git a/runtime/hashtable/hashtable_private.h b/runtime/hashtable/hashtable_private.h deleted file mode 100644 index 10b82da4..00000000 --- a/runtime/hashtable/hashtable_private.h +++ /dev/null @@ -1,86 +0,0 @@ -/* Copyright (C) 2002, 2004 Christopher Clark */ - -#ifndef __HASHTABLE_PRIVATE_CWC22_H__ -#define __HASHTABLE_PRIVATE_CWC22_H__ - -#include "hashtable.h" - -/*****************************************************************************/ -struct entry -{ - void *k, *v; - unsigned int h; - struct entry *next; -}; - -struct hashtable { - unsigned int tablelength; - struct entry **table; - unsigned int entrycount; - unsigned int loadlimit; - unsigned int primeindex; - unsigned int (*hashfn) (void *k); - int (*eqfn) (void *k1, void *k2); - void (*dest) (void *v); /* destructor for values, if NULL use free() */ -}; - -/*****************************************************************************/ -unsigned int -hash(struct hashtable *h, void *k); - -/*****************************************************************************/ -/* indexFor */ -static inline unsigned int -indexFor(unsigned int tablelength, unsigned int hashvalue) { - return (hashvalue % tablelength); -}; - -/* Only works if tablelength == 2^N */ -/*static inline unsigned int -indexFor(unsigned int tablelength, unsigned int hashvalue) -{ - return (hashvalue & (tablelength - 1u)); -} -*/ - -/*****************************************************************************/ -#define freekey(X) free(X) -/*define freekey(X) ; */ - - -/*****************************************************************************/ - -#endif /* __HASHTABLE_PRIVATE_CWC22_H__*/ - -/* - * Copyright (c) 2002, Christopher Clark - * All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * - * * Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * - * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * - * * Neither the name of the original author; nor the names of any contributors - * may be used to endorse or promote products derived from this software - * without specific prior written permission. - * - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - * A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER - * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, - * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, - * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR - * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF - * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING - * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -*/ diff --git a/runtime/hashtable_itr.c b/runtime/hashtable_itr.c new file mode 100644 index 00000000..967287f1 --- /dev/null +++ b/runtime/hashtable_itr.c @@ -0,0 +1,190 @@ +/* Copyright (C) 2002, 2004 Christopher Clark */ + +#include "hashtable.h" +#include "hashtable_private.h" +#include "hashtable_itr.h" +#include /* defines NULL */ + +/*****************************************************************************/ +/* hashtable_iterator - iterator constructor */ + +struct hashtable_itr * +hashtable_iterator(struct hashtable *h) +{ + unsigned int i, tablelength; + struct hashtable_itr *itr = (struct hashtable_itr *) + malloc(sizeof(struct hashtable_itr)); + if (NULL == itr) return NULL; + itr->h = h; + itr->e = NULL; + itr->parent = NULL; + tablelength = h->tablelength; + itr->index = tablelength; + if (0 == h->entrycount) return itr; + + for (i = 0; i < tablelength; i++) + { + if (NULL != h->table[i]) + { + itr->e = h->table[i]; + itr->index = i; + break; + } + } + return itr; +} + +/*****************************************************************************/ +/* key - return the key of the (key,value) pair at the current position */ +/* value - return the value of the (key,value) pair at the current position */ + +#if 0 /* these are now inline functions! */ +void * +hashtable_iterator_key(struct hashtable_itr *i) +{ return i->e->k; } + +void * +hashtable_iterator_value(struct hashtable_itr *i) +{ return i->e->v; } +#endif + +/*****************************************************************************/ +/* advance - advance the iterator to the next element + * returns zero if advanced to end of table */ + +int +hashtable_iterator_advance(struct hashtable_itr *itr) +{ + unsigned int j,tablelength; + struct entry **table; + struct entry *next; + if (NULL == itr->e) return 0; /* stupidity check */ + + next = itr->e->next; + if (NULL != next) + { + itr->parent = itr->e; + itr->e = next; + return -1; + } + tablelength = itr->h->tablelength; + itr->parent = NULL; + if (tablelength <= (j = ++(itr->index))) + { + itr->e = NULL; + return 0; + } + table = itr->h->table; + while (NULL == (next = table[j])) + { + if (++j >= tablelength) + { + itr->index = tablelength; + itr->e = NULL; + return 0; + } + } + itr->index = j; + itr->e = next; + return -1; +} + +/*****************************************************************************/ +/* remove - remove the entry at the current iterator position + * and advance the iterator, if there is a successive + * element. + * If you want the value, read it before you remove: + * beware memory leaks if you don't. + * Returns zero if end of iteration. 
*/ + +int +hashtable_iterator_remove(struct hashtable_itr *itr) +{ + struct entry *remember_e, *remember_parent; + int ret; + + /* Do the removal */ + if (NULL == (itr->parent)) + { + /* element is head of a chain */ + itr->h->table[itr->index] = itr->e->next; + } else { + /* element is mid-chain */ + itr->parent->next = itr->e->next; + } + /* itr->e is now outside the hashtable */ + remember_e = itr->e; + itr->h->entrycount--; + freekey(remember_e->k); + + /* Advance the iterator, correcting the parent */ + remember_parent = itr->parent; + ret = hashtable_iterator_advance(itr); + if (itr->parent == remember_e) { itr->parent = remember_parent; } + free(remember_e); + return ret; +} + +/*****************************************************************************/ +int /* returns zero if not found */ +hashtable_iterator_search(struct hashtable_itr *itr, + struct hashtable *h, void *k) +{ + struct entry *e, *parent; + unsigned int hashvalue, index; + + hashvalue = hash(h,k); + index = indexFor(h->tablelength,hashvalue); + + e = h->table[index]; + parent = NULL; + while (NULL != e) + { + /* Check hash value to short circuit heavier comparison */ + if ((hashvalue == e->h) && (h->eqfn(k, e->k))) + { + itr->index = index; + itr->e = e; + itr->parent = parent; + itr->h = h; + return -1; + } + parent = e; + e = e->next; + } + return 0; +} + + +/* + * Copyright (c) 2002, 2004, Christopher Clark + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * * Neither the name of the original author; nor the names of any contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER + * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, + * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, + * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR + * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF + * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING + * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +*/ diff --git a/runtime/hashtable_itr.h b/runtime/hashtable_itr.h new file mode 100644 index 00000000..1c206b6e --- /dev/null +++ b/runtime/hashtable_itr.h @@ -0,0 +1,112 @@ +/* Copyright (C) 2002, 2004 Christopher Clark */ + +#ifndef __HASHTABLE_ITR_CWC22__ +#define __HASHTABLE_ITR_CWC22__ +#include "hashtable.h" +#include "hashtable_private.h" /* needed to enable inlining */ + +/*****************************************************************************/ +/* This struct is only concrete here to allow the inlining of two of the + * accessor functions. 
*/ +struct hashtable_itr +{ + struct hashtable *h; + struct entry *e; + struct entry *parent; + unsigned int index; +}; + + +/*****************************************************************************/ +/* hashtable_iterator + */ + +struct hashtable_itr * +hashtable_iterator(struct hashtable *h); + +/*****************************************************************************/ +/* hashtable_iterator_key + * - return the value of the (key,value) pair at the current position */ + +static inline void * +hashtable_iterator_key(struct hashtable_itr *i) +{ + return i->e->k; +} + +/*****************************************************************************/ +/* value - return the value of the (key,value) pair at the current position */ + +static inline void * +hashtable_iterator_value(struct hashtable_itr *i) +{ + return i->e->v; +} + +/*****************************************************************************/ +/* advance - advance the iterator to the next element + * returns zero if advanced to end of table */ + +int +hashtable_iterator_advance(struct hashtable_itr *itr); + +/*****************************************************************************/ +/* remove - remove current element and advance the iterator to the next element + * NB: if you need the value to free it, read it before + * removing. ie: beware memory leaks! + * returns zero if advanced to end of table */ + +int +hashtable_iterator_remove(struct hashtable_itr *itr); + +/*****************************************************************************/ +/* search - overwrite the supplied iterator, to point to the entry + * matching the supplied key. + h points to the hashtable to be searched. + * returns zero if not found. */ +int +hashtable_iterator_search(struct hashtable_itr *itr, + struct hashtable *h, void *k); + +#define DEFINE_HASHTABLE_ITERATOR_SEARCH(fnname, keytype) \ +int fnname (struct hashtable_itr *i, struct hashtable *h, keytype *k) \ +{ \ + return (hashtable_iterator_search(i,h,k)); \ +} + + + +#endif /* __HASHTABLE_ITR_CWC22__*/ + +/* + * Copyright (c) 2002, 2004, Christopher Clark + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * * Neither the name of the original author; nor the names of any contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER + * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, + * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, + * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR + * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF + * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING + * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +*/ diff --git a/runtime/hashtable_private.h b/runtime/hashtable_private.h new file mode 100644 index 00000000..10b82da4 --- /dev/null +++ b/runtime/hashtable_private.h @@ -0,0 +1,86 @@ +/* Copyright (C) 2002, 2004 Christopher Clark */ + +#ifndef __HASHTABLE_PRIVATE_CWC22_H__ +#define __HASHTABLE_PRIVATE_CWC22_H__ + +#include "hashtable.h" + +/*****************************************************************************/ +struct entry +{ + void *k, *v; + unsigned int h; + struct entry *next; +}; + +struct hashtable { + unsigned int tablelength; + struct entry **table; + unsigned int entrycount; + unsigned int loadlimit; + unsigned int primeindex; + unsigned int (*hashfn) (void *k); + int (*eqfn) (void *k1, void *k2); + void (*dest) (void *v); /* destructor for values, if NULL use free() */ +}; + +/*****************************************************************************/ +unsigned int +hash(struct hashtable *h, void *k); + +/*****************************************************************************/ +/* indexFor */ +static inline unsigned int +indexFor(unsigned int tablelength, unsigned int hashvalue) { + return (hashvalue % tablelength); +}; + +/* Only works if tablelength == 2^N */ +/*static inline unsigned int +indexFor(unsigned int tablelength, unsigned int hashvalue) +{ + return (hashvalue & (tablelength - 1u)); +} +*/ + +/*****************************************************************************/ +#define freekey(X) free(X) +/*define freekey(X) ; */ + + +/*****************************************************************************/ + +#endif /* __HASHTABLE_PRIVATE_CWC22_H__*/ + +/* + * Copyright (c) 2002, Christopher Clark + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * * Neither the name of the original author; nor the names of any contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER + * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, + * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, + * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR + * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF + * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING + * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +*/ -- cgit From 4b9a92bc725f7436b7958e673a9665a90b548e86 Mon Sep 17 00:00:00 2001 From: Rainer Gerhards Date: Mon, 4 Oct 2010 12:46:39 +0000 Subject: omhdfs: fixed some issues with previous commit --- plugins/omhdfs/omhdfs.c | 24 ++++++++++++------------ 1 file changed, 12 insertions(+), 12 deletions(-) diff --git a/plugins/omhdfs/omhdfs.c b/plugins/omhdfs/omhdfs.c index eefea722..b075432d 100644 --- a/plugins/omhdfs/omhdfs.c +++ b/plugins/omhdfs/omhdfs.c @@ -254,7 +254,6 @@ fileWrite(file_t *pFile, uchar *buf) size_t lenWrite; DEFiRet; - assert(pFile->fh != NULL); if(pFile->nUsers > 1) d_pthread_mutex_lock(&pFile->mut); @@ -391,17 +390,18 @@ BEGINdoHUP file_t *pFile; struct hashtable_itr *itr; CODESTARTdoHUP - /* Iterator constructor only returns a valid iterator if - * the hashtable is not empty */ - itr = hashtable_iterator(files); - if(hashtable_count(files) > 0) - { - do { - pFile = (file_t *) hashtable_iterator_value(itr); - fileClose(pFile); - DBGPRINTF("imuxsock: HUP, closing file %s\n", pFile->name); - } while (hashtable_iterator_advance(itr)); - } + DBGPRINTF("omhdfs: HUP received (file count %d)\n", hashtable_count(files)); + /* Iterator constructor only returns a valid iterator if + * the hashtable is not empty */ + itr = hashtable_iterator(files); + if(hashtable_count(files) > 0) + { + do { + pFile = (file_t *) hashtable_iterator_value(itr); + fileClose(pFile); + DBGPRINTF("omhdfs: HUP, closing file %s\n", pFile->name); + } while (hashtable_iterator_advance(itr)); + } ENDdoHUP -- cgit From a627ac99ba2c3404ca926a19fb06cbd6f43b53c8 Mon Sep 17 00:00:00 2001 From: Rainer Gerhards Date: Mon, 4 Oct 2010 16:25:49 +0200 Subject: omhdfs: added ability to create non-existing directories in name path --- plugins/omhdfs/omhdfs.c | 128 +++++++++++++++++++++++++++--------------------- 1 file changed, 72 insertions(+), 56 deletions(-) diff --git a/plugins/omhdfs/omhdfs.c b/plugins/omhdfs/omhdfs.c index b075432d..9705b7fd 100644 --- a/plugins/omhdfs/omhdfs.c +++ b/plugins/omhdfs/omhdfs.c @@ -97,54 +97,41 @@ CODESTARTdbgPrintInstInfo ENDdbgPrintInstInfo - -#if 0 -static void prepareFile(instanceData *pData, uchar *newFileName) +/* note that hdfsFileExists() does not work, so we did our + * own function to see if a pathname exists. Returns 0 if the + * file does not exists, something else otherwise. Note that + * we can also check a directroy (if that matters...) + */ +static int +HDFSFileExists(hdfsFS fs, uchar *name) { - if(access((char*)newFileName, F_OK) == 0) { - /* file already exists */ - pData->fh = open((char*) newFileName, O_WRONLY|O_APPEND|O_CREAT|O_NOCTTY, - pData->fCreateMode); + int r; + hdfsFileInfo *info; + + info = hdfsGetPathInfo(fs, (char*)name); + /* if things go wrong, we assume it is because the file + * does not exist. We do not get too much information... 
+ */ + if(info == NULL) { + r = 0; } else { - pData->fh = -1; - /* file does not exist, create it (and eventually parent directories */ - if(pData->bCreateDirs) { - /* we fist need to create parent dirs if they are missing - * We do not report any errors here ourselfs but let the code - * fall through to error handler below. - */ - if(makeFileParentDirs(newFileName, strlen((char*)newFileName), - pData->fDirCreateMode, pData->dirUID, - pData->dirGID, pData->bFailOnChown) != 0) { - return; /* we give up */ - } - } - /* no matter if we needed to create directories or not, we now try to create - * the file. -- rgerhards, 2008-12-18 (based on patch from William Tisater) - */ - pData->fh = open((char*) newFileName, O_WRONLY|O_APPEND|O_CREAT|O_NOCTTY, - pData->fCreateMode); - if(pData->fh != -1) { - /* check and set uid/gid */ - if(pData->fileUID != (uid_t)-1 || pData->fileGID != (gid_t) -1) { - /* we need to set owner/group */ - if(fchown(pData->fh, pData->fileUID, - pData->fileGID) != 0) { - if(pData->bFailOnChown) { - int eSave = errno; - close(pData->fh); - pData->fh = -1; - errno = eSave; - } - /* we will silently ignore the chown() failure - * if configured to do so. - */ - } - } - } + r = 1; + hdfsFreeFileInfo(info, 1); } + return r; +} + +static inline rsRetVal +HDFSmkdir(hdfsFS fs, uchar *name) +{ + DEFiRet; + if(hdfsCreateDirectory(fs, (char*)name) == -1) + ABORT_FINALIZE(RS_RET_ERR); + +finalize_it: + RETiRet; } -#endif + /* ---BEGIN FILE OBJECT---------------------------------------------------- */ /* This code handles the "file object". This is split from the actual @@ -195,6 +182,42 @@ fileObjDestruct(file_t **ppFile) return RS_RET_OK; } + +/* check, and potentially create, all names inside a path */ +static rsRetVal +filePrepare(file_t *pFile) +{ + uchar *p; + uchar *pszWork; + size_t len; + DEFiRet; + + if(HDFSFileExists(pFile->fs, pFile->name)) + FINALIZE; + + /* file does not exist, create it (and eventually parent directories */ + if(1) { // check if bCreateDirs + len = ustrlen(pFile->name) + 1; + CHKmalloc(pszWork = MALLOC(sizeof(uchar) * len)); + memcpy(pszWork, pFile->name, len); + for(p = pszWork+1 ; *p ; p++) + if(*p == '/') { + /* temporarily terminate string, create dir and go on */ + *p = '\0'; + if(!HDFSFileExists(pFile->fs, pszWork)) { + CHKiRet(HDFSmkdir(pFile->fs, pszWork)); + } + *p = '/'; + } + free(pszWork); + return 0; + } + +finalize_it: + RETiRet; +} + + /* this function is to be used as destructor for the * hash table code. */ @@ -222,6 +245,9 @@ fileOpen(file_t *pFile) DBGPRINTF("omhdfs: error can not connect to hdfs\n"); ABORT_FINALIZE(RS_RET_SUSPENDED); } + + CHKiRet(filePrepare(pFile)); + pFile->fh = hdfsOpenFile(pFile->fs, (char*)pFile->name, O_WRONLY|O_APPEND, 0, 0, 0); if(pFile->fh == NULL) { /* maybe the file does not exist, so we try to create it now. 
@@ -271,7 +297,8 @@ fileWrite(file_t *pFile, uchar *buf) tSize num_written_bytes = hdfsWrite(pFile->fs, pFile->fh, buf, lenWrite); if((unsigned) num_written_bytes != lenWrite) { errmsg.LogError(errno, RS_RET_ERR_HDFS_WRITE, "omhdfs: failed to write %s, expected %lu bytes, " - "written %lu\n", pFile->name, lenWrite, (unsigned long) num_written_bytes); + "written %lu\n", pFile->name, (unsigned long) lenWrite, + (unsigned long) num_written_bytes); ABORT_FINALIZE(RS_RET_SUSPENDED); } @@ -410,17 +437,6 @@ ENDdoHUP */ static rsRetVal resetConfigVariables(uchar __attribute__((unused)) *pp, void __attribute__((unused)) *pVal) { -/* - fileUID = -1; - fileGID = -1; - dirUID = -1; - dirGID = -1; - bFailOnChown = 1; - iDynaFileCacheSize = 10; - fCreateMode = 0644; - fDirCreateMode = 0700; - bCreateDirs = 1; -*/ hdfsHost = NULL; hdfsPort = 0; return RS_RET_OK; -- cgit
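The machinery added by this series is spread over several hunks, so the following is a small, self-contained sketch of the resulting pattern: a hashtable of file objects keyed by name, a HUP handler that walks the table and closes every cached file, and a write path that lazily re-opens a file before writing. It is not part of the patches and only approximates the module: stdio stands in for libhdfs so it builds without a Hadoop environment, the per-file mutex and rsyslog return codes are omitted, and the names fileGet, fileDestruct and demo.log are invented for the illustration. The build line is likewise only a guess at how the runtime/ sources fit together.

/* a minimal sketch, not part of the patches: the omhdfs file cache and HUP
 * pattern, with stdio standing in for libhdfs.
 * Build (roughly, from the rsyslog source root):
 *   cc -Iruntime demo.c runtime/hashtable.c runtime/hashtable_itr.c -lm
 */
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include "hashtable.h"
#include "hashtable_itr.h"

/* defined in runtime/hashtable.c above; declared here in case the header
 * does not export them */
unsigned int hash_from_string(void *k);
int key_equals_string(void *key1, void *key2);

typedef struct file_s {
	char *name;
	FILE *fh;	/* NULL while the file is closed */
} file_t;

static struct hashtable *files;	/* name -> file_t*, shared cache */

/* value destructor handed to create_hashtable(), analogous to
 * fileObjDestruct4Hashtable() in omhdfs.c */
static void fileDestruct(void *ptr)
{
	file_t *pFile = (file_t*) ptr;
	if(pFile->fh != NULL)
		fclose(pFile->fh);
	free(pFile->name);
	free(pFile);
}

/* find-or-create a cache entry; the table owns its own copy of the key */
static file_t *fileGet(const char *name)
{
	file_t *pFile = hashtable_search(files, (void*) name);
	if(pFile == NULL) {
		pFile = calloc(1, sizeof(file_t));
		pFile->name = strdup(name);
		hashtable_insert(files, strdup(name), pFile);
	}
	return pFile;
}

/* write path: (re)open lazily, precisely because HUP may have closed the file */
static int fileWrite(file_t *pFile, const char *buf)
{
	if(pFile->fh == NULL) {
		pFile->fh = fopen(pFile->name, "a");
		if(pFile->fh == NULL)
			return -1;	/* the module suspends the action here */
	}
	return fputs(buf, pFile->fh) < 0 ? -1 : 0;
}

/* HUP: walk the table and close everything; the next write re-opens */
static void doHUP(void)
{
	if(hashtable_count(files) > 0) {
		struct hashtable_itr *itr = hashtable_iterator(files);
		do {
			file_t *pFile = hashtable_iterator_value(itr);
			if(pFile->fh != NULL) {
				fclose(pFile->fh);
				pFile->fh = NULL;
			}
		} while(hashtable_iterator_advance(itr));
		free(itr);
	}
}

int main(void)
{
	files = create_hashtable(20, hash_from_string, key_equals_string,
	                         fileDestruct);
	fileWrite(fileGet("demo.log"), "before HUP\n");
	doHUP();	/* e.g. after demo.log has been rotated away */
	fileWrite(fileGet("demo.log"), "after HUP\n");
	hashtable_destroy(files, 1);	/* frees keys and runs fileDestruct() */
	return 0;
}

Keeping doHUP limited to closing handles means the HUP path never has to know whether the file store is reachable; a failed re-open is only discovered on the next write, where the module already reports RS_RET_SUSPENDED.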