/* * libpinyin * Library to deal with pinyin. * * Copyright (C) 2006-2007 Peng Wu * * This program is free software: you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation, either version 3 of the License, or * (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License * along with this program. If not, see . */ #ifndef MEMORY_CHUNK_H #define MEMORY_CHUNK_H #include "config.h" #include #include #include #include #include #include #ifdef HAVE_MMAP #include #define LIBPINYIN_USE_MMAP #endif #include "stl_lite.h" namespace pinyin{ /* for unmanaged mode * m_free_func == free, when memory is allocated by malloc * m_free_func == munmap, when memory is allocated by mmap * m_free_func == NULL, * when memory is in small protion of allocated area * m_free_func == other, * malloc then free. */ /** * MemoryChunk: * * The utility to manage the memory chunks. * */ class MemoryChunk{ typedef void (* free_func_t)(...); private: char * m_data_begin; char * m_data_end; //one data pass the end. char * m_allocated; //one data pass the end. free_func_t m_free_func; const gint32 header = sizeof(guint32) * 2; private: void freemem(){ if ((free_func_t)free == m_free_func) free(m_data_begin); #ifdef LIBPINYIN_USE_MMAP else if ((free_func_t)munmap == m_free_func) /* we hide the file header in mmap. */ munmap(m_data_begin - header, header + capacity()); #endif else assert(FALSE); } void reset(){ if (m_free_func) freemem(); m_data_begin = NULL; m_data_end = NULL; m_allocated = NULL; m_free_func = NULL; } void ensure_has_space(size_t new_size){ int delta_size = m_data_begin + new_size - m_data_end; if ( delta_size <= 0 ) return; ensure_has_more_space ( delta_size ); } /* enlarge function */ void ensure_has_more_space(size_t extra_size){ if ( 0 == extra_size ) return; size_t newsize; size_t cursize = size(); if ( m_free_func != (free_func_t)free ) { /* copy on resize */ newsize = cursize + extra_size; /* do the copy */ char * tmp = (char *) malloc(newsize); assert(tmp); memset(tmp, 0, newsize); memmove(tmp, m_data_begin, cursize); /* free the origin memory */ if (m_free_func) freemem(); /* change varibles */ m_data_begin = tmp; m_data_end = m_data_begin + cursize; m_allocated = m_data_begin + newsize; m_free_func = (free_func_t)free; return; } /* the memory area is managed by this memory chunk */ if ( extra_size <= (size_t) (m_allocated - m_data_end)) return; newsize = std_lite::max( capacity()<<1, cursize + extra_size); m_data_begin = (char *) realloc(m_data_begin, newsize); assert(m_data_begin); memset(m_data_begin + cursize, 0, newsize - cursize); m_data_end = m_data_begin + cursize; m_allocated = m_data_begin + newsize; return; } guint32 get_check_sum(const char * data, guint32 length){ guint32 checksum = 0x0; guint32 aligns = length & ~0x3; /* checksum for aligned parts. */ guint32 index = 0; for (; index < aligns; index += sizeof(guint32)) { const char * p = data + index; /* use little endian here. */ guint32 item = *p | *(p + 1) << 8 | *(p + 2) << 16 | *(p + 3) << 24; checksum ^= item; } /* checksum for remained parts. */ guint32 shift = 0; for (; index < length; index++) { const char * p = data + index; guint32 item = *p << shift; shift += 8; checksum ^= item; } return checksum; } public: /** * MemoryChunk::MemoryChunk: * * The constructor of the MemoryChunk. * */ MemoryChunk(){ m_data_begin = NULL; m_data_end = NULL; m_allocated = NULL; m_free_func = NULL; } /** * MemoryChunk::~MemoryChunk: * * The destructor of the MemoryChunk. * */ ~MemoryChunk(){ reset(); } /** * MemoryChunk::begin: * * Read access method, to get the begin of the MemoryChunk. * */ void* begin() const{ return m_data_begin; } /** * MemoryChunk::end: * * Write access method, to get the end of the MemoryChunk. * */ void* end() const{ return m_data_end; } /** * MemoryChunk::size: * * Get the size of the content in the MemoryChunk. * */ size_t size() const{ return m_data_end - m_data_begin; } /** * MemoryChunk::set_size: * * Set the size of the content in the MemoryChunk. * */ void set_size(size_t newsize){ ensure_has_space(newsize); m_data_end = m_data_begin + newsize; } /** * MemoryChunk::capacity: * * Get the capacity of the MemoryChunk. * */ size_t capacity(){ return m_allocated - m_data_begin; } /** * MemoryChunk::set_chunk: * @begin: the begin of the data * @length: the length of the data * @free_func: the function to free the data * * Transfer management of a memory chunk allocated by other part of the * system to the memory chunk. * */ void set_chunk(void* begin, size_t length, free_func_t free_func){ if (m_free_func) freemem(); m_data_begin = (char *) begin; m_data_end = (char *) m_data_begin + length; m_allocated = (char *) m_data_begin + length; m_free_func = free_func; } /** * MemoryChunk::get_sub_chunk: * @offset: the offset in this MemoryChunk. * @length: the data length to be retrieved. * @returns: the newly allocated MemoryChunk. * * Get a sub MemoryChunk from this MemoryChunk. * * Note: use set_chunk internally. * the returned new chunk need to be deleted. * */ MemoryChunk * get_sub_chunk(size_t offset, size_t length){ MemoryChunk * retval = new MemoryChunk(); char * begin_pos = m_data_begin + offset; retval->set_chunk(begin_pos, length, NULL); return retval; } /** * MemoryChunk::set_content: * @offset: the offset in this MemoryChunk. * @data: the begin of the data to be copied. * @len: the length of the data to be copied. * @returns: whether the data is copied successfully. * * Data are written directly to the memory area in this MemoryChunk. * */ bool set_content(size_t offset, const void * data, size_t len){ size_t cursize = std_lite::max(size(), offset + len); ensure_has_space(offset + len); memmove(m_data_begin + offset, data, len); m_data_end = m_data_begin + cursize; return true; } /** * MemoryChunk::append_content: * @data: the begin of the data to be copied. * @len: the length of the data to be copied. * @returns: whether the data is appended successfully. * * Data are appended at the end of the MemoryChunk. * */ bool append_content(const void * data, size_t len){ return set_content(size(), data, len); } /** * MemoryChunk::insert_content: * @offset: the offset in this MemoryChunk, which starts from zero. * @data: the begin of the data to be copied. * @length: the length of the data to be copied. * @returns: whether the data is inserted successfully. * * Data are written to the memory area, * the original content are moved towards the rear. * */ bool insert_content(size_t offset, const void * data, size_t length){ ensure_has_more_space(length); size_t move_size = size() - offset; memmove(m_data_begin + offset + length, m_data_begin + offset, move_size); memmove(m_data_begin + offset, data, length); m_data_end += length; return true; } /** * MemoryChunk::remove_content: * @offset: the offset in this MemoryChunk. * @length: the length of the removed content. * @returns: whether the content is removed successfully. * * Data are removed directly, * the following content are moved towards the front. * */ bool remove_content(size_t offset, size_t length){ size_t move_size = size() - offset - length; memmove(m_data_begin + offset, m_data_begin + offset + length, move_size); m_data_end -= length; return true; } /** * MemoryChunk::get_content: * @offset: the offset in this MemoryChunk. * @buffer: the buffer to retrieve the content. * @length: the length of content to be retrieved. * @returns: whether the content is retrieved. * * Get the content in this MemoryChunk. * */ bool get_content(size_t offset, void * buffer, size_t length){ if ( size() < offset + length ) return false; memcpy( buffer, m_data_begin + offset, length); return true; } /** * MemoryChunk::compact_memory: * * Compact memory, reduce the size. * */ void compact_memory(){ if ( m_free_func != (free_func_t)free ) return; size_t newsize = size(); m_data_begin = (char *) realloc(m_data_begin, newsize); m_allocated = m_data_begin + newsize; } /** * MemoryChunk::load: * @filename: load the MemoryChunk from the filename. * @returns: whether the load is successful. * * Load the content from the filename. * */ bool load(const char * filename){ /* free old data */ reset(); int fd = open(filename, O_RDONLY); if (-1 == fd) return false; off_t file_size = lseek(fd, 0, SEEK_END); lseek(fd, 0, SEEK_SET); if (file_size < header) { close(fd); return false; } guint32 length = 0; ssize_t ret_len = read(fd, &length, sizeof(guint32)); assert(ret_len == sizeof(length)); guint32 checksum = 0; ret_len = read(fd, &checksum, sizeof(guint32)); assert(ret_len == sizeof(checksum)); guint32 data_len = file_size - header; if (data_len != length) { close(fd); return false; } char * data = (char *) malloc(data_len); if ( !data ){ close(fd); return false; } data_len = read(fd, data, data_len); guint32 calc = get_check_sum(data, data_len); if (checksum != calc) { free(data); close(fd); return false; } set_chunk(data, data_len, (free_func_t)free); close(fd); return true; } #ifdef LIBPINYIN_USE_MMAP /** * MemoryChunk::mmap: * @filename: mmap the MemoryChunk from the filename. * @returns: whether the mmap is successful. * * mmap the content from the filename. * */ bool mmap(const char * filename){ /* free old data */ reset(); int fd = open(filename, O_RDONLY); if (-1 == fd) return false; off_t file_size = lseek(fd, 0, SEEK_END); lseek(fd, 0, SEEK_SET); if (file_size < header) { close(fd); return false; } guint32 length = 0; ssize_t ret_len = read(fd, &length, sizeof(guint32)); assert(ret_len == sizeof(length)); guint32 checksum = 0; ret_len = read(fd, &checksum, sizeof(guint32)); assert(ret_len == sizeof(checksum)); guint32 data_len = file_size - header; if (data_len != length) { close(fd); return false; } char * data = (char *)::mmap(NULL, file_size, PROT_READ|PROT_WRITE, MAP_PRIVATE, fd, 0); if (MAP_FAILED == data) { close(fd); return false; } data = data + header; guint32 calc = get_check_sum(data, data_len); if (checksum != calc) { munmap(data - header, file_size); close(fd); return false; } set_chunk(data, data_len, (free_func_t)munmap); close(fd); return true; } #endif /** * MemoryChunk::save: * @filename: save this MemoryChunk to the filename. * @returns: whether the save is successful. * * Save the content to the filename. * */ bool save(const char * filename){ int fd = open(filename, O_CREAT|O_WRONLY|O_TRUNC, 0644); if ( -1 == fd ) return false; guint32 length = size(); ssize_t ret_len = write(fd, &length, sizeof(guint32)); assert(ret_len == sizeof(length)); guint32 checksum = get_check_sum(m_data_begin, size()); ret_len = write(fd, &checksum, sizeof(guint32)); assert(ret_len == sizeof(checksum)); ret_len = write(fd, begin(), size()); if (ret_len != (ssize_t) size()){ close(fd); return false; } fsync(fd); close(fd); return true; } }; }; #endif