From 8bcea4710bd328ef74dc852d9fdffb3c47ed8abe Mon Sep 17 00:00:00 2001
From: Matias Larsson
Date: Thu, 24 Nov 2022 18:22:55 +0100
Subject: Fix libpinyin crash on ARMv7

Fix the alignment trap in get_unigram_frequency(). Also fix other places
where the same trap could happen (depending on the compiler and the
surrounding code).

The trap happened when ARM GCC generated a SIMD instruction (specifically
VLDR) to load 32 bits in a single instruction and the memory address was
not aligned to 32 bits. VLDR traps if the address is not aligned. GCC
generated the instruction because the address was cast to a uint32 pointer.

The fix is to allocate a uint32 variable on the stack and use memcpy to
copy the data into that variable. This way we ensure that appropriate
instructions are generated.

**Links**

About the issue with GCC:
https://trust-in-soft.com/blog/2020/04/06/gcc-always-assumes-aligned-pointer-accesses/

How Linux does it:
https://elixir.bootlin.com/linux/v5.10.155/source/include/linux/unaligned/memmove.h#L13

ARM documentation:
https://documentation-service.arm.com/static/5f8dc043f86e16515cdbbc92?token=
See 'A3.2.1 Unaligned data access'
---
 src/include/memory_chunk.h     | 34 +++++++++++++++++++++++++++++++++
 src/include/unaligned_memory.h | 43 ++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 77 insertions(+)
 create mode 100644 src/include/unaligned_memory.h

(limited to 'src/include')

diff --git a/src/include/memory_chunk.h b/src/include/memory_chunk.h
index c106dba..baa5679 100644
--- a/src/include/memory_chunk.h
+++ b/src/include/memory_chunk.h
@@ -289,6 +289,25 @@ public:
         return true;
     }
 
+    /**
+     * MemoryChunk::set_content:
+     * @offset: the offset in this MemoryChunk.
+     * @data: the data to be copied.
+     * @returns: whether the data is copied successfully.
+     *
+     * Data are written directly to the memory area in this MemoryChunk.
+     * m_data_end is set to m_data_begin + max(size(), @offset + sizeof(T)).
+     */
+    template <typename T>
+    bool set_content(size_t offset, T data){
+        const size_t len = sizeof(data);
+        size_t cursize = std_lite::max(size(), offset + len);
+        ensure_has_space(offset + len);
+        memmove(m_data_begin + offset, &data, len);
+        m_data_end = m_data_begin + cursize;
+        return true;
+    }
+
     /**
      * MemoryChunk::append_content:
      * @data: the begin of the data to be copied.
@@ -356,6 +375,21 @@ public:
         return true;
     }
 
+    /**
+     * MemoryChunk::get_content:
+     * @offset: the offset in this MemoryChunk.
+     * @returns: the content
+     *
+     * Copy sizeof(T) bytes starting at @offset in this MemoryChunk into a T.
+     * Uses memcpy rather than a pointer cast; @offset is not bounds-checked.
+     */
+    template <typename T>
+    T get_content(size_t offset) const {
+        T value;
+        memcpy(&value, m_data_begin + offset, sizeof(value));
+        return value;
+    }
+
     /**
      * MemoryChunk::compact_memory:
      *
diff --git a/src/include/unaligned_memory.h b/src/include/unaligned_memory.h
new file mode 100644
index 0000000..27b2f19
--- /dev/null
+++ b/src/include/unaligned_memory.h
@@ -0,0 +1,43 @@
+#ifndef UNALIGNED_MEMORY_H
+#define UNALIGNED_MEMORY_H
+
+#include <cstring>
+
+/**
+ * UnalignedMemory: Safe unaligned memory access.
+ *
+ * Some instruction sets, or some instructions in some instruction sets
+ * require that memory access is aligned to a specific boundary. These
+ * instructions may trap on unaligned access.
+ *
+ * This class provides methods to load and store values at unaligned
+ * addresses. It ensures that the compiler doesn't generate instructions
+ * that could trap on the unaligned memory access.
+ */
+
+namespace pinyin{
+    template <typename T>
+    class UnalignedMemory{
+    public:
+        /**
+         * Read a value from a possibly unaligned memory address.
+         * sizeof(T) bytes are copied from src into a local T with memcpy.
+         */
+        static T load(const void * src) {
+            T value;
+            memcpy(&value, src, sizeof(T));
+            return value;
+        }
+
+        /**
+         * Store a value into a possibly unaligned memory address.
+         * sizeof(T) bytes of value are copied to dest with memcpy.
+         */
+        static void store(T value, void * dest) {
+            memcpy(dest, &value, sizeof(T));
+        }
+    };
+};
+
+
+#endif
--
cgit