スキップしてメイン コンテンツに移動

C++11 と IPP と TBB と ICU で grep

11 年前に、世界一速い grep を目指して雑に 1 から作ったソースコードを発掘できたので、化石として残しておく。
正規表現だけではなく Google 検索のような AND/OR 条件でも検索できる。
ファイル名やディレクトリの検索対象の条件もきめ細かく設定可能。
11 年前のコードなので C++11 で書かれており、内容が古い点に注意。
Intel IPP と Intel TBB も当時の仕様なので注意。
完全に趣味で作っていたためコメントがほとんどない。また、C++ あるあるとして template と inline が伝染し、ヘッダーファイルが巨大化している。

ipp_allocator.hpp

#ifndef UT_IPP_ALLOCATOR_HPP_20131227 #define UT_IPP_ALLOCATOR_HPP_20131227 #if defined(_MSC_VER) && (_MSC_VER >= 1020) # pragma once #endif #include <limits> #include <boost/throw_exception.hpp> #include <ippcore.h> #include <ipps.h> namespace ut { namespace ipp { template <class T> void* ippsMalloc(int len); // 定義しない(static_assert になる) template <> inline void* ippsMalloc<Ipp8u>(int len) { return ippsMalloc_8u(len); } template <> inline void* ippsMalloc<Ipp16u>(int len) { return ippsMalloc_16u(len); } template <> inline void* ippsMalloc<Ipp32u>(int len) { return ippsMalloc_32u(len); } template <> inline void* ippsMalloc<Ipp8s>(int len) { return ippsMalloc_8s(len); } template <> inline void* ippsMalloc<Ipp16s>(int len) { return ippsMalloc_16s(len); } template <> inline void* ippsMalloc<Ipp32s>(int len) { return ippsMalloc_32s(len); } template <> inline void* ippsMalloc<Ipp64s>(int len) { return ippsMalloc_64s(len); } template <> inline void* ippsMalloc<Ipp32f>(int len) { return ippsMalloc_32f(len); } template <> inline void* ippsMalloc<Ipp64f>(int len) { return ippsMalloc_64f(len); } template <> inline void* ippsMalloc<Ipp8sc>(int len) { return ippsMalloc_8sc(len); } template <> inline void* ippsMalloc<Ipp16sc>(int len) { return ippsMalloc_16sc(len); } template <> inline void* ippsMalloc<Ipp32sc>(int len) { return ippsMalloc_32sc(len); } template <> inline void* ippsMalloc<Ipp64sc>(int len) { return ippsMalloc_64sc(len); } template <> inline void* ippsMalloc<Ipp32fc>(int len) { return ippsMalloc_32fc(len); } template <> inline void* ippsMalloc<Ipp64fc>(int len) { return ippsMalloc_64fc(len); } template <> inline void* ippsMalloc<char>(int len) { return ippsMalloc_8u(len); } template <> inline void* ippsMalloc<wchar_t>(int len) { return ippsMalloc_16u(len); } template <class T, class real_type = T> class ipp_allocator { public: typedef T value_type; typedef value_type* pointer; typedef const value_type* const_pointer; typedef value_type& reference; typedef const 
value_type& const_reference; typedef std::size_t size_type; typedef std::ptrdiff_t difference_type; typedef real_type real_type; template <class U> struct rebind { typedef ipp_allocator<U, real_type> other; }; ipp_allocator(){} template <class U> ipp_allocator(const ipp_allocator<U, real_type>&) {} static pointer address(reference r) { return &r; } static const_pointer address(const_reference r) { return &r; } static size_type max_size() { return (std::numeric_limits<size_type>::max)(); } static void construct(const pointer p, const value_type& v) { new (p) T(v); } static void destroy(const pointer p) { p; p->~T(); } bool operator==(const ipp_allocator&) const { return true; } bool operator!=(const ipp_allocator&) const { return false; } static pointer allocate(const size_type n) { void* const p = ippMalloc(n * sizeof(T)); if (!p) { boost::throw_exception(std::bad_alloc()); } return static_cast<pointer>(p); } static pointer allocate(const size_type n, const void* const) { return allocate(n); } static void deallocate(const pointer p, const size_type) { ippFree(p); } }; } } #endif
ipp_string.hpp

#ifndef UT_IPP_STRING_HPP_20131227 #define UT_IPP_STRING_HPP_20131227 #if defined(_MSC_VER) && (_MSC_VER >= 1020) # pragma once #endif #include <string> #include "ipp_allocator.hpp" namespace ut { namespace ipp { typedef std::basic_string<char, std::char_traits<char>, ipp::ipp_allocator<char> > ipp_string; typedef std::basic_string<wchar_t, std::char_traits<wchar_t>, ipp::ipp_allocator<wchar_t> > ipp_wstring; typedef std::basic_string<char16_t, std::char_traits<char16_t>, ipp::ipp_allocator<char16_t> > ipp_u16string; typedef std::basic_string<char32_t, std::char_traits<char32_t>, ipp::ipp_allocator<char32_t> > ipp_u32string; } } #endif
io.hpp

#ifndef UT_IO_HPP_20131227 #define UT_IO_HPP_20131227 #if defined(_MSC_VER) && (_MSC_VER >= 1020) # pragma once #endif #include <algorithm> #include <list> #include <memory> #include <thread> #include <stdexcept> #include <string> #include <type_traits> #include <utility> #include <vector> #include <boost/ptr_container/ptr_vector.hpp> #include <boost/exception/all.hpp> #include <boost/noncopyable.hpp> #include <boost/shared_array.hpp> #include <windows.h> #include <atlbase.h> #include <tbb/compat/thread> #include <tbb/concurrent_queue.h> #include <tbb/enumerable_thread_specific.h> #include <tbb/task.h> #include <ippch.h> #include <ipps.h> #include <unicode/utf8.h> #include <unicode/utf16.h> #include <unicode/ucnv.h> #include "safe_close.hpp" #include "safe_free.hpp" #include "raii.hpp" #include "win.hpp" #include "ipp_allocator.hpp" #include "ipp_string.hpp" namespace ut { namespace io { namespace icu { class CharsetDetector; } template <class T> HANDLE openFile(const T& path); class Reader { public: virtual ~Reader() {} virtual void read(unsigned char* buffer, DWORD numberOfBytesToRead, DWORD& numberOfBytesRead) = 0; }; class SyncReader : public Reader { public: SyncReader(HANDLE handle) : handle_(handle) {} virtual ~SyncReader() {} static void readFile(HANDLE handle, unsigned char* buffer, DWORD numberOfBytesToRead, DWORD& numberOfBytesRead); void read(unsigned char* buffer, DWORD numberOfBytesToRead, DWORD& numberOfBytesRead) override; private: HANDLE handle_; }; class ParallelSyncReader : public Reader { public: ParallelSyncReader(HANDLE handle) : reader_(handle) {} void read(unsigned char* buffer, DWORD numberOfBytesToRead, DWORD& numberOfBytesRead) override; private: SyncReader reader_; }; class AsyncReader : public Reader { public: AsyncReader(std::size_t size, HANDLE handle); virtual ~AsyncReader() {} static void readFileEx(HANDLE handle, unsigned char* buffer, DWORD numberOfBytesToRead, LPOVERLAPPED overlapped, LPOVERLAPPED_COMPLETION_ROUTINE onComplete); 
void read(unsigned char* buffer, DWORD numberOfBytesToRead, DWORD& numberOfBytesRead) override; private: void read(unsigned char* buffer, DWORD numberOfBytesToRead); void fileIOCompletion(DWORD errorCode, DWORD bytesTransferred, LPOVERLAPPED overlapped); private: const std::size_t SIZE_; HANDLE handle_; OVERLAPPED overlapped_; long long fpointer_; int availableDataLen_; int carryoveredDataLen_; std::shared_ptr<unsigned char> data_; }; class SyncReadWorker : public Reader { public: class Request { public: Request() : handle_(nullptr), event_(nullptr), buffer_(nullptr), numberOfBytesToRead_(0), numberOfBytesRead_(nullptr) {} Request(HANDLE handle, HANDLE event, unsigned char* buffer, DWORD numberOfBytesToRead, DWORD* numberOfBytesRead) : handle_(handle), event_(event), buffer_(buffer), numberOfBytesToRead_(numberOfBytesToRead), numberOfBytesRead_(numberOfBytesRead) {} void operator()(); private: HANDLE handle_; HANDLE event_; unsigned char* buffer_; DWORD numberOfBytesToRead_; DWORD* numberOfBytesRead_; }; SyncReadWorker(HANDLE handle, HANDLE event); virtual ~SyncReadWorker() {} void read(unsigned char* buffer, DWORD numberOfBytesToRead, DWORD& numberOfBytesRead) override; static void start(); static void stop(); private: static void work(); private: HANDLE handle_; HANDLE event_; static tbb::atomic<bool> requestStop_; static tbb::concurrent_bounded_queue<Request> requests_; static boost::ptr_vector<std::thread> workers_; }; class ForEachLine : boost::noncopyable { public: ForEachLine(HANDLE file, ut::io::icu::CharsetDetector& detector); ~ForEachLine(); template <class F> bool operator()(F& callback); private: // CRLF -> 00LF, CR -> LF, LF(LE) -> LF(BE) static void replaceAllCRLFWith00LF(unsigned char* src, int len); // multibyte static void replaceAllCRWithLF(unsigned char* src, int len); // multibyte static void replaceAll00CR00LFWith000000LF(char16_t* src, int len); // UTF-16BE static void replaceAll00CRWith00LF(char16_t* src, int len); // UTF-16BE static void 
replaceAllCR00LF00With000000LF(char16_t* src, int len); // UTF-16LE static void replaceAllCR00With00LF(char16_t* src, int len); // UTF-16LE static void replaceAllLF00With00LF(char16_t* src, int len); // UTF-16LE static void replaceAll000000CR000000LFWith00000000000000LF(char32_t* src, int len); // UTF-32BE static void replaceAll000000CRWith000000LF(char32_t* src, int len); // UTF-32BE static void replaceAllCR000000LF000000With00000000000000LF(char32_t* src, int len); // UTF-32LE static void replaceAllCR000000With000000LF(char32_t* src, int len); // UTF-32LE static void replaceAllLF000000With000000LF(char32_t* src, int len); // UTF-32LE void detectCharset(); DWORD read(); template <class F> bool processData(F& f, bool flush); int findLF(const unsigned char* src, int len) const; const std::pair<int, int> project(int first, int last) const; template <class F> bool callback(int first, int last, F& f); private: static const int DATA_SIZE = 1024 * 1024; __declspec(align(32)) unsigned char data_[DATA_SIZE]; HANDLE file_; SyncReader reader_; ut::io::icu::CharsetDetector& detector_; std::string charsetName_; std::shared_ptr<UConverter> converter_; std::shared_ptr<UConverter> sjisConverter_; int availableDataLen_; int carryoveredDataLen_; long index_; ut::ipp::ipp_wstring line_; bool endsWithCR_; bool isUTF8_, isUTF16LE_, isUTF16BE_, isUTF32LE_, isUTF32BE_, isMultiByte_; }; namespace detail { template <class T, std::size_t N> struct is_same_byte { static const bool value = sizeof(T) == N; }; template <class T, class U> struct is_same_size { static const bool value = sizeof(T) == sizeof(U); }; template <class T> struct is_any_encoding_type { static const bool value = is_same_size<T, char>::value || is_same_size<T, wchar_t>::value || is_same_size<T, char16_t>::value || is_same_size<T, char32_t>::value; }; template <class T> struct is_wide_char_type { static const bool value = is_same_size<T, wchar_t>::value || is_same_size<T, char16_t>::value || is_same_size<T, 
char32_t>::value; }; } namespace ipp { template <class Elem> int find(const Elem* src, int len, Ipp8u val, typename std::enable_if< detail::is_same_size<Elem, Ipp8u>::value >::type* = nullptr); template <class Elem> int find(const Elem* src, int len, const Ipp8u* target, int lenTarget, typename std::enable_if< detail::is_same_size<Elem, Ipp8u>::value >::type* = nullptr); template <class Elem> int find(const Elem* src, int len, Ipp16u val, typename std::enable_if< detail::is_same_size<Elem, Ipp16u>::value >::type* = nullptr); template <class Elem> int find(const Elem* src, int len, const Ipp16u* target, int lenTarget, typename std::enable_if< detail::is_same_size<Elem, Ipp16u>::value >::type* = nullptr); template <class Elem> int find(const Elem* src, int len, Ipp32u val, typename std::enable_if< detail::is_same_size<Elem, Ipp32u>::value >::type* = nullptr); template <class Elem> int find(const Elem* src, int len, const Ipp32u* target, int lenTarget, typename std::enable_if< detail::is_same_size<Elem, Ipp32u>::value >::type* = nullptr); template <class Elem> int findRev(const Elem* src, int len, Ipp8u val, typename std::enable_if< detail::is_same_size<Elem, Ipp8u>::value >::type* = nullptr); template <class Elem> int findRev(const Elem* src, int len, const Ipp8u* target, int lenTarget, typename std::enable_if< detail::is_same_size<Elem, Ipp8u>::value >::type* = nullptr); template <class Elem> int findRev(const Elem* src, int len, Ipp16u val, typename std::enable_if< detail::is_same_size<Elem, Ipp16u>::value >::type* = nullptr); template <class Elem> int findRev(const Elem* src, int len, const Ipp16u* target, int lenTarget, typename std::enable_if< detail::is_same_size<Elem, Ipp16u>::value >::type* = nullptr); template <class Elem> int findRev(const Elem* src, int len, Ipp32u val, typename std::enable_if< detail::is_same_size<Elem, Ipp32u>::value >::type* = nullptr); template <class Elem> int findRev(const Elem* src, int len, const Ipp32u* target, int lenTarget, 
typename std::enable_if< detail::is_same_size<Elem, Ipp32u>::value >::type* = nullptr); template <class Elem> int compare(const Elem* src1, int len1, const Elem* src2, int len2, typename std::enable_if< detail::is_same_size<Elem, Ipp8u>::value >::type* = nullptr); template <class Elem> int compare(const Elem* src1, int len1, const Elem* src2, int len2, typename std::enable_if< detail::is_same_size<Elem, Ipp16u>::value >::type* = nullptr); template <class Container> int compare(const Container& src1, const Container& src2); template <class Elem> int icompare(const Elem* src1, int len1, const Elem* src2, int len2, typename std::enable_if< detail::is_same_size<Elem, Ipp8u>::value >::type* = nullptr); template <class Elem> int icompare(const Elem* src1, int len1, const Elem* src2, int len2, typename std::enable_if< detail::is_same_size<Elem, Ipp16u>::value >::type* = nullptr); template <class Container> int icompare(const Container& src1, const Container& src2); template <class Elem> bool equals(const Elem* src1, int len1, const Elem* src2, int len2, typename std::enable_if< detail::is_same_size<Elem, Ipp8u>::value >::type* = nullptr); template <class Elem> bool equals(const Elem* src1, int len1, const Elem* src2, int len2, typename std::enable_if< detail::is_same_size<Elem, Ipp16u>::value >::type* = nullptr); template <class Container> bool equals(const Container& src1, const Container& src2); template <class Elem> bool iequals(const Elem* src1, int len1, const Elem* src2, int len2, typename std::enable_if< detail::is_same_size<Elem, Ipp8u>::value >::type* = nullptr); template <class Elem> bool iequals(const Elem* src1, int len1, const Elem* src2, int len2, typename std::enable_if< detail::is_same_size<Elem, Ipp16u>::value >::type* = nullptr); template <class Container> bool iequals(const Container& src1, const Container& src2); void copy(const unsigned char* src, unsigned char* dst, int len); void move(const unsigned char* src, unsigned char* dst, int len); template 
<class Elem> void convertUTF16LEToUTF16BE(Elem* src, int len, typename std::enable_if< detail::is_same_size<Elem, char16_t>::value >::type* = nullptr); template <class Container> void convertUTF16LEToUTF16BE(Container& src, typename std::enable_if< detail::is_same_size<typename Container::value_type, char16_t>::value >::type* = nullptr); template <class Elem> void convertUTF16BEToUTF16LE(Elem* src, int len, typename std::enable_if< detail::is_same_size<Elem, char16_t>::value >::type* = nullptr); template <class Container> void convertUTF16BEToUTF16LE(Container& src, typename std::enable_if< detail::is_same_size<typename Container::value_type, char16_t>::value >::type* = nullptr); template <class Elem> void convertUTF32LEToUTF32BE(Elem* src, int len, typename std::enable_if< detail::is_same_size<Elem, char32_t>::value >::type* = nullptr); template <class Container> void convertUTF32LEToUTF32BE(Container& src, typename std::enable_if< detail::is_same_size<typename Container::value_type, char32_t>::value >::type* = nullptr); template <class Elem> void convertUTF32BEToUTF32LE(Elem* src, int len, typename std::enable_if< detail::is_same_size<Elem, char32_t>::value >::type* = nullptr); template <class Container> void convertUTF32BEToUTF32LE(Container& src, typename std::enable_if< detail::is_same_size<typename Container::value_type, char32_t>::value >::type* = nullptr); template <class Elem> void convertToUppercase(Elem* src, int len, typename std::enable_if< detail::is_same_size<Elem, char>::value >::type* = nullptr); template <class Container> void convertToUppercase(Container& src, typename std::enable_if< detail::is_same_size<typename Container::value_type, char>::value >::type* = nullptr); template <class Elem> void convertToUppercase(Elem* src, int len, typename std::enable_if< detail::is_same_size<Elem, wchar_t>::value >::type* = nullptr); template <class Container> void convertToUppercase(Container& src, typename std::enable_if< detail::is_same_size<typename 
Container::value_type, wchar_t>::value >::type* = nullptr); template <class Elem> void convertToLowercase(Elem* src, int len, typename std::enable_if< detail::is_same_size<Elem, char>::value >::type* = nullptr); template <class Container> void convertToLowercase(Container& src, typename std::enable_if< detail::is_same_size<typename Container::value_type, char>::value >::type* = nullptr); template <class Elem> void convertToLowercase(Elem* src, int len, typename std::enable_if< detail::is_same_size<Elem, wchar_t>::value >::type* = nullptr); template <class Container> void convertToLowercase(Container& src, typename std::enable_if< detail::is_same_size<typename Container::value_type, wchar_t>::value >::type* = nullptr); template <class Elem, class Container> long long convertUTF16ToUTF8(const Elem* src, int len, Container& dst, bool isBE, typename std::enable_if< detail::is_same_size<Elem, char16_t>::value && detail::is_same_size<typename Container::value_type, char>::value >::type* = nullptr); template <class Container1, class Container2> long long convertUTF16ToUTF8(const Container1& src, Container2& dst, bool isBE, typename std::enable_if< detail::is_same_size<typename Container1::value_type, char16_t>::value && detail::is_same_size<typename Container2::value_type, char>::value >::type* = nullptr); // VC2012, VC2013 でエラーになるため、宣言だけでなく定義も行う template <class Elem, class Container> long long convertUTF8ToUTF16(const Elem* src, int len, Container& dst, bool isBE, typename std::enable_if< detail::is_same_size<Elem, char>::value && detail::is_same_size<typename Container::value_type, char16_t>::value >::type* = nullptr) { if (len <= 0) { dst.clear(); return 0; } const bool hasBOM = len >= 3 && ut::io::hasBOM(reinterpret_cast<const char*>(src)); Ipp32u srcLen; Ipp32u dstLen = len + 1 + 16; // 最大 1 倍で済む。1 は BOM。16 は保険 IppStatus status; dst.resize(dstLen); if (hasBOM) { srcLen = len; status = ippsConvertUTF_8u16u(reinterpret_cast<const Ipp8u*>(src), &srcLen, 
const_cast<Ipp16u*>(reinterpret_cast<const Ipp16u*>(dst.data())), &dstLen, isBE); } else { boost::shared_array<Ipp8u> srcLocal(ippsMalloc_8u(len + 3), ut::SafeIppsFreer<Ipp8u>()); srcLocal[0] = 0xEF; srcLocal[1] = 0xBB; srcLocal[2] = 0xBF; ut::io::ipp::copy(reinterpret_cast<const unsigned char*>(src), &srcLocal[3], len); srcLen = len + 3; status = ippsConvertUTF_8u16u(srcLocal.get(), &srcLen, const_cast<Ipp16u*>(reinterpret_cast<const Ipp16u*>(dst.data())), &dstLen, isBE); } if (status != ippStsNoErr) { if (status == ippStsNullPtrErr) { BOOST_THROW_EXCEPTION(ut::io::IOException(L"ポインタが NULL です")); } else { BOOST_THROW_EXCEPTION(ut::io::IOException(L"ippsConvertUTF_8u16u failed.")); } } dst.resize(dstLen); if (hasBOM) { return srcLen; } else { return srcLen - 3; } } template <class Container1, class Container2> long long convertUTF8ToUTF16(const Container1& src, Container2& dst, bool isBE, typename std::enable_if< detail::is_same_size<typename Container1::value_type, char>::value && detail::is_same_size<typename Container2::value_type, char16_t>::value >::type* = nullptr); } namespace icu { class CharsetDetector { public: virtual ~CharsetDetector() {} virtual std::shared_ptr<UConverter> open(const char* text, int len, std::string& charsetName, int* confidence = nullptr) = 0; }; class AutoCharsetDetector : public CharsetDetector { public: explicit AutoCharsetDetector(int limit); virtual ~AutoCharsetDetector() {} std::shared_ptr<UConverter> open(const char* text, int len, std::string& charsetName, int* confidence = nullptr) override; private: typedef std::shared_ptr<UCharsetDetector> SharedCharsetDetector; int limit_; tbb::enumerable_thread_specific<SharedCharsetDetector, tbb::cache_aligned_allocator<SharedCharsetDetector>, tbb::ets_key_per_instance> detector_; }; class FixedCharsetDetector : public CharsetDetector { public: explicit FixedCharsetDetector(const std::string& charsetName); virtual ~FixedCharsetDetector() {} std::shared_ptr<UConverter> open(const char* 
text, int len, std::string& charsetName, int* confidence = nullptr) override; private: std::string charsetName_; }; const std::wstring getErrorName(UErrorCode status); std::shared_ptr<UCharsetDetector> openCharsetDetector(); std::shared_ptr<UConverter> openConverter(const char* charsetName); template <class F> bool all(const char* src, int len, F&& pred); template <class F> bool all(const char16_t* src, int len, F&& pred); template <class Elem, class F> bool all(const Elem* src, int len, F&& pred, typename std::enable_if< detail::is_same_size<Elem, char>::value >::type* = nullptr); template <class Elem, class F> bool all(const Elem* src, int len, F&& pred, typename std::enable_if< detail::is_same_size<Elem, char16_t>::value >::type* = nullptr); template <class Container, class F> bool all(const Container& src, F&& pred); template <class Elem, class Container> long long convertFromUnicode(const Elem* src, int len, const char* charsetName, Container& dst, typename std::enable_if< detail::is_same_size<Elem, wchar_t>::value && detail::is_any_encoding_type<typename Container::value_type>::value >::type* = nullptr); template <class Elem, class Container> long long convertFromUnicode(const Elem* src, int len, UConverter* converter, bool flush, Container& dst, typename std::enable_if< detail::is_same_size<Elem, wchar_t>::value && detail::is_any_encoding_type<typename Container::value_type>::value >::type* = nullptr); template <class Elem> long long convertFromUnicode(const Elem* src, int len, UConverter* converter, bool flush, char*& target, const char* targetLimit, typename std::enable_if< detail::is_same_size<Elem, wchar_t>::value >::type* = nullptr); template <class Container1, class Container2> long long convertFromUnicode(const Container1& src, const char* charsetName, Container2& dst, typename std::enable_if< detail::is_same_size<typename Container1::value_type, wchar_t>::value && detail::is_any_encoding_type<typename Container2::value_type>::value >::type* = 
nullptr); template <class Container1, class Container2> long long convertFromUnicode(const Container1& src, UConverter* converter, bool flush, Container2& dst, typename std::enable_if< detail::is_same_size<typename Container1::value_type, wchar_t>::value && detail::is_any_encoding_type<typename Container2::value_type>::value >::type* = nullptr); template <class Elem, class Container> long long convertToUnicode(const Elem* src, int len, const char* charsetName, Container& dst, typename std::enable_if< detail::is_any_encoding_type<Elem>::value && detail::is_same_size<typename Container::value_type, wchar_t>::value >::type* = nullptr); template <class Elem, class Container> long long convertToUnicode(const Elem* src, int len, UConverter* converter, bool flush, Container& dst, typename std::enable_if< detail::is_wide_char_type<Elem>::value && detail::is_same_size<typename Container::value_type, wchar_t>::value >::type* = nullptr); template <class Elem, class Container> long long convertToUnicode(const Elem* src, int len, UConverter* converter, bool flush, Container& dst, typename std::enable_if< detail::is_same_size<Elem, char>::value && detail::is_same_size<typename Container::value_type, wchar_t>::value >::type* = nullptr); template <class Container1, class Container2> long long convertToUnicode(const Container1& src, const char* charsetName, Container2& dst, typename std::enable_if< detail::is_any_encoding_type<typename Container1::value_type>::value && detail::is_same_size<typename Container2::value_type, wchar_t>::value >::type* = nullptr); template <class Container1, class Container2> long long convertToUnicode(const Container1& src, UConverter* converter, bool flush, Container2& dst, typename std::enable_if< detail::is_any_encoding_type<typename Container1::value_type>::value && detail::is_same_size<typename Container2::value_type, wchar_t>::value >::type* = nullptr); } class IOException : public boost::exception, public std::runtime_error { public: explicit 
IOException(const std::wstring& what = L""); virtual ~IOException() throw(); }; class Filter { public: explicit Filter(std::shared_ptr<Filter> parentFilter = nullptr) : parentFilter_(parentFilter) {} virtual ~Filter() {} void setParentFilter(std::shared_ptr<Filter> parentFilter) { parentFilter_ = parentFilter; } const Filter* getParentFilter() const { return parentFilter_.get(); } bool processParentFilter(bool isDirectory, const ut::ipp::ipp_wstring& path) const { return !parentFilter_ || (*parentFilter_)(isDirectory, path); } bool processParentFilter(HANDLE file) const { return !parentFilter_ || (*parentFilter_)(file); } bool operator()(bool isDirectory, const std::wstring& path) const { return (*this)(isDirectory, ut::ipp::ipp_wstring(path.cbegin(), path.cend())); } virtual bool operator()(bool isDirectory, const ut::ipp::ipp_wstring& path) const { return processParentFilter(isDirectory, path); } virtual bool operator()(HANDLE file) const { return processParentFilter(file); } private: std::shared_ptr<Filter> parentFilter_; }; class FileSizeFilter : public Filter { public: FileSizeFilter(long long threshold, bool less, std::shared_ptr<Filter> parentFilter = nullptr) : threshold_(threshold), less_(less), Filter(parentFilter) {} virtual ~FileSizeFilter() {} bool operator()(HANDLE file) const override; private: long long threshold_; bool less_; }; bool hasBOM(const char* src); bool hasBOM(const char16_t* src); bool hasBOM(const char32_t* src); template <class Elem> bool hasBOM(const Elem* src, typename std::enable_if< detail::is_same_size<Elem, char>::value >::type* = nullptr); template <class Elem> bool hasBOM(const Elem* src, typename std::enable_if< detail::is_same_size<Elem, char16_t>::value >::type* = nullptr); template <class Elem> bool hasBOM(const Elem* src, typename std::enable_if< detail::is_same_size<Elem, char32_t>::value >::type* = nullptr); template <class Container> bool hasBOM(const Container& src, typename std::enable_if< 
detail::is_same_size<typename Container::value_type, char>::value >::type* = nullptr); template <class Container> bool hasBOM(const Container& src, typename std::enable_if< detail::is_same_size<typename Container::value_type, char16_t>::value >::type* = nullptr); template <class Container> bool hasBOM(const Container& src, typename std::enable_if< detail::is_same_size<typename Container::value_type, char32_t>::value >::type* = nullptr); template <class F> void forEachLine(HANDLE file, icu::CharsetDetector& detector, F& callback); template <class F> void forEachLine(ForEachLine& iterator, F& callback); } } template <class T> HANDLE ut::io::openFile(const T& path) { return CreateFile(ut::addExtendedLengthPathPrefix(path).c_str(), GENERIC_READ, FILE_SHARE_READ, nullptr, OPEN_EXISTING, FILE_FLAG_SEQUENTIAL_SCAN, nullptr); } // ForEachLine template <class F> bool ut::io::ForEachLine::operator()(F& callback) { while (read() > 0) { if (!processData(callback, false)) { return false; } } return processData(callback, true); } template <class F> bool ut::io::ForEachLine::processData(F& f, bool flush) { if (flush) { if (!callback(0, availableDataLen_, f)) { carryoveredDataLen_ = 0; return false; } carryoveredDataLen_ = 0; return true; } int first = 0; { // 改行コードを LF に統一 if ((isUTF32BE_ || isUTF32LE_) && availableDataLen_ < sizeof(char32_t)) { carryoveredDataLen_ = availableDataLen_; return true; } else if ((isUTF16BE_ || isUTF16LE_) && availableDataLen_ < sizeof(char16_t)) { carryoveredDataLen_ = availableDataLen_; return true; } else if ((isUTF8_ || isMultiByte_) && availableDataLen_ < sizeof(char)) { carryoveredDataLen_ = availableDataLen_; return true; } if (isUTF32BE_) { if (endsWithCR_ && reinterpret_cast<char32_t*>(data_)[0] == 0x0A000000) { reinterpret_cast<char32_t*>(data_)[0] = 0; first = 4; } endsWithCR_ = reinterpret_cast<char32_t*>(data_)[availableDataLen_ / sizeof(char32_t) - 1] == 0x0D000000; 
replaceAll000000CR000000LFWith00000000000000LF(reinterpret_cast<char32_t*>(data_), availableDataLen_ / sizeof(char32_t)); replaceAll000000CRWith000000LF(reinterpret_cast<char32_t*>(data_), availableDataLen_ / sizeof(char32_t)); } else if (isUTF32LE_) { if (endsWithCR_ && reinterpret_cast<char32_t*>(data_)[0] == 0x0000000A) { reinterpret_cast<char32_t*>(data_)[0] = 0; first = 4; } endsWithCR_ = reinterpret_cast<char32_t*>(data_)[availableDataLen_ / sizeof(char32_t) - 1] == 0x0000000D; replaceAllCR000000LF000000With00000000000000LF(reinterpret_cast<char32_t*>(data_), availableDataLen_ / sizeof(char32_t)); replaceAllCR000000With000000LF(reinterpret_cast<char32_t*>(data_), availableDataLen_ / sizeof(char32_t)); replaceAllLF000000With000000LF(reinterpret_cast<char32_t*>(data_), availableDataLen_ / sizeof(char32_t)); } else if (isUTF16BE_) { if (endsWithCR_ && reinterpret_cast<char16_t*>(data_)[0] == 0x0A00) { reinterpret_cast<char16_t*>(data_)[0] = 0; first = 2; } endsWithCR_ = reinterpret_cast<char16_t*>(data_)[availableDataLen_ / sizeof(char16_t) - 1] == 0x0D00; replaceAll00CR00LFWith000000LF(reinterpret_cast<char16_t*>(data_), availableDataLen_ / sizeof(char16_t)); replaceAll00CRWith00LF(reinterpret_cast<char16_t*>(data_), availableDataLen_ / sizeof(char16_t)); } else if (isUTF16LE_) { if (endsWithCR_ && reinterpret_cast<char16_t*>(data_)[0] == 0x000A) { reinterpret_cast<char16_t*>(data_)[0] = 0; first = 2; } endsWithCR_ = reinterpret_cast<char16_t*>(data_)[availableDataLen_ / sizeof(char16_t) - 1] == 0x000D; replaceAllCR00LF00With000000LF(reinterpret_cast<char16_t*>(data_), availableDataLen_ / sizeof(char16_t)); replaceAllCR00With00LF(reinterpret_cast<char16_t*>(data_), availableDataLen_ / sizeof(char16_t)); replaceAllLF00With00LF(reinterpret_cast<char16_t*>(data_), availableDataLen_ / sizeof(char16_t)); } else { if (endsWithCR_ && data_[0] == '\n') { data_[0] = 0; first = 1; } endsWithCR_ = data_[availableDataLen_ - 1] == '\r'; replaceAllCRLFWith00LF(data_, 
availableDataLen_); replaceAllCRWithLF(data_, availableDataLen_); } } int last = 0; int alignedLast = 0; while ((last = findLF(data_ + alignedLast, availableDataLen_ - alignedLast)) != -1) { data_[last += alignedLast] = 0; if (!callback(first, last, f)) { return false; } first = last + 1; last = first; alignedLast = last - last % 32; if (first >= availableDataLen_) { break; } } if (first < availableDataLen_) { carryoveredDataLen_ = availableDataLen_ - first; ut::io::ipp::move(data_ + first, data_, carryoveredDataLen_); } else { carryoveredDataLen_ = 0; } return true; } template <class F> bool ut::io::ForEachLine::callback(int first, int last, F& f) { const std::pair<int, int> range(project(first, last)); if (isUTF8_) { if (index_ <= 1 && (range.second - range.first) >= 3 && ut::io::hasBOM(&data_[range.first])) { // UTF-8 BOM を除去 return f(reinterpret_cast<const char*>(&data_[range.first + 3]), reinterpret_cast<const char*>(&data_[range.second]), index_++); } else { return f(reinterpret_cast<const char*>(&data_[range.first]), reinterpret_cast<const char*>(&data_[range.second]), index_++); } } else if (isUTF16LE_) { static_assert(sizeof(wchar_t) == sizeof(char16_t), "wchar_t must be 2 bytes."); const int bytes = range.second - range.first; const wchar_t* first = reinterpret_cast<const wchar_t*>(&data_[range.first]); const wchar_t* last = first + bytes / sizeof(char16_t); if (bytes % sizeof(char16_t) != 0) { BOOST_THROW_EXCEPTION(ut::io::IOException(L"bytes must be multiple of 2.")); } if (index_ <= 1 && last - first >= 1 && ut::io::hasBOM(first)) { // UTF-16LE BOM を除去 return f(first + 1, last, index_++); } else { return f(first, last, index_++); } } else if (isUTF16BE_) { static_assert(sizeof(wchar_t) == sizeof(char16_t), "wchar_t must be 2 bytes."); const int bytes = range.second - range.first; wchar_t* first = reinterpret_cast<wchar_t*>(&data_[range.first]); wchar_t* last = first + bytes / sizeof(char16_t); if (bytes % sizeof(char16_t) != 0) { 
BOOST_THROW_EXCEPTION(ut::io::IOException(L"bytes must be multiple of 2.")); } if (index_ <= 1 && last - first >= 1 && ut::io::hasBOM(first)) { // UTF-16BE BOM を除去 const int len = last - first - 1; if (len > 0) { ut::io::ipp::convertUTF16BEToUTF16LE(first + 1, len); } return f(first + 1, last, index_++); } else { const int len = last - first; if (len > 0) { ut::io::ipp::convertUTF16BEToUTF16LE(first, len); } return f(first, last, index_++); } } else if (isUTF32LE_ || isUTF32BE_) { const int bytes = range.second - range.first; char32_t* first = reinterpret_cast<char32_t*>(&data_[range.first]); char32_t* last = first + bytes / sizeof(char32_t); if (bytes % sizeof(char32_t) != 0) { BOOST_THROW_EXCEPTION(ut::io::IOException(L"bytes must be multiple of 4.")); } if (index_ <= 1 && last - first >= 1 && ut::io::hasBOM(first)) { // UTF-32LE BOM を除去 const int len = last - first - 1; if (len > 0) { ut::io::icu::convertToUnicode(first + 1, len, converter_.get(), true, line_); } else { line_.clear(); } } else { const int len = last - first; if (len > 0) { ut::io::icu::convertToUnicode(first, len, converter_.get(), true, line_); } else { line_.clear(); } } return f(line_.data(), line_.data() + line_.size(), index_++); } else if (isMultiByte_) { try { ut::io::icu::convertToUnicode(&data_[range.first], range.second - range.first, converter_.get(), true, line_); if (!ut::io::icu::all(line_, [](int32_t c) -> bool { return c != 0 && (u_isprint(c) || c == '\t'); })) { BOOST_THROW_EXCEPTION(ut::io::IOException(L"不正なフォーマットです。")); } } catch (...) 
{ if (sjisConverter_) { ut::io::icu::convertToUnicode(&data_[range.first], range.second - range.first, sjisConverter_.get(), true, line_); converter_ = sjisConverter_; sjisConverter_.reset(); } else { throw; } } return f(line_.data(), line_.data() + line_.size(), index_++); } else { return f(reinterpret_cast<const char*>(&data_[range.first]), reinterpret_cast<const char*>(&data_[range.second]), index_++); } } // IPP template <class Elem> int ut::io::ipp::find(const Elem* src, int len, Ipp8u val, typename std::enable_if< detail::is_same_size<Elem, Ipp8u>::value >::type*) { int pos; if (ippsFindC_8u(reinterpret_cast<const Ipp8u*>(src), len, val, &pos) == ippStsNoErr) { return pos; } else { BOOST_THROW_EXCEPTION(ut::io::IOException(L"ippsFindC_8u failed.")); } } template <class Elem> int ut::io::ipp::find(const Elem* src, int len, const Ipp8u* target, int lenTarget, typename std::enable_if< detail::is_same_size<Elem, Ipp8u>::value >::type*) { int pos; if (ippsFind_8u(reinterpret_cast<const Ipp8u*>(src), len, target, lenTarget, &pos) == ippStsNoErr) { return pos; } else { BOOST_THROW_EXCEPTION(ut::io::IOException(L"ippsFind_8u failed.")); } } template <class Elem> int ut::io::ipp::find(const Elem* src, int len, Ipp16u val, typename std::enable_if< detail::is_same_size<Elem, Ipp16u>::value >::type*) { int pos; if (ippsFindC_16u(reinterpret_cast<const Ipp16u*>(src), len, val, &pos) == ippStsNoErr) { return pos; } else { BOOST_THROW_EXCEPTION(ut::io::IOException(L"ippsFindC_16u failed.")); } } template <class Elem> int ut::io::ipp::find(const Elem* src, int len, const Ipp16u* target, int lenTarget, typename std::enable_if< detail::is_same_size<Elem, Ipp16u>::value >::type*) { int pos; if (ippsFind_16u(reinterpret_cast<const Ipp16u*>(src), len, target, lenTarget, &pos) == ippStsNoErr) { return pos; } else { BOOST_THROW_EXCEPTION(ut::io::IOException(L"ippsFind_16u failed.")); } } template <class Elem> int ut::io::ipp::find(const Elem* src, int len, Ipp32u val, typename 
std::enable_if< detail::is_same_size<Elem, Ipp32u>::value >::type*) { const Elem* pos = reinterpret_cast<const Elem*>(std::find(reinterpret_cast<const Ipp32u*>(src), reinterpret_cast<const Ipp32u*>(src) + len, val)); if (pos == src + len) { return -1; } return pos - src; } template <class Elem> int ut::io::ipp::find(const Elem* src, int len, const Ipp32u* target, int lenTarget, typename std::enable_if< detail::is_same_size<Elem, Ipp32u>::value >::type*) { const Elem* pos = reinterpret_cast<const Elem*>(std::search(reinterpret_cast<const Ipp32u*>(src), reinterpret_cast<const Ipp32u*>(src) + len, target, target + lenTarget)); if (pos == src + len) { return -1; } return pos - src; } template <class Elem> int ut::io::ipp::findRev(const Elem* src, int len, Ipp8u val, typename std::enable_if< detail::is_same_size<Elem, Ipp8u>::value >::type*) { int pos; if (ippsFindRevC_8u(reinterpret_cast<const Ipp8u*>(src), len, val, &pos) == ippStsNoErr) { return pos; } else { BOOST_THROW_EXCEPTION(ut::io::IOException(L"ippsFindRevC_8u failed.")); } } template <class Elem> int ut::io::ipp::findRev(const Elem* src, int len, const Ipp8u* target, int lenTarget, typename std::enable_if< detail::is_same_size<Elem, Ipp8u>::value >::type*) { int pos; if (ippsFindRev_8u(reinterpret_cast<const Ipp8u*>(src), len, target, lenTarget, &pos) == ippStsNoErr) { return pos; } else { BOOST_THROW_EXCEPTION(ut::io::IOException(L"ippsFindRev_8u failed.")); } } template <class Elem> int ut::io::ipp::findRev(const Elem* src, int len, Ipp16u val, typename std::enable_if< detail::is_same_size<Elem, Ipp16u>::value >::type*) { int pos; if (ippsFindRevC_16u(reinterpret_cast<const Ipp16u*>(src), len, val, &pos) == ippStsNoErr) { return pos; } else { BOOST_THROW_EXCEPTION(ut::io::IOException(L"ippsFindRevC_16u failed.")); } } template <class Elem> int ut::io::ipp::findRev(const Elem* src, int len, const Ipp16u* target, int lenTarget, typename std::enable_if< detail::is_same_size<Elem, Ipp16u>::value >::type*) { 
int pos; if (ippsFindRev_16u(reinterpret_cast<const Ipp16u*>(src), len, target, lenTarget, &pos) == ippStsNoErr) { return pos; } else { BOOST_THROW_EXCEPTION(ut::io::IOException(L"ippsFindRev_16u failed.")); } } template <class Elem> int ut::io::ipp::findRev(const Elem* src, int len, Ipp32u val, typename std::enable_if< detail::is_same_size<Elem, Ipp32u>::value >::type*) { const boost::reverse_iterator<const Ipp32u*> reverse_it_first(reinterpret_cast<const Ipp32u*>(src) + len); const boost::reverse_iterator<const Ipp32u*> reverse_it_last(reinterpret_cast<const Ipp32u*>(src)); const boost::reverse_iterator<const Ipp32u*> pos(std::find(reverse_it_first, reverse_it_last, val)); if (pos == reverse_it_last) { return -1; } return pos.base() - reinterpret_cast<const Ipp32u*>(src); } template <class Elem> int ut::io::ipp::findRev(const Elem* src, int len, const Ipp32u* target, int lenTarget, typename std::enable_if< detail::is_same_size<Elem, Ipp32u>::value >::type*) { const boost::reverse_iterator<const Ipp32u*> reverse_it_first(reinterpret_cast<const Ipp32u*>(src) + len); const boost::reverse_iterator<const Ipp32u*> reverse_it_last(reinterpret_cast<const Ipp32u*>(src)); const boost::reverse_iterator<const Ipp32u*> pos(std::search(reverse_it_first, reverse_it_last, target, target + lenTarget)); if (pos == reverse_it_last) { return -1; } return pos.base() - reinterpret_cast<const Ipp32u*>(src); } template <class Elem> int ut::io::ipp::compare(const Elem* src1, int len1, const Elem* src2, int len2, typename std::enable_if< detail::is_same_size<Elem, Ipp8u>::value >::type*) { const int len = (std::min)(len1, len2); int result; if (ippsCompare_8u(reinterpret_cast<const Ipp8u*>(src1), reinterpret_cast<const Ipp8u*>(src2), len, &result) == ippStsNoErr) { if (result == 0) { if (len1 < len2) { return -1; } else if (len1 > len2) { return +1; } return 0; } return result; } else { BOOST_THROW_EXCEPTION(ut::io::IOException(L"ippsCompare_8u failed.")); } } template <class Elem> int 
ut::io::ipp::compare(const Elem* src1, int len1, const Elem* src2, int len2, typename std::enable_if< detail::is_same_size<Elem, Ipp16u>::value >::type*) { const int len = (std::min)(len1, len2); int result; if (ippsCompare_16u(reinterpret_cast<const Ipp16u*>(src1), reinterpret_cast<const Ipp16u*>(src2), len, &result) == ippStsNoErr) { if (result == 0) { if (len1 < len2) { return -1; } else if (len1 > len2) { return +1; } return 0; } return result; } else { BOOST_THROW_EXCEPTION(ut::io::IOException(L"ippsCompare_16u failed.")); } } template <class Container> int ut::io::ipp::compare(const Container& src1, const Container& src2) { return compare(src1.data(), src1.size(), src2.data(), src2.size()); } template <class Elem> int ut::io::ipp::icompare(const Elem* src1, int len1, const Elem* src2, int len2, typename std::enable_if< detail::is_same_size<Elem, Ipp8u>::value >::type*) { const int len = (std::min)(len1, len2); int result; if (ippsCompareIgnoreCaseLatin_8u(reinterpret_cast<const Ipp8u*>(src1), reinterpret_cast<const Ipp8u*>(src2), len, &result) == ippStsNoErr) { if (result == 0) { if (len1 < len2) { return -1; } else if (len1 > len2) { return +1; } return 0; } return result; } else { BOOST_THROW_EXCEPTION(ut::io::IOException(L"ippsCompareIgnoreCaseLatin_8u failed.")); } } template <class Elem> int ut::io::ipp::icompare(const Elem* src1, int len1, const Elem* src2, int len2, typename std::enable_if< detail::is_same_size<Elem, Ipp16u>::value >::type*) { const int len = (std::min)(len1, len2); int result; if (ippsCompareIgnoreCase_16u(reinterpret_cast<const Ipp16u*>(src1), reinterpret_cast<const Ipp16u*>(src2), len, &result) == ippStsNoErr) { if (result == 0) { if (len1 < len2) { return -1; } else if (len1 > len2) { return +1; } return 0; } return result; } else { BOOST_THROW_EXCEPTION(ut::io::IOException(L"ippsCompareIgnoreCase_16u failed.")); } } template <class Container> int ut::io::ipp::icompare(const Container& src1, const Container& src2) { return 
icompare(src1.data(), src1.size(), src2.data(), src2.size()); } template <class Elem> bool ut::io::ipp::equals(const Elem* src1, int len1, const Elem* src2, int len2, typename std::enable_if< detail::is_same_size<Elem, Ipp8u>::value >::type*) { if (len1 == len2) { int result; if (ippsEqual_8u(reinterpret_cast<const Ipp8u*>(src1), reinterpret_cast<const Ipp8u*>(src2), len1, &result) == ippStsNoErr) { return result != 0; } else { BOOST_THROW_EXCEPTION(ut::io::IOException(L"ippsEqual_8u failed.")); } } return false; } template <class Elem> bool ut::io::ipp::equals(const Elem* src1, int len1, const Elem* src2, int len2, typename std::enable_if< detail::is_same_size<Elem, Ipp16u>::value >::type*) { if (len1 == len2) { int result; if (ippsEqual_16u(reinterpret_cast<const Ipp16u*>(src1), reinterpret_cast<const Ipp16u*>(src2), len1, &result) == ippStsNoErr) { return result != 0; } else { BOOST_THROW_EXCEPTION(ut::io::IOException(L"ippsEqual_16u failed.")); } } return false; } template <class Container> bool ut::io::ipp::equals(const Container& src1, const Container& src2) { return equals(src1.data(), src1.size(), src2.data(), src2.size()); } template <class Elem> bool ut::io::ipp::iequals(const Elem* src1, int len1, const Elem* src2, int len2, typename std::enable_if< detail::is_same_size<Elem, Ipp8u>::value >::type*) { return icompare(src1, len1, src2, len2) == 0; } template <class Elem> bool ut::io::ipp::iequals(const Elem* src1, int len1, const Elem* src2, int len2, typename std::enable_if< detail::is_same_size<Elem, Ipp16u>::value >::type*) { return icompare(src1, len1, src2, len2) == 0; } template <class Container> bool ut::io::ipp::iequals(const Container& src1, const Container& src2) { return iequals(src1.data(), src1.size(), src2.data(), src2.size()); } template <class Elem> void ut::io::ipp::convertUTF16LEToUTF16BE(Elem* src, int len, typename std::enable_if< detail::is_same_size<Elem, char16_t>::value >::type*) { const IppStatus status = 
ippsSwapBytes_16u_I(reinterpret_cast<Ipp16u*>(src), len); if (status != ippStsNoErr) { if (status == ippStsNullPtrErr) { BOOST_THROW_EXCEPTION(ut::io::IOException(L"ポインタが NULL です")); } else if (status == ippStsSizeErr) { BOOST_THROW_EXCEPTION(ut::io::IOException(L"Len が 0 以下です")); } else { BOOST_THROW_EXCEPTION(ut::io::IOException(L"ippsSwapBytes_16u_I failed.")); } } } template <class Container> void ut::io::ipp::convertUTF16LEToUTF16BE(Container& src, typename std::enable_if< detail::is_same_size<typename Container::value_type, char16_t>::value >::type*) { return convertUTF16LEToUTF16BE(const_cast<char16_t*>(reinterpret_cast<const char16_t*>(src.data())), src.size()); } template <class Elem> void ut::io::ipp::convertUTF16BEToUTF16LE(Elem* src, int len, typename std::enable_if< detail::is_same_size<Elem, char16_t>::value >::type*) { convertUTF16LEToUTF16BE(src, len); } template <class Container> void ut::io::ipp::convertUTF16BEToUTF16LE(Container& src, typename std::enable_if< detail::is_same_size<typename Container::value_type, char16_t>::value >::type*) { return convertUTF16BEToUTF16LE(const_cast<char16_t*>(reinterpret_cast<const char16_t*>(src.data())), src.size()); } template <class Elem> void ut::io::ipp::convertUTF32LEToUTF32BE(Elem* src, int len, typename std::enable_if< detail::is_same_size<Elem, char32_t>::value >::type*) { const IppStatus status = ippsSwapBytes_32u_I(reinterpret_cast<Ipp32u*>(src), len); if (status != ippStsNoErr) { if (status == ippStsNullPtrErr) { BOOST_THROW_EXCEPTION(ut::io::IOException(L"ポインタが NULL です")); } else if (status == ippStsSizeErr) { BOOST_THROW_EXCEPTION(ut::io::IOException(L"Len が 0 以下です")); } else { BOOST_THROW_EXCEPTION(ut::io::IOException(L"ippsSwapBytes_32u_I failed.")); } } } template <class Container> void ut::io::ipp::convertUTF32LEToUTF32BE(Container& src, typename std::enable_if< detail::is_same_size<typename Container::value_type, char32_t>::value >::type*) { return 
convertUTF32LEToUTF32BE(const_cast<char32_t*>(reinterpret_cast<const char32_t*>(src.data())), src.size()); } template <class Elem> void ut::io::ipp::convertUTF32BEToUTF32LE(Elem* src, int len, typename std::enable_if< detail::is_same_size<Elem, char32_t>::value >::type*) { return convertUTF32LEToUTF32BE(src, len); } template <class Container> void ut::io::ipp::convertUTF32BEToUTF32LE(Container& src, typename std::enable_if< detail::is_same_size<typename Container::value_type, char32_t>::value >::type*) { return convertUTF32BEToUTF32LE(const_cast<char32_t*>(reinterpret_cast<const char32_t*>(src.data())), src.size()); } template <class Elem> void ut::io::ipp::convertToUppercase(Elem* src, int len, typename std::enable_if< detail::is_same_size<Elem, char>::value >::type*) { if (len <= 0) { return; } IppStatus status = ippsUppercaseLatin_8u_I(reinterpret_cast<Ipp8u*>(src), len); if (status != ippStsNoErr) { if (status == ippStsNullPtrErr) { BOOST_THROW_EXCEPTION(ut::io::IOException(L"ポインタが NULL です")); } else if (status == ippStsLengthErr) { BOOST_THROW_EXCEPTION(ut::io::IOException(L"Len が負です")); } else { BOOST_THROW_EXCEPTION(ut::io::IOException(L"ippsUppercaseLatin_8u_I failed.")); } } } template <class Container> inline void ut::io::ipp::convertToUppercase(Container& src, typename std::enable_if< detail::is_same_size<typename Container::value_type, char>::value >::type*) { return convertToUppercase(const_cast<char*>(reinterpret_cast<const char*>(src.data())), src.size()); } template <class Elem> void ut::io::ipp::convertToUppercase(Elem* src, int len, typename std::enable_if< detail::is_same_size<Elem, wchar_t>::value >::type*) { if (len <= 0) { return; } IppStatus status = ippsUppercase_16u_I(reinterpret_cast<Ipp16u*>(src), len); if (status != ippStsNoErr) { if (status == ippStsNullPtrErr) { BOOST_THROW_EXCEPTION(ut::io::IOException(L"ポインタが NULL です")); } else if (status == ippStsLengthErr) { BOOST_THROW_EXCEPTION(ut::io::IOException(L"Len が負です")); } else { 
BOOST_THROW_EXCEPTION(ut::io::IOException(L"ippsUppercase_16u_I failed.")); } } } template <class Container> inline void ut::io::ipp::convertToUppercase(Container& src, typename std::enable_if< detail::is_same_size<typename Container::value_type, wchar_t>::value >::type*) { return convertToUppercase(const_cast<wchar_t*>(reinterpret_cast<const wchar_t*>(src.data())), src.size()); } template <class Elem> void ut::io::ipp::convertToLowercase(Elem* src, int len, typename std::enable_if< detail::is_same_size<Elem, char>::value >::type*) { if (len <= 0) { return; } IppStatus status = ippsLowercaseLatin_8u_I(reinterpret_cast<Ipp8u*>(src), len); if (status != ippStsNoErr) { if (status == ippStsNullPtrErr) { BOOST_THROW_EXCEPTION(ut::io::IOException(L"ポインタが NULL です")); } else if (status == ippStsLengthErr) { BOOST_THROW_EXCEPTION(ut::io::IOException(L"Len が負です")); } else { BOOST_THROW_EXCEPTION(ut::io::IOException(L"ippsLowercaseLatin_8u_I failed.")); } } } template <class Container> inline void ut::io::ipp::convertToLowercase(Container& src, typename std::enable_if< detail::is_same_size<typename Container::value_type, char>::value >::type*) { return convertToLowercase(const_cast<char*>(reinterpret_cast<const char*>(src.data())), src.size()); } template <class Elem> void ut::io::ipp::convertToLowercase(Elem* src, int len, typename std::enable_if< detail::is_same_size<Elem, wchar_t>::value >::type*) { if (len <= 0) { return; } IppStatus status = ippsLowercase_16u_I(reinterpret_cast<Ipp16u*>(src), len); if (status != ippStsNoErr) { if (status == ippStsNullPtrErr) { BOOST_THROW_EXCEPTION(ut::io::IOException(L"ポインタが NULL です")); } else if (status == ippStsLengthErr) { BOOST_THROW_EXCEPTION(ut::io::IOException(L"Len が負です")); } else { BOOST_THROW_EXCEPTION(ut::io::IOException(L"ippsLowercase_16u_I failed.")); } } } template <class Container> inline void ut::io::ipp::convertToLowercase(Container& src, typename std::enable_if< detail::is_same_size<typename Container::value_type, 
wchar_t>::value >::type*) { return convertToLowercase(const_cast<wchar_t*>(reinterpret_cast<const wchar_t*>(src.data())), src.size()); } template <class Elem, class Container> long long ut::io::ipp::convertUTF16ToUTF8(const Elem* src, int len, Container& dst, bool isBE, typename std::enable_if< detail::is_same_size<Elem, char16_t>::value && detail::is_same_size<typename Container::value_type, char>::value >::type*) { if (len <= 0) { dst.clear(); return 0; } Ipp32u srcLen = len; Ipp32u dstLen = len * 3 + 3 + 16; // 最大 3 倍で済む。3 は BOM。16 は保険 dst.resize(dstLen); IppStatus status = ippsConvertUTF_16u8u(reinterpret_cast<const Ipp16u*>(src), &srcLen, const_cast<Ipp8u*>(reinterpret_cast<const Ipp8u*>(dst.data())), &dstLen, isBE); if (status != ippStsNoErr) { if (status == ippStsNullPtrErr) { BOOST_THROW_EXCEPTION(ut::io::IOException(L"ポインタが NULL です")); } else { BOOST_THROW_EXCEPTION(ut::io::IOException(L"ippsConvertUTF_16u8u failed.")); } } dst.resize(dstLen); return srcLen; } template <class Container1, class Container2> inline long long ut::io::ipp::convertUTF16ToUTF8(const Container1& src, Container2& dst, bool isBE, typename std::enable_if< detail::is_same_size<typename Container1::value_type, char16_t>::value && detail::is_same_size<typename Container2::value_type, char>::value >::type*) { return convertUTF16ToUTF8(src.data(), src.size(), dst, isBE); } template <class Container1, class Container2> inline long long ut::io::ipp::convertUTF8ToUTF16(const Container1& src, Container2& dst, bool isBE, typename std::enable_if< detail::is_same_size<typename Container1::value_type, char>::value && detail::is_same_size<typename Container2::value_type, char16_t>::value >::type*) { return convertUTF8ToUTF16(src.data(), src.size(), dst, isBE); } // ICU template <class F> bool ut::io::icu::all(const char* src, int len, F&& pred) { int i = 0; while (i < len) { int32_t c = 0; U8_NEXT(src, i, len, c); if (c == U_SENTINEL) { return false; } if (!pred(c)) { return false; } } return 
true; } template <class F> bool ut::io::icu::all(const char16_t* src, int len, F&& pred) { int i = 0; while (i < len) { int32_t c = 0; U16_NEXT(src, i, len, c); if (!pred(c)) { return false; } } return true; } template <class Elem, class F> bool ut::io::icu::all(const Elem* src, int len, F&& pred, typename std::enable_if< detail::is_same_size<Elem, char>::value >::type*) { return all(reinterpret_cast<const char*>(src), len, std::forward<F>(pred)); } template <class Elem, class F> bool ut::io::icu::all(const Elem* src, int len, F&& pred, typename std::enable_if< detail::is_same_size<Elem, char16_t>::value >::type*) { return all(reinterpret_cast<const char16_t*>(src), len, std::forward<F>(pred)); } template <class Container, class F> bool ut::io::icu::all(const Container& src, F&& pred) { return all(src.data(), src.size(), std::forward<F>(pred)); } template <class Elem, class Container> long long ut::io::icu::convertFromUnicode(const Elem* src, int len, const char* charsetName, Container& dst, typename std::enable_if< detail::is_same_size<Elem, wchar_t>::value && detail::is_any_encoding_type<typename Container::value_type>::value >::type*) { return convertFromUnicode(src, len, openConverter(charsetName).get(), true, dst); } template <class Elem, class Container> long long ut::io::icu::convertFromUnicode(const Elem* src, int len, UConverter* converter, bool flush, Container& dst, typename std::enable_if< detail::is_same_size<Elem, wchar_t>::value && detail::is_any_encoding_type<typename Container::value_type>::value >::type*) { if (len <= 0) { dst.clear(); return 0; } typedef typename Container::value_type value_type; const int maxSize = ucnv_getMaxCharSize(converter); const std::size_t size = (std::max)(len, len * maxSize + 16); // 16 は保険 dst.resize((size + sizeof(value_type) - 1) / sizeof(value_type)); char* target = const_cast<char*>(reinterpret_cast<const char*>(dst.data())); const char* targetLimit = target + size; const long long result = convertFromUnicode(src, 
len, converter, flush, target, targetLimit); dst.resize((target - targetLimit + size + sizeof(value_type) - 1) / sizeof(value_type)); return result; } template <class Elem> long long ut::io::icu::convertFromUnicode(const Elem* src, int len, UConverter* converter, bool flush, char*& target, const char* targetLimit, typename std::enable_if< detail::is_same_size<Elem, wchar_t>::value >::type*) { if (len <= 0) { return 0; } const UChar* source = reinterpret_cast<const UChar*>(src); const UChar* sourceLimit = source + len; UErrorCode status = U_ZERO_ERROR; ucnv_fromUnicode(converter, &target, targetLimit, &source, sourceLimit, nullptr, flush, &status); if (U_FAILURE(status)) { BOOST_THROW_EXCEPTION(ut::io::IOException(L"ucnv_fromUnicode failed: " + getErrorName(status))); } return source - reinterpret_cast<const UChar*>(src); } template <class Container1, class Container2> long long ut::io::icu::convertFromUnicode(const Container1& src, const char* charsetName, Container2& dst, typename std::enable_if< detail::is_same_size<typename Container1::value_type, wchar_t>::value && detail::is_any_encoding_type<typename Container2::value_type>::value >::type*) { return convertFromUnicode(src.data(), src.size(), charsetName, dst); } template <class Container1, class Container2> long long ut::io::icu::convertFromUnicode(const Container1& src, UConverter* converter, bool flush, Container2& dst, typename std::enable_if< detail::is_same_size<typename Container1::value_type, wchar_t>::value && detail::is_any_encoding_type<typename Container2::value_type>::value >::type*) { return convertFromUnicode(src.data(), src.size(), converter, flush, dst); } template <class Elem, class Container> long long ut::io::icu::convertToUnicode(const Elem* src, int len, const char* charsetName, Container& dst, typename std::enable_if< detail::is_any_encoding_type<Elem>::value && detail::is_same_size<typename Container::value_type, wchar_t>::value >::type*) { return convertToUnicode(src, len, 
openConverter(charsetName).get(), true, dst); } template <class Elem, class Container> long long ut::io::icu::convertToUnicode(const Elem* src, int len, UConverter* converter, bool flush, Container& dst, typename std::enable_if< detail::is_wide_char_type<Elem>::value && detail::is_same_size<typename Container::value_type, wchar_t>::value >::type*) { return convertToUnicode(reinterpret_cast<const char*>(src), len * sizeof(Elem), converter, flush, dst) / sizeof(Elem); } template <class Elem, class Container> long long ut::io::icu::convertToUnicode(const Elem* src, int len, UConverter* converter, bool flush, Container& dst, typename std::enable_if< detail::is_same_size<Elem, char>::value && detail::is_same_size<typename Container::value_type, wchar_t>::value >::type*) { if (len <= 0) { dst.clear(); return 0; } const int minSize = ucnv_getMinCharSize(converter); const std::size_t size = (std::max)(len, 2 * len / minSize + 16); // 2 倍するのはサロゲートペアのため。16 は保険 dst.resize(size); const char* source = reinterpret_cast<const char*>(src); const char* sourceLimit = source + len; UChar* target = const_cast<UChar*>(reinterpret_cast<const UChar*>(dst.data())); const UChar* targetLimit = target + size; UErrorCode status = U_ZERO_ERROR; ucnv_toUnicode(converter, &target, targetLimit, &source, sourceLimit, nullptr, flush, &status); if (U_FAILURE(status)) { BOOST_THROW_EXCEPTION(ut::io::IOException(L"ucnv_toUnicode failed: " + getErrorName(status))); } dst.resize(target - targetLimit + size); return source - reinterpret_cast<const char*>(src); } template <class Container1, class Container2> long long ut::io::icu::convertToUnicode(const Container1& src, const char* charsetName, Container2& dst, typename std::enable_if< detail::is_any_encoding_type<typename Container1::value_type>::value && detail::is_same_size<typename Container2::value_type, wchar_t>::value >::type*) { return convertToUnicode(src.data(), src.size(), charsetName, dst); } template <class Container1, class Container2> long 
long ut::io::icu::convertToUnicode(const Container1& src, UConverter* converter, bool flush, Container2& dst, typename std::enable_if< detail::is_any_encoding_type<typename Container1::value_type>::value && detail::is_same_size<typename Container2::value_type, wchar_t>::value >::type*) { return convertToUnicode(src.data(), src.size(), converter, flush, dst); } // IO template <class Elem> bool ut::io::hasBOM(const Elem* src, typename std::enable_if< detail::is_same_size<Elem, char>::value >::type*) { return hasBOM(reinterpret_cast<const char*>(src)); } template <class Elem> bool ut::io::hasBOM(const Elem* src, typename std::enable_if< detail::is_same_size<Elem, char16_t>::value >::type*) { return hasBOM(reinterpret_cast<const char16_t*>(src)); } template <class Elem> bool ut::io::hasBOM(const Elem* src, typename std::enable_if< detail::is_same_size<Elem, char32_t>::value >::type*) { return hasBOM(reinterpret_cast<const char32_t*>(src)); } template <class Container> bool ut::io::hasBOM(const Container& src, typename std::enable_if< detail::is_same_size<typename Container::value_type, char>::value >::type*) { if (src.size() < 3) { return false; } return hasBOM(src.data()); } template <class Container> bool ut::io::hasBOM(const Container& src, typename std::enable_if< detail::is_same_size<typename Container::value_type, char16_t>::value >::type*) { if (src.empty()) { return false; } return hasBOM(src.data()); } template <class Container> bool ut::io::hasBOM(const Container& src, typename std::enable_if< detail::is_same_size<typename Container::value_type, char32_t>::value >::type*) { if (src.empty()) { return false; } return hasBOM(src.data()); } template <class F> void ut::io::forEachLine(HANDLE file, icu::CharsetDetector& detector, F& callback) { if (file != nullptr && file != INVALID_HANDLE_VALUE) { ForEachLine iterator(file, detector); return forEachLine(iterator, callback); } else { BOOST_THROW_EXCEPTION(IOException(ut::getLastErrorMessage())); } } template <class 
F> void ut::io::forEachLine(ForEachLine& iterator, F& callback) { iterator(callback); } #endif
grep.hpp

#ifndef UT_GREP_HPP_20131227 #define UT_GREP_HPP_20131227 #if defined(_MSC_VER) && (_MSC_VER >= 1020) # pragma once #endif #include <memory> #include <stdexcept> #include <string> #include <boost/exception/all.hpp> #include <boost/algorithm/string/trim.hpp> #include <boost/algorithm/string/classification.hpp> #include <unicode/uchar.h> #include <tbb/atomic.h> #include <tbb/concurrent_vector.h> #include <tbb/enumerable_thread_specific.h> #include <tbb/mutex.h> #include <tbb/parallel_for_each.h> #include <tbb/task.h> #include <ippch.h> #include "encode.hpp" #include "win.hpp" #include "ipp_allocator.hpp" #include "ipp_string.hpp" #include "io.hpp" namespace ut { typedef tbb::mutex RegexStateMutex; class RegexException : public boost::exception, public std::runtime_error { public: explicit RegexException(const std::wstring& what = L""); virtual ~RegexException() throw(); }; class Matcher { public: virtual ~Matcher() {} virtual bool operator()(const char* line, int len) = 0; virtual bool operator()(const wchar_t* line, int len) = 0; template <class T> bool operator()(const T& line); }; class PlainMatcher : public Matcher { public: PlainMatcher(const ut::ipp::ipp_wstring& target, bool complete, bool caseInsensitive) : u16Target_(target), complete_(complete), caseInsensitive_(caseInsensitive) { if (!complete_ && caseInsensitive) { // ifind がないため、この場合こちらで大文字に揃える必要がある ut::io::ipp::convertToUppercase(u16Target_); } ut::io::ipp::convertUTF16ToUTF8(u16Target_, u8Target_, false); } virtual ~PlainMatcher() {} bool operator()(const char* line, int len); bool operator()(const wchar_t* line, int len); private: ut::ipp::ipp_string u8Target_; ut::ipp::ipp_wstring u16Target_; bool complete_; bool caseInsensitive_; }; class RegexMatcher : public Matcher { public: explicit RegexMatcher(std::shared_ptr<IppRegExpState> state, std::shared_ptr<ut::RegexStateMutex> mutex = nullptr) : state_(state), mutex_(mutex) {} virtual ~RegexMatcher() {} bool operator()(const char* line, int len); bool 
operator()(const wchar_t* line, int len); private: std::shared_ptr<IppRegExpState> state_; std::shared_ptr<ut::RegexStateMutex> mutex_; }; class TLSRegexMatcher : public Matcher { public: TLSRegexMatcher(const ut::ipp::ipp_string& pattern, const ut::ipp::ipp_string& options); virtual ~TLSRegexMatcher() {} bool operator()(const char* line, int len); bool operator()(const wchar_t* line, int len); private: typedef std::shared_ptr<IppRegExpState> SharedRegExpState; const ut::ipp::ipp_string pattern_; const ut::ipp::ipp_string options_; tbb::enumerable_thread_specific<SharedRegExpState, tbb::cache_aligned_allocator<SharedRegExpState>, tbb::ets_key_per_instance> state_; }; class NegativeMatcher : public Matcher { public: explicit NegativeMatcher(std::shared_ptr<Matcher> matcher) : matcher_(matcher) {} virtual ~NegativeMatcher() {} bool operator()(const char* line, int len); bool operator()(const wchar_t* line, int len); private: std::shared_ptr<Matcher> matcher_; }; class AndMatcher : public Matcher { public: template <class T> AndMatcher(T first, T last) : matchers_(first, last) {} AndMatcher() {} explicit AndMatcher(const std::vector<std::shared_ptr<Matcher> >& matchers) : matchers_(matchers.cbegin(), matchers.cend()) {} virtual ~AndMatcher() {} void addMatcher(const std::shared_ptr<Matcher>& matcher); bool operator()(const char* line, int len); bool operator()(const wchar_t* line, int len); private: std::vector<std::shared_ptr<Matcher> > matchers_; }; class OrMatcher : public Matcher { public: OrMatcher() {} template <class T> OrMatcher(T first, T last) : matchers_(first, last) {} explicit OrMatcher(const std::vector<std::shared_ptr<Matcher> >& matchers) : matchers_(matchers.cbegin(), matchers.cend()) {} virtual ~OrMatcher() {} void addMatcher(const std::shared_ptr<Matcher>& matcher); bool operator()(const char* line, int len); bool operator()(const wchar_t* line, int len); private: std::vector<std::shared_ptr<Matcher> > matchers_; }; std::shared_ptr<ut::Matcher> 
createMatcherFromRegex(const ut::ipp::ipp_wstring& keyword, bool caseInsensitive); std::shared_ptr<ut::Matcher> createMatcherFromMultiCond(const ut::ipp::ipp_wstring& keyword, bool caseInsensitive, bool isRegex); namespace io { class FileNameFilter : public Filter { public: explicit FileNameFilter(std::shared_ptr<Matcher> matcher, std::shared_ptr<Filter> parentFilter = nullptr) : matcher_(matcher), Filter(parentFilter) {} virtual ~FileNameFilter() {} bool operator()(bool isDirectory, const ut::ipp::ipp_wstring& path) const; private: std::shared_ptr<Matcher> matcher_; }; class DirNameFilter : public Filter { public: explicit DirNameFilter(std::shared_ptr<Matcher> matcher, std::shared_ptr<Filter> parentFilter = nullptr) : matcher_(matcher), Filter(parentFilter) {} virtual ~DirNameFilter() {} bool operator()(bool isDirectory, const ut::ipp::ipp_wstring& path) const; private: std::shared_ptr<Matcher> matcher_; }; } template <class T> class SearchResult { public: typedef T string; SearchResult(); SearchResult(string&& path, long index, string&& matchedString); SearchResult(string&& directory, string&& filename, long index, string&& matchedString); const string& getDirectory() const; const string& getFilename() const; const string& getMatchedString() const; const string getPath() const; long getIndex() const; private: string directory_; string filename_; string matchedString_; long index_; }; template <class T> class SearchError { public: enum class ErrorType { SEARCH_ERROR, SEARCH_WARNING, SEARCH_MESSAGE }; typedef T string; SearchError(); SearchError(ErrorType type, string&& path, string&& what); SearchError(ErrorType type, string&& directory, string&& filename, string&& what); ErrorType getErrorType() const; const string& getDirectory() const; const string& getFilename() const; const string& getWhat() const; const string getPath() const; private: ErrorType type_; string directory_; string filename_; string what_; }; template <class T> bool operator==(const 
SearchResult<T>& lhs, const SearchResult<T>& rhs); template <class T> bool operator!=(const SearchResult<T>& lhs, const SearchResult<T>& rhs); template <class T> bool operator==(const SearchError<T>& lhs, const SearchError<T>& rhs); template <class T> bool operator!=(const SearchError<T>& lhs, const SearchError<T>& rhs); typedef SearchResult<std::wstring> IppSearchResult; typedef tbb::concurrent_vector<IppSearchResult> IppSearchResults; typedef SearchError<std::wstring> IppSearchError; typedef tbb::concurrent_vector<IppSearchError> IppSearchErrors; template <class T> void parallelGrepEachFile(T first, T last, bool recursive, io::icu::CharsetDetector& detector, Matcher& matcher, IppSearchResults& result, IppSearchErrors& error, const io::Filter* filter = nullptr, tbb::atomic<bool>* requestStop = nullptr); template <class T, class F> void parallelGrepEachFile(T first, T last, bool recursive, io::icu::CharsetDetector& detector, Matcher& matcher, F& callback, const io::Filter* filter = nullptr, tbb::atomic<bool>* requestStop = nullptr); template <class T> void parallelGrepEachFile(const T& path, bool recursive, io::icu::CharsetDetector& detector, Matcher& matcher, IppSearchResults& result, IppSearchErrors& error, const io::Filter* filter = nullptr, tbb::atomic<bool>* requestStop = nullptr); template <class T, class F> void parallelGrepEachFile(const T& path, bool recursive, io::icu::CharsetDetector& detector, Matcher& matcher, F& callback, const io::Filter* filter = nullptr, tbb::atomic<bool>* requestStop = nullptr); template <class T> void grepEachFile(const T& path, bool recursive, io::icu::CharsetDetector& detector, Matcher& matcher, IppSearchResults& result, IppSearchErrors& error, const io::Filter* filter = nullptr); template <class T, class F> void grepEachFile(const T& path, bool recursive, io::icu::CharsetDetector& detector, Matcher& matcher, F& callback, const io::Filter* filter = nullptr); template <class T> void grepEachLine(const T& path, HANDLE file, 
io::icu::CharsetDetector& detector, Matcher& matcher, IppSearchResults& result); template <class T, class F> void grepEachLine(const T& path, HANDLE file, io::icu::CharsetDetector& detector, Matcher& matcher, F& callback); template <class T> void grepEachLine(const T& path, io::ForEachLine& iterator, Matcher& matcher, IppSearchResults& result); template <class T, class F> void grepEachLine(const T& path, io::ForEachLine& iterator, Matcher& matcher, F& callback); template <class T> void grepEachLineQuietly(const T& path, io::icu::CharsetDetector& detector, Matcher& matcher, IppSearchResults& result, IppSearchErrors& error); template <class T, class F> void grepEachLineQuietly(const T& path, io::icu::CharsetDetector& detector, Matcher& matcher, F& callback); template <class T> void grepEachLineQuietly(const T& path, HANDLE file, io::icu::CharsetDetector& detector, Matcher& matcher, IppSearchResults& result, IppSearchErrors& error); template <class T, class F> void grepEachLineQuietly(const T& path, HANDLE file, io::icu::CharsetDetector& detector, Matcher& matcher, F& callback); template <class T> void grepEachLineQuietly(const T& path, io::ForEachLine& iterator, Matcher& matcher, IppSearchResults& result, IppSearchErrors& error); template <class T, class F> void grepEachLineQuietly(const T& path, io::ForEachLine& iterator, Matcher& matcher, F& callback); template <class T> std::shared_ptr<IppRegExpState> compilePattern(const T& pattern, const T& options, int* err = nullptr); bool matches(const char* line, int len, IppRegExpState* state, ut::RegexStateMutex* mutex = nullptr); template <class T> bool matches(const T& line, IppRegExpState* state, ut::RegexStateMutex* mutex = nullptr); } // Matcher template <class T> bool ut::Matcher::operator()(const T& line) { return (*this)(line.data(), line.size()); } // SearchResult template <class T> ut::SearchResult<T>::SearchResult() : index_(0) {} template <class T> ut::SearchResult<T>::SearchResult(string&& path, long index, 
string&& matchedString) : directory_(ut::getDirectory(path)), filename_(ut::getFilename(path)), matchedString_(std::move(matchedString)), index_(index) {} template <class T> ut::SearchResult<T>::SearchResult(string&& directory, string&& filename, long index, string&& matchedString) : directory_(std::move(directory)), filename_(std::move(filename)), matchedString_(std::move(matchedString)), index_(index) { boost::algorithm::trim_right_if(directory_, boost::algorithm::is_any_of(string(L"\\"))); } template <class T> const typename ut::SearchResult<T>::string& ut::SearchResult<T>::getDirectory() const { return directory_; } template <class T> const typename ut::SearchResult<T>::string& ut::SearchResult<T>::getFilename() const { return filename_; } template <class T> const typename ut::SearchResult<T>::string& ut::SearchResult<T>::getMatchedString() const { return matchedString_; } template <class T> const typename ut::SearchResult<T>::string ut::SearchResult<T>::getPath() const { return directory_ + L'\\' + filename_; } template <class T> long ut::SearchResult<T>::getIndex() const { return index_; } template <class T> bool ut::operator==(const SearchResult<T>& lhs, const SearchResult<T>& rhs) { return lhs.getDirectory() == rhs.getDirectory() && lhs.getFilename() == rhs.getFilename() && lhs.getMatchedString() == rhs.getMatchedString() && lhs.getIndex() == rhs.getIndex(); } template <class T> bool ut::operator!=(const SearchResult<T>& lhs, const SearchResult<T>& rhs) { return !(lhs == rhs); } // SearchError template <class T> ut::SearchError<T>::SearchError() : type_(ERROR) {} template <class T> ut::SearchError<T>::SearchError(ErrorType type, string&& path, string&& what) : type_(type), directory_(ut::getDirectory(path)), filename_(ut::getFilename(path)), what_(std::move(what)) {} template <class T> ut::SearchError<T>::SearchError(ErrorType type, string&& directory, string&& filename, string&& what) : type_(type), directory_(std::move(directory)), 
filename_(std::move(filename)), what_(std::move(what)) { boost::algorithm::trim_right_if(directory_, boost::algorithm::is_any_of(string(L"\\"))); } template <class T> typename ut::SearchError<T>::ErrorType ut::SearchError<T>::getErrorType() const { return type_; } template <class T> const typename ut::SearchError<T>::string& ut::SearchError<T>::getDirectory() const { return directory_; } template <class T> const typename ut::SearchError<T>::string& ut::SearchError<T>::getFilename() const { return filename_; } template <class T> const typename ut::SearchError<T>::string& ut::SearchError<T>::getWhat() const { return what_; } template <class T> const typename ut::SearchError<T>::string ut::SearchError<T>::getPath() const { return directory_ + L'\\' + filename_; } template <class T> bool ut::operator==(const SearchError<T>& lhs, const SearchError<T>& rhs) { return lhs.getErrorType() == rhs.getErrorType() && lhs.getDirectory() == rhs.getDirectory() && lhs.getFilename() == rhs.getFilename() && lhs.getWhat() == rhs.getWhat(); } template <class T> bool ut::operator!=(const SearchError<T>& lhs, const SearchError<T>& rhs) { return !(lhs == rhs); } template <class T> void ut::parallelGrepEachFile(T first, T last, bool recursive, io::icu::CharsetDetector& detector, Matcher& matcher, IppSearchResults& result, IppSearchErrors& error, const io::Filter* filter, tbb::atomic<bool>* requestStop) { tbb::parallel_for_each(first, last, [recursive, &detector, &matcher, &result, &error, filter, requestStop](const typename T::value_type& path) { parallelGrepEachFile(path, recursive, detector, matcher, result, error, filter, requestStop); }); } template <class T, class F> void ut::parallelGrepEachFile(T first, T last, bool recursive, io::icu::CharsetDetector& detector, Matcher& matcher, F& callback, const io::Filter* filter, tbb::atomic<bool>* requestStop) { tbb::parallel_for_each(first, last, [recursive, &detector, &matcher, &callback, filter, requestStop](const typename T::value_type& 
path) { parallelGrepEachFile(path, recursive, detector, matcher, callback, filter, requestStop); }); } namespace { class GrepEachFileWaitTask : public tbb::task { public: explicit GrepEachFileWaitTask(HANDLE event) : event_(event) {} virtual ~GrepEachFileWaitTask() {} tbb::task* execute() { WaitForSingleObject(event_, INFINITE); return nullptr; } private: HANDLE event_; }; template <class T, class F> class GrepEachFileTask : public tbb::task { public: GrepEachFileTask(bool isDirectory, const T& path, bool recursive, HANDLE file, ut::io::icu::CharsetDetector& detector, ut::Matcher& matcher, F& callback, const ut::io::Filter* filter = nullptr, tbb::atomic<bool>* requestStop = nullptr) : isDirectory_(isDirectory), path_(path), recursive_(recursive), file_(file), detector_(detector), matcher_(matcher), callback_(callback), filter_(filter), requestStop_(requestStop) {} virtual ~GrepEachFileTask() {} tbb::task* execute() { if (isStopRequested()) { return nullptr; } if (isDirectory_) { tbb::empty_task& c(*new(allocate_continuation()) tbb::empty_task()); tbb::task_list list; c.set_ref_count(1); list.push_back(*new(c.allocate_child()) tbb::empty_task()); ut::forEachFile([this, &c, &list](bool isDirectory, const T& path, const T& basename) -> bool { if (isStopRequested()) { return false; } const T filename(path + L'\\' + basename); if (isDirectory) { if (!recursive_) { return false; } if (filter_ && !(*filter_)(true, basename)) { return false; } c.increment_ref_count(); list.push_back(*new(c.allocate_child()) GrepEachFileTask<T, F>(true, filename, recursive_, nullptr, detector_, matcher_, callback_, filter_, requestStop_)); return false; } else { if (filter_ && !(*filter_)(false, basename)) { return false; } CHandle file(ut::io::openFile(filename)); if (file.m_h != INVALID_HANDLE_VALUE) { if (filter_ && !(*filter_)(file.m_h)) { return false; } c.increment_ref_count(); list.push_back(*new(c.allocate_child()) GrepEachFileTask<T, F>(false, filename, recursive_, file.Detach(), 
detector_, matcher_, callback_, filter_, requestStop_)); return true; } else { callback_(ut::IppSearchError(ut::IppSearchError::ErrorType::SEARCH_ERROR, std::wstring(filename.cbegin(), filename.cend()), std::wstring(ut::getLastErrorMessage()))); return false; } } }, path_, recursive_); spawn(list); return nullptr; } else { if (!isStopRequested()) { ut::grepEachLineQuietly(path_, file_, detector_, matcher_, callback_); } } return nullptr; } bool isStopRequested() const { return requestStop_ && *requestStop_; } private: bool isDirectory_; const T path_; bool recursive_; const CHandle file_; ut::io::icu::CharsetDetector& detector_; ut::Matcher& matcher_; F& callback_; const ut::io::Filter* filter_; tbb::atomic<bool>* requestStop_; }; } template <class T> void ut::parallelGrepEachFile(const T& path, bool recursive, io::icu::CharsetDetector& detector, Matcher& matcher, IppSearchResults& result, IppSearchErrors& error, const io::Filter* filter, tbb::atomic<bool>* requestStop) { IppSearchResults resultLocal; IppSearchErrors errorLocal; class GrepEachFileCallback { public: GrepEachFileCallback(IppSearchResults& result, IppSearchErrors& error) : result_(result), error_(error) {} void operator()(const IppSearchResult& result) { result_.push_back(result); } void operator()(const IppSearchError& error) { error_.push_back(error); } private: IppSearchResults& result_; IppSearchErrors& error_; } callback(resultLocal, errorLocal); parallelGrepEachFile(path, recursive, detector, matcher, callback, filter, requestStop); std::copy(resultLocal.cbegin(), resultLocal.cend(), std::back_inserter(result)); std::copy(errorLocal.cbegin(), errorLocal.cend(), std::back_inserter(error)); } template <class T, class F> void ut::parallelGrepEachFile(const T& path, bool recursive, io::icu::CharsetDetector& detector, Matcher& matcher, F& callback, const io::Filter* filter, tbb::atomic<bool>* requestStop) { const DWORD attr = ut::getFileAttributes(std::wstring(path.cbegin(), path.cend())); if (attr 
!= -1 && !(attr & FILE_ATTRIBUTE_DIRECTORY)) { ut::grepEachLineQuietly(path, detector, matcher, callback); } else { tbb::task::spawn_root_and_wait(*new(tbb::task::allocate_root()) GrepEachFileTask<T, F>(true, path, recursive, nullptr, detector, matcher, callback, filter, requestStop)); } } template <class T> void ut::grepEachFile(const T& path, bool recursive, io::icu::CharsetDetector& detector, Matcher& matcher, IppSearchResults& result, IppSearchErrors& error, const io::Filter* filter) { std::vector<IppSearchResult> resultLocal; std::vector<IppSearchError> errorLocal; class GrepEachFileCallback { public: GrepEachFileCallback(std::vector<IppSearchResult>& result, std::vector<IppSearchError>& error) : result_(result), error_(error) {} void operator()(const IppSearchResult& result) { result_.push_back(result); } void operator()(const IppSearchError& error) { error_.push_back(error); } private: std::vector<IppSearchResult>& result_; std::vector<IppSearchError>& error_; } callback(resultLocal, errorLocal); grepEachFile(path, recursive, detector, matcher, callback, filter); std::copy(resultLocal.cbegin(), resultLocal.cend(), std::back_inserter(result)); std::copy(errorLocal.cbegin(), errorLocal.cend(), std::back_inserter(error)); } template <class T, class F> void ut::grepEachFile(const T& path, bool recursive, io::icu::CharsetDetector& detector, Matcher& matcher, F& callback, const io::Filter* filter) { const DWORD attr = ut::getFileAttributes(std::wstring(path.cbegin(), path.cend())); if (attr != -1 && !(attr & FILE_ATTRIBUTE_DIRECTORY)) { ut::grepEachLineQuietly(path, detector, matcher, callback); } else { ut::forEachFile([&detector, &matcher, &callback, filter](bool isDirectory, const T& path, const T& basename) -> bool { if (isDirectory) { if (filter && !(*filter)(true, basename)) { return false; } } else { if (filter && !(*filter)(false, basename)) { return false; } const T filename(path + L'\\' + basename); const CHandle file(ut::io::openFile(filename)); if 
(file.m_h != INVALID_HANDLE_VALUE) { if (filter && !(*filter)(file.m_h)) { return false; } grepEachLineQuietly(filename, file.m_h, detector, matcher, callback); } else { callback(ut::IppSearchError(ut::IppSearchError::SEARCH_ERROR, std::wstring(filename.cbegin(), filename.cend()), std::wstring(ut::getLastErrorMessage()))); return false; } } return true; }, path, recursive); } } template <class T> void ut::grepEachLine(const T& path, HANDLE file, io::icu::CharsetDetector& detector, Matcher& matcher, IppSearchResults& result) { if (file != INVALID_HANDLE_VALUE) { io::ForEachLine forEachLine(file, detector); grepEachLine(path, forEachLine, matcher, result); } else { BOOST_THROW_EXCEPTION(io::IOException(ut::getLastErrorMessage())); } } template <class T, class F> void ut::grepEachLine(const T& path, HANDLE file, io::icu::CharsetDetector& detector, Matcher& matcher, F& onMatch) { if (file != INVALID_HANDLE_VALUE) { io::ForEachLine forEachLine(file, detector); grepEachLine(path, forEachLine, matcher, onMatch); } else { BOOST_THROW_EXCEPTION(io::IOException(ut::getLastErrorMessage())); } } template <class T> void ut::grepEachLine(const T& path, io::ForEachLine& iterator, Matcher& matcher, IppSearchResults& result) { std::vector<IppSearchResult> resultLocal; grepEachLine(path, iterator, matcher, [&resultLocal](const IppSearchResult& result) { resultLocal.push_back(result); }); std::copy(resultLocal.cbegin(), resultLocal.cend(), std::back_inserter(result)); } template <class T, class F> void ut::grepEachLine(const T& path, io::ForEachLine& iterator, Matcher& matcher, F& onMatch) { class ForEachLineCallback { public: ForEachLineCallback(const T& path, Matcher& matcher, F& onMatch) : path_(path), matcher_(matcher), onMatch_(onMatch) { line_.reserve(1024); wline_.reserve(1024); } bool operator()(const char* first, const char* last, long index) // UTF-8 { if (!ut::io::icu::all(first, last - first, [](int32_t c) -> bool { return c != 0 && (u_isprint(c) || c == '\t'); })) { 
BOOST_THROW_EXCEPTION(ut::io::IOException(L"不正なフォーマットです。")); } if (matcher_(first, last - first)) { ut::io::ipp::convertUTF8ToUTF16(first, last - first, wline_, false); if (ut::io::hasBOM(wline_)) { onMatch_(IppSearchResult(std::wstring(path_.cbegin(), path_.cend()), index, std::wstring(wline_.cbegin() + 1, wline_.cend()))); } else { onMatch_(IppSearchResult(std::wstring(path_.cbegin(), path_.cend()), index, std::wstring(wline_.cbegin(), wline_.cend()))); } } return true; } bool operator()(const wchar_t* first, const wchar_t* last, long index) // UTF-16LE { if (!ut::io::icu::all(first, last - first, [](int32_t c) -> bool { return c != 0 && (u_isprint(c) || c == '\t'); })) { BOOST_THROW_EXCEPTION(ut::io::IOException(L"不正なフォーマットです。")); } if (matcher_(first, last - first)) { onMatch_(IppSearchResult(std::wstring(path_.cbegin(), path_.cend()), index, std::wstring(first, last))); } return true; } private: const T path_; Matcher& matcher_; F& onMatch_; ipp::ipp_string line_; ipp::ipp_wstring wline_; } callback(path, matcher, onMatch); ut::io::forEachLine(iterator, callback); } template <class T> void ut::grepEachLineQuietly(const T& path, io::icu::CharsetDetector& detector, Matcher& matcher, IppSearchResults& result, IppSearchErrors& error) { grepEachLineQuietly(path, CHandle(ut::io::openFile(path)), detector, matcher, result, error); } template <class T, class F> void ut::grepEachLineQuietly(const T& path, io::icu::CharsetDetector& detector, Matcher& matcher, F& callback) { grepEachLineQuietly(path, CHandle(ut::io::openFile(path)), detector, matcher, callback); } template <class T> void ut::grepEachLineQuietly(const T& path, HANDLE file, io::icu::CharsetDetector& detector, Matcher& matcher, IppSearchResults& result, IppSearchErrors& error) { if (file != INVALID_HANDLE_VALUE) { io::ForEachLine forEachLine(file, detector); grepEachLineQuietly(path, forEachLine, matcher, result, error); } else { // TODO: 例外とエラーレベルのひも付け 
error.push_back(ut::IppSearchError(ut::IppSearchError::SEARCH_ERROR, std::wstring(path.cbegin(), path.cend()), std::wstring(ut::getLastErrorMessage()))); } } template <class T, class F> void ut::grepEachLineQuietly(const T& path, HANDLE file, io::icu::CharsetDetector& detector, Matcher& matcher, F& callback) { if (file != INVALID_HANDLE_VALUE) { io::ForEachLine forEachLine(file, detector); grepEachLineQuietly(path, forEachLine, matcher, callback); } else { // TODO: 例外とエラーレベルのひも付け callback(ut::IppSearchError(ut::IppSearchError::ErrorType::SEARCH_ERROR, std::wstring(path.cbegin(), path.cend()), std::wstring(ut::getLastErrorMessage()))); } } template <class T> void ut::grepEachLineQuietly(const T& path, io::ForEachLine& iterator, Matcher& matcher, IppSearchResults& result, IppSearchErrors& error) { try { ut::grepEachLine(path, iterator, matcher, result); } catch (const std::exception& e) { // TODO: 例外とエラーレベルのひも付け error.push_back(ut::IppSearchError(ut::IppSearchError::ErrorType::SEARCH_ERROR, std::wstring(path.cbegin(), path.cend()), ut::encode(e.what()))); } catch (...) { // TODO: 例外とエラーレベルのひも付け error.push_back(ut::IppSearchError(ut::IppSearchError::ErrorType::SEARCH_ERROR, std::wstring(path.cbegin(), path.cend()), std::wstring(L"不明なエラー"))); } } template <class T, class F> void ut::grepEachLineQuietly(const T& path, io::ForEachLine& iterator, Matcher& matcher, F& callback) { try { ut::grepEachLine(path, iterator, matcher, callback); } catch (const std::exception& e) { // TODO: 例外とエラーレベルのひも付け callback(ut::IppSearchError(ut::IppSearchError::ErrorType::SEARCH_ERROR, std::wstring(path.cbegin(), path.cend()), ut::encode(e.what()))); } catch (...) 
{ // TODO: 例外とエラーレベルのひも付け callback(ut::IppSearchError(ut::IppSearchError::ErrorType::SEARCH_ERROR, std::wstring(path.cbegin(), path.cend()), std::wstring(L"不明なエラー"))); } } template <class T> std::shared_ptr<IppRegExpState> ut::compilePattern(const T& pattern, const T& options, int* err) { IppRegExpState* state = nullptr; int errLocal = 0; IppStatus status = ippsRegExpInitAlloc(pattern.c_str(), options.empty() ? nullptr : options.c_str(), &state, &errLocal); if (status != ippStsNoErr) { if (status == ippStsNullPtrErr) { BOOST_THROW_EXCEPTION(ut::RegexException(L"ポインタが NULL です")); } else if (status == ippStsRegExpOptionsErr) { BOOST_THROW_EXCEPTION(ut::RegexException(L"指定したオプションに誤りがあります")); } else if (status == ippStsRegExpQuantifierErr) { BOOST_THROW_EXCEPTION(ut::RegexException(L"修飾子に誤りがあります")); } else if (status == ippStsRegExpGroupingErr) { BOOST_THROW_EXCEPTION(ut::RegexException(L"グループ化に誤りがあります")); } else if (status == ippStsRegExpBackRefErr) { BOOST_THROW_EXCEPTION(ut::RegexException(L"後方参照に誤りがあります")); } else if (status == ippStsRegExpChClassErr) { BOOST_THROW_EXCEPTION(ut::RegexException(L"文字クラスに誤りがあります")); } else if (status == ippStsRegExpMetaChErr) { BOOST_THROW_EXCEPTION(ut::RegexException(L"メタ文字に誤りがあります")); } else { BOOST_THROW_EXCEPTION(ut::RegexException(L"ippsRegExpInitAlloc failed.")); } } if (err) { *err = errLocal; } ippsRegExpSetFormat(ippFmtUTF8, state); return std::shared_ptr<IppRegExpState>(state, ut::SafeRegExpFreer()); } template <class T> inline bool ut::matches(const T& line, IppRegExpState* state, ut::RegexStateMutex* mutex) { return matches(line.data(), line.size(), state, mutex); } #endif

コメント