author    | Joe Zhao <ztuowen@gmail.com> | 2014-04-14 08:14:45 +0800
committer | Joe Zhao <ztuowen@gmail.com> | 2014-04-14 08:14:45 +0800
commit    | cccccbf6cca94a3eaf813b4468453160e91c332b (patch)
tree      | 23418cb73a10ae3b0688681a7f0ba9b06424583e /src/TNetLib
download  | tnet-cccccbf6cca94a3eaf813b4468453160e91c332b.tar.gz
          | tnet-cccccbf6cca94a3eaf813b4468453160e91c332b.tar.bz2
          | tnet-cccccbf6cca94a3eaf813b4468453160e91c332b.zip
First commit
Diffstat (limited to 'src/TNetLib')
53 files changed, 10303 insertions, 0 deletions
diff --git a/src/TNetLib/.depend.mk b/src/TNetLib/.depend.mk
new file mode 100644
index 0000000..dd073f6
--- /dev/null
+++ b/src/TNetLib/.depend.mk
@@ -0,0 +1,946 @@
[946 lines of compiler-generated make dependency rules: one "Foo.o: Foo.cc <headers...>" rule per object file (Activation.o, Barrier.o, BiasedLinearity.o, BlockArray.o, Cache.o, Mutex.o, Nnet.o, ObjFun.o, Semaphore.o, SharedLinearity.o), each listing the ../KaldiLib headers and the GCC 4.6 / glibc system headers under /usr/include that the source file transitively includes.]
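The file above is machine-generated, not hand-written. As a rough illustration (a sketch under assumed targets and flags, not the rule this repository's Makefile actually uses), such dependency lists are typically produced by the compiler's preprocessor and then pulled back into the build:

```make
# Sketch: regenerate .depend.mk from the compiler's dependency output.
# 'gcc -M' emits one "Foo.o: Foo.cc <headers...>" rule per source file,
# including system headers -- which is why /usr/include paths appear above
# ('gcc -MM' would omit them).
.depend.mk: $(wildcard *.cc)
	gcc -M $^ > $@

# Include the generated rules; '-include' ignores a missing file on first build.
-include .depend.mk
```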
diff --git a/src/TNetLib/.svn/entries b/src/TNetLib/.svn/entries
new file mode 100644
index 0000000..ed607ae
--- /dev/null
+++ b/src/TNetLib/.svn/entries
@@ -0,0 +1,878 @@
[878 lines of Subversion working-copy metadata: format 10, repository svn+ssh://merlin.fit.vutbr.cz/svn/TNet (directory trunk/src/TNetLib), last changed at revision 110 by iveselyk on 2012-03-23, followed by one record (checksum, timestamps, revision, author, size) per file in the directory: Activation.{h,cc}, Barrier.{h,cc}, BiasedLinearity.{h,cc}, BlockArray.{h,cc}, Cache.{h,cc}, Component.h, CRBEDctFeat.h, Makefile, Mutex.{h,cc}, Nnet.{h,cc}, ObjFun.{h,cc}, Platform.h, Semaphore.{h,cc}, SharedLinearity.{h,cc}, Thread.h.]
+319edc999ff2279c8cc225f8e1d666cb +2011-09-26T13:54:40.854717Z +71 +iveselyk + + + + + + + + + + + + + + + + + + + + + +379 + +Barrier.cc +file + + + + +2012-04-02T13:49:14.000000Z +83b98a4717f4ed3262c5a046892b78dd +2011-10-03T11:00:38.812084Z +78 +iveselyk + + + + + + + + + + + + + + + + + + + + + +3910 + +Platform.h +file + + + + +2012-04-02T13:49:14.000000Z +d1f1b4fb0f8061ec4174b0fd51906f80 +2012-03-23T13:22:49.912359Z +110 +iveselyk + + + + + + + + + + + + + + + + + + + + + +9235 + +SharedLinearity.cc +file + + + + +2012-04-02T13:49:14.000000Z +60da58805959d192b64349ce192f3d2e +2012-03-23T13:22:49.912359Z +110 +iveselyk + + + + + + + + + + + + + + + + + + + + + +7833 + +Barrier.h +file + + + + +2012-04-02T13:49:14.000000Z +49ffe12bf135c9391de4f76e11cf98b7 +2011-10-03T11:00:38.812084Z +78 +iveselyk + + + + + + + + + + + + + + + + + + + + + +1065 + +Makefile +file + + + + +2012-04-02T13:49:14.000000Z +ef89868dc4e4a9f796f091cbc15057b2 +2011-09-26T13:54:40.854717Z +71 +iveselyk + + + + + + + + + + + + + + + + + + + + + +384 + +BlockArray.cc +file + + + + +2012-04-02T13:49:14.000000Z +9d9d3d057f5915898d87ad57e448e00f +2012-02-07T17:50:53.635354Z +103 +iveselyk + + + + + + + + + + + + + + + + + + + + + +3199 + +SharedLinearity.h +file + + + + +2012-04-02T13:49:14.000000Z +38bf25a1a2e051d9ac2445e25a39152f +2011-10-11T11:00:50.704096Z +81 +iveselyk + + + + + + + + + + + + + + + + + + + + + +2287 + diff --git a/src/TNetLib/.svn/text-base/Activation.cc.svn-base b/src/TNetLib/.svn/text-base/Activation.cc.svn-base new file mode 100644 index 0000000..8e84190 --- /dev/null +++ b/src/TNetLib/.svn/text-base/Activation.cc.svn-base @@ -0,0 +1,138 @@ + +#include "Activation.h" + + +namespace TNet { + +void Sigmoid::PropagateFnc(const BfMatrix& X, BfMatrix& Y) { + //Y = 1/(1+e^{-X}) + for(size_t r=0; r<X.Rows(); r++) { + for(size_t c=0; c<X.Cols(); c++) { + Y(r,c) = 1.0f/(1.0f+exp(-X(r,c))); + } + } +} + + +void Sigmoid::BackpropagateFnc(const Matrix<BaseFloat>& X, Matrix<BaseFloat>& Y) { + const Matrix<BaseFloat>& out = GetOutput(); + //Y = OUT*(1-OUT)*X //DERIVED + for(size_t r=0; r<X.Rows(); r++) { + for(size_t c=0; c<X.Cols(); c++) { + Y(r,c) = X(r,c)*out(r,c)*(1.0f-out(r,c)); + } + } +} + + + +void Softmax::PropagateFnc(const BfMatrix& X, BfMatrix& Y) { + //Y_j = e^X_j / sum_i(e^X_i) + // + // e^(X_j+c) / sum_i(e^X_i+c) + // = e^c.e^X_j / e^c.sum_i(e^X_i) + // = e^X_j / sum_i(e^X_i) + // + size_t rows = X.Rows(); + for(size_t i=0; i<rows; i++) { + BfSubVector y_i(Y[i]); //<< y_i gets pointer to i'th row of matrix Y + y_i.Copy(X[i]); + BaseFloat max = y_i.Max(); + y_i.Subtract(max); + y_i.ApplyExp(); + BaseFloat sum = y_i.Sum(); + y_i.Scale(1.0f/sum); + } +} + + +void Softmax::BackpropagateFnc(const BfMatrix& X, BfMatrix& Y) { + //simply copy the error... + Y.Copy(X); +} + + +void BlockSoftmax::ReadFromStream(std::istream& rIn) { + rIn >> mDim; + mDimOffset.Init(mDim.Dim()+1); + + int off=0; + for(int i=0; i<mDim.Dim(); i++) { + mDimOffset[i]=off; + off += mDim[i]; + } + mDimOffset[mDim.Dim()]=off; + + if(off!=GetNOutputs()) { + KALDI_ERR << "Non-matching dimension of sum of softmaxes," + << " the sum:" << off + << " GetNOutputs:" << GetNOutputs(); + } +} + +void BlockSoftmax::WriteToStream(std::ostream& rOut) { + rOut << mDim; +} + + + + +void BlockSoftmax::PropagateFnc(const BfMatrix& X, BfMatrix& Y) { + //Y_j = e^X_j / sum_i(e^X_i) + // + // e^(X_j+c) / sum_i(e^X_i+c) + // = e^c.e^X_j / e^c.sum_i(e^X_i) + // = e^X_j / sum_i(e^X_i) + // + size_t rows = X.Rows(); + for(size_t i=0; i<rows; i++) { +
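//e.g. (hypothetical dims): with mDim = {2,3} the row is split below + //into column ranges [0,2) and [2,5), each normalized as its own softmax +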
BfSubVector y_i(Y[i]); //<< y_i gets pointer to i'th row of matrix Y + y_i.Copy(X[i]); + //BaseFloat max = y_i.Max(); + //y_i.Subtract(max); + //y_i.ApplyExp(); + //normalize separately on each softmax interval... + for(int j=0; j<mDim.Dim(); j++) { + BfSubVector y_i_smx_j(y_i.Range(mDimOffset[j],mDim[j])); + BaseFloat max = y_i_smx_j.Max(); + y_i_smx_j.Subtract(max); + y_i_smx_j.ApplyExp(); + BaseFloat sum = y_i_smx_j.Sum(); + y_i_smx_j.Scale(1.0f/sum); + } + } + +// X.CheckData("BlockSoftmax PropagateFnc X"); +// Y.CheckData("BlockSoftmax PropagateFnc Y"); +} + + +void BlockSoftmax::BackpropagateFnc(const BfMatrix& X, BfMatrix& Y) { + //set the output to zero + Y.Zero(); + //copy only parts of the error + //from softmax intervals which sum up to 0.0, not 1.0 + for(int i=0; i<X.Rows(); i++) { + for(int j=0; j<mDim.Dim(); j++) { + const BfSubVector x_i_smx_j(X[i].Range(mDimOffset[j],mDim[j])); + BaseFloat sum = x_i_smx_j.Sum(); + if(sum > -0.1 && sum < 0.1) { + BfSubVector y_i_smx_j(Y[i].Range(mDimOffset[j],mDim[j])); + y_i_smx_j.Copy(x_i_smx_j); + } else if (sum > 0.9 && sum < 1.1) { + ; //do nothing + } else { + KALDI_ERR << "Invalid sum: " << sum; + } + } + } + +// X.CheckData("BlockSoftmax BackpropagateFnc X"); +// Y.CheckData("BlockSoftmax BackpropagateFnc Y"); + +} + + + +} //namespace TNet + diff --git a/src/TNetLib/.svn/text-base/Activation.h.svn-base b/src/TNetLib/.svn/text-base/Activation.h.svn-base new file mode 100644 index 0000000..90263d0 --- /dev/null +++ b/src/TNetLib/.svn/text-base/Activation.h.svn-base @@ -0,0 +1,104 @@ + +#ifndef _ACT_FUN_I_ +#define _ACT_FUN_I_ + + +#include "Component.h" + + +namespace TNet +{ + + /** + * Sigmoid activation function + */ + class Sigmoid : public Component + { + public: + Sigmoid(size_t nInputs, size_t nOutputs, Component *pPred) + : Component(nInputs,nOutputs,pPred) + { } + + ComponentType GetType() const + { return SIGMOID; } + + const char* GetName() const + { return "<sigmoid>"; } + + Component* Clone() const + { return new Sigmoid(GetNInputs(),GetNOutputs(),NULL); } + + protected: + void PropagateFnc(const BfMatrix& X, BfMatrix& Y); + void BackpropagateFnc(const BfMatrix& X, BfMatrix& Y); + }; + + + /** + * Softmax activation function + */ + class Softmax : public Component + { + public: + Softmax(size_t nInputs, size_t nOutputs, Component *pPred) + : Component(nInputs,nOutputs,pPred) + { } + + ComponentType GetType() const + { return SOFTMAX; } + + const char* GetName() const + { return "<softmax>"; } + + Component* Clone() const + { return new Softmax(GetNInputs(),GetNOutputs(),NULL); } + + protected: + void PropagateFnc(const BfMatrix& X, BfMatrix& Y); + void BackpropagateFnc(const BfMatrix& X, BfMatrix& Y); + }; + + + /** + * BlockSoftmax activation function. + * It is several softmaxes in one. + * The dimensions of the softmaxes are given by an integer vector. + * During backpropagation: + * If the derivatives sum up to 0, they are backpropagated. + * If the derivatives sum up to 1, they are discarded + * (this is how we know that the softmax was 'inactive').
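+ * For example (hypothetical dims '2 3'): outputs 1..2 form the first + * softmax and outputs 3..5 the second; in the forward pass each block + * is normalized to sum to 1 on its own.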
+ */ + class BlockSoftmax : public Component + { + public: + BlockSoftmax(size_t nInputs, size_t nOutputs, Component *pPred) + : Component(nInputs,nOutputs,pPred) + { } + + ComponentType GetType() const + { return BLOCK_SOFTMAX; } + + const char* GetName() const + { return "<blocksoftmax>"; } + + Component* Clone() const + { return new BlockSoftmax(*this); } + + void ReadFromStream(std::istream& rIn); + void WriteToStream(std::ostream& rOut); + + protected: + void PropagateFnc(const BfMatrix& X, BfMatrix& Y); + void BackpropagateFnc(const BfMatrix& X, BfMatrix& Y); + + private: + Vector<int> mDim; + Vector<int> mDimOffset; + }; + + + +} //namespace + + +#endif diff --git a/src/TNetLib/.svn/text-base/Barrier.cc.svn-base b/src/TNetLib/.svn/text-base/Barrier.cc.svn-base new file mode 100644 index 0000000..0170e04 --- /dev/null +++ b/src/TNetLib/.svn/text-base/Barrier.cc.svn-base @@ -0,0 +1,143 @@ +/* + * barrier.c + * + * This file implements the "barrier" synchronization construct. + * + * A barrier causes threads to wait until a set of threads has + * all "reached" the barrier. The number of threads required is + * set when the barrier is initialized, and cannot be changed + * except by reinitializing. + * + * The barrier_init() and barrier_destroy() functions, + * respectively, allow you to initialize and destroy the + * barrier. + * + * The barrier_wait() function allows a thread to wait for a + * barrier to be completed. One thread (the one that happens to + * arrive last) will return from barrier_wait() with the status + * -1 on success -- others will return with 0. The special + * status makes it easy for the calling code to cause one thread + * to do something in a serial region before entering another + * parallel section of code. + */ +#include <pthread.h> +#include "Error.h" +#include "Barrier.h" + +namespace TNet { + +/* + * Initialize a barrier for use. + */ +Barrier::Barrier(int count) + : threshold_(count), counter_(count), cycle_(0) { + + if(0 != pthread_mutex_init(&mutex_, NULL)) + KALDI_ERR << "Cannot initialize mutex"; + + if(0 != pthread_cond_init(&cv_, NULL)) { + pthread_mutex_destroy(&mutex_); + KALDI_ERR << "Cannot initialize condv"; + } +} + +/* + * Destroy a barrier when done using it. + */ +Barrier::~Barrier() { + + if(0 != pthread_mutex_lock(&mutex_)) + KALDI_ERR << "Cannot lock mutex"; + + /* + * Check whether any threads are known to be waiting; report + * "BUSY" if so. + */ + if(counter_ != threshold_) { + pthread_mutex_unlock (&mutex_); + KALDI_ERR << "Cannot destroy barrier with waiting thread"; + } + + if(0 != pthread_mutex_unlock(&mutex_)) + KALDI_ERR << "Cannot unlock mutex"; + + /* + * If unable to destroy either 1003.1c synchronization + * object, halt + */ + if(0 != pthread_mutex_destroy(&mutex_)) + KALDI_ERR << "Cannot destroy mutex"; + + if(0 != pthread_cond_destroy(&cv_)) + KALDI_ERR << "Cannot destroy condv"; +} + + +void Barrier::SetThreshold(int thr) { + if(counter_ != threshold_) + KALDI_ERR << "Cannot set threshold while a thread is waiting"; + + threshold_ = thr; counter_ = thr; +} + + + +/* + * Wait for all members of a barrier to reach the barrier. When + * the count (of remaining members) reaches 0, broadcast to wake + * all threads waiting.
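+ * + * Usage sketch (illustrative only, not from the original sources): + * + * Barrier barrier(num_threads); + * // in every worker thread: + * // ...do the parallel work... + * // if(barrier.Wait() == -1) { + * // ...exactly one thread runs this serial section... + * // } + *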
+ */ +int Barrier::Wait() { + int status, cancel, tmp, cycle; + + if(threshold_ == 0) + KALDI_ERR << "Cannot wait when Threshold value was not set"; + + if(0 != pthread_mutex_lock(&mutex_)) + KALDI_ERR << "Cannot lock mutex"; + + cycle = cycle_; /* Remember which cycle we're on */ + + if(--counter_ == 0) { + cycle_ = !cycle_; + counter_ = threshold_; + status = pthread_cond_broadcast(&cv_); + /* + * The last thread into the barrier will return status + * -1 rather than 0, so that it can be used to perform + * some special serial code following the barrier. + */ + if(status == 0) status = -1; + } else { + /* + * Wait with cancellation disabled, because barrier_wait + * should not be a cancellation point. + */ + pthread_setcancelstate(PTHREAD_CANCEL_DISABLE, &cancel); + + /* + * Wait until the barrier's cycle changes, which means + * that it has been broadcast, and we don't want to wait + * anymore. + */ + while (cycle == cycle_) { + status = pthread_cond_wait(&cv_, &mutex_); + if (status != 0) break; + } + + pthread_setcancelstate(cancel, &tmp); + } + /* + * Ignore an error in unlocking. It shouldn't happen, and + * reporting it here would be misleading -- the barrier wait + * completed, after all, whereas returning, for example, + * EINVAL would imply the wait had failed. The next attempt + * to use the barrier *will* return an error, or hang, due + * to whatever happened to the mutex. + */ + pthread_mutex_unlock (&mutex_); + return status; /* error, -1 for waker, or 0 */ +} + + +}//namespace TNet diff --git a/src/TNetLib/.svn/text-base/Barrier.h.svn-base b/src/TNetLib/.svn/text-base/Barrier.h.svn-base new file mode 100644 index 0000000..a5849d2 --- /dev/null +++ b/src/TNetLib/.svn/text-base/Barrier.h.svn-base @@ -0,0 +1,41 @@ +/* + * barrier.h + * + * This header file describes the "barrier" synchronization + * construct. The type barrier_t describes the full state of the + * barrier including the POSIX 1003.1c synchronization objects + * necessary. + * + * A barrier causes threads to wait until a set of threads has + * all "reached" the barrier. The number of threads required is + * set when the barrier is initialized, and cannot be changed + * except by reinitializing. + */ +#include <pthread.h> + +#ifndef barrier_h +#define barrier_h + +namespace TNet { + +/* + * Structure describing a barrier. 
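+ * The number of threads is given to the constructor (or set by + * SetThreshold() before the first Wait()); Wait() blocks until that + * many threads have arrived, then returns -1 in exactly one of them + * per cycle and 0 in the others.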
+ */ +class Barrier { + public: + Barrier(int count=0); + ~Barrier(); + void SetThreshold(int thr); + int Wait(); + private: + pthread_mutex_t mutex_; /* Control access to barrier */ + pthread_cond_t cv_; /* wait for barrier */ + int threshold_; /* number of threads required */ + int counter_; /* current number of threads */ + int cycle_; /* alternate wait cycles (0 or 1) */ +}; + +}//namespace TNet + +#endif + diff --git a/src/TNetLib/.svn/text-base/BiasedLinearity.cc.svn-base b/src/TNetLib/.svn/text-base/BiasedLinearity.cc.svn-base new file mode 100644 index 0000000..b52aeb0 --- /dev/null +++ b/src/TNetLib/.svn/text-base/BiasedLinearity.cc.svn-base @@ -0,0 +1,180 @@ + + +#include "BiasedLinearity.h" + + +namespace TNet { + + +void +BiasedLinearity:: +PropagateFnc(const Matrix<BaseFloat>& X, Matrix<BaseFloat>& Y) +{ + //y = b + x.A + + //precopy bias + size_t rows = X.Rows(); + for(size_t i=0; i<rows; i++) { + Y[i].Copy(*mpBias); + } + + //multiply matrix by matrix with mLinearity + Y.BlasGemm(1.0f, X, NO_TRANS, *mpLinearity, NO_TRANS, 1.0f); +} + + +void +BiasedLinearity:: +BackpropagateFnc(const Matrix<BaseFloat>& X, Matrix<BaseFloat>& Y) +{ + // e' = e.A^T + Y.Zero(); + Y.BlasGemm(1.0f, X, NO_TRANS, *mpLinearity, TRANS, 0.0f); +} + + + +void +BiasedLinearity:: +ReadFromStream(std::istream& rIn) +{ + //matrix is stored transposed as SNet does + Matrix<BaseFloat> transpose; + rIn >> transpose; + mLinearity = Matrix<BaseFloat>(transpose, TRANS); + //biases stored normally + rIn >> mBias; +} + + +void +BiasedLinearity:: +WriteToStream(std::ostream& rOut) +{ + //matrix is stored transposed as SNet does + Matrix<BaseFloat> transpose(mLinearity, TRANS); + rOut << transpose; + //biases stored normally + rOut << mBias; + rOut << std::endl; +} + + +void +BiasedLinearity:: +Gradient() +{ + //calculate gradient of weight matrix + mLinearityCorrection.Zero(); + mLinearityCorrection.BlasGemm(1.0f, GetInput(), TRANS, + GetErrorInput(), NO_TRANS, + 0.0f); + + //calculate gradient of bias + mBiasCorrection.Set(0.0f); + size_t rows = GetInput().Rows(); + for(size_t i=0; i<rows; i++) { + mBiasCorrection.Add(GetErrorInput()[i]); + } + + /* + //perform update + mLinearity.AddScaled(-mLearningRate, mLinearityCorrection); + mBias.AddScaled(-mLearningRate, mBiasCorrection); + */ +} + + +void +BiasedLinearity:: +AccuGradient(const UpdatableComponent& src, int thr, int thrN) { + //cast the argument + const BiasedLinearity& src_comp = dynamic_cast<const BiasedLinearity&>(src); + + //allocate accumulators when needed + if(mLinearityCorrectionAccu.MSize() == 0) { + mLinearityCorrectionAccu.Init(mLinearity.Rows(),mLinearity.Cols()); + } + if(mBiasCorrectionAccu.MSize() == 0) { + mBiasCorrectionAccu.Init(mBias.Dim()); + } + + //need to find out which rows to sum... + int div = mLinearityCorrection.Rows() / thrN; + int mod = mLinearityCorrection.Rows() % thrN; + + int origin = thr * div + ((mod > thr)? thr : mod); + int rows = div + ((mod > thr)? 1 : 0); + + //create the matrix windows + const SubMatrix<BaseFloat> src_mat ( + src_comp.mLinearityCorrection, + origin, rows, + 0, mLinearityCorrection.Cols() + ); + SubMatrix<double> tgt_mat ( + mLinearityCorrectionAccu, + origin, rows, + 0, mLinearityCorrection.Cols() + ); + //sum the rows + Add(tgt_mat,src_mat); + + //first thread will always sum the bias correction + if(thr == 0) { + Add(mBiasCorrectionAccu,src_comp.mBiasCorrection); + } + +} + + +void +BiasedLinearity:: +Update(int thr, int thrN) +{ + //need to find out which rows to sum... 
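+ //(worked example with hypothetical sizes: 10 rows and thrN=4 give + // row stripes of 3,3,2,2 at origins 0,3,6,8 for thr=0..3)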
+ int div = mLinearity.Rows() / thrN; + int mod = mLinearity.Rows() % thrN; + + int origin = thr * div + ((mod > thr)? thr : mod); + int rows = div + ((mod > thr)? 1 : 0); + + //std::cout << "[P" << thr << "," << origin << "," << rows << "]" << std::flush; + + //get the matrix windows + SubMatrix<double> src_mat ( + mLinearityCorrectionAccu, + origin, rows, + 0, mLinearityCorrection.Cols() + ); + SubMatrix<BaseFloat> tgt_mat ( + mLinearity, + origin, rows, + 0, mLinearityCorrection.Cols() + ); + + + //update weights + AddScaled(tgt_mat, src_mat, -mLearningRate); + + //perform L2 regularization (weight decay) + BaseFloat L2_decay = -mLearningRate * mWeightcost * mBunchsize; + if(L2_decay != 0.0) { + tgt_mat.AddScaled(L2_decay, tgt_mat); + } + + //first thread always updates the bias + if(thr == 0) { + //std::cout << "[" << thr << "BP]" << std::flush; + AddScaled(mBias, mBiasCorrectionAccu, -mLearningRate); + } + + //reset the accumulators + src_mat.Zero(); + if(thr == 0) { + mBiasCorrectionAccu.Zero(); + } + +} + +} //namespace diff --git a/src/TNetLib/.svn/text-base/BiasedLinearity.h.svn-base b/src/TNetLib/.svn/text-base/BiasedLinearity.h.svn-base new file mode 100644 index 0000000..5018637 --- /dev/null +++ b/src/TNetLib/.svn/text-base/BiasedLinearity.h.svn-base @@ -0,0 +1,92 @@ +#ifndef _BIASED_LINEARITY_H_ +#define _BIASED_LINEARITY_H_ + + +#include "Component.h" + +#include "Matrix.h" +#include "Vector.h" + + +namespace TNet { + +class BiasedLinearity : public UpdatableComponent +{ + public: + + BiasedLinearity(size_t nInputs, size_t nOutputs, Component *pPred); + ~BiasedLinearity() { } + + ComponentType GetType() const + { return BIASED_LINEARITY; } + + const char* GetName() const + { return "<BiasedLinearity>"; } + + Component* Clone() const; + + void PropagateFnc(const Matrix<BaseFloat>& X, Matrix<BaseFloat>& Y); + void BackpropagateFnc(const Matrix<BaseFloat>& X, Matrix<BaseFloat>& Y); + + void ReadFromStream(std::istream& rIn); + void WriteToStream(std::ostream& rOut); + + /// calculate gradient + void Gradient(); + /// accumulate gradient from other components + void AccuGradient(const UpdatableComponent& src, int thr, int thrN); + /// update weights, reset the accumulator + void Update(int thr, int thrN); + + protected: + Matrix<BaseFloat> mLinearity; ///< Matrix with neuron weights + Vector<BaseFloat> mBias; ///< Vector with biases + + const Matrix<BaseFloat>* mpLinearity; + const Vector<BaseFloat>* mpBias; + + Matrix<BaseFloat> mLinearityCorrection; ///< Matrix for linearity updates + Vector<BaseFloat> mBiasCorrection; ///< Vector for bias updates + + Matrix<double> mLinearityCorrectionAccu; ///< Matrix for summing linearity updates + Vector<double> mBiasCorrectionAccu; ///< Vector for summing bias updates + +}; + + + + +//////////////////////////////////////////////////////////////////////////// +// INLINE FUNCTIONS +// BiasedLinearity:: +inline +BiasedLinearity:: +BiasedLinearity(size_t nInputs, size_t nOutputs, Component *pPred) + : UpdatableComponent(nInputs, nOutputs, pPred), + mLinearity(), mBias(), //cloned instances don't need this + mpLinearity(&mLinearity), mpBias(&mBias), + mLinearityCorrection(nInputs,nOutputs), mBiasCorrection(nOutputs), + mLinearityCorrectionAccu(), mBiasCorrectionAccu() //cloned instances don't need this +{ } + +inline +Component* +BiasedLinearity:: +Clone() const +{ + BiasedLinearity* ptr = new BiasedLinearity(GetNInputs(), GetNOutputs(), NULL); + ptr->mpLinearity = mpLinearity; //copy pointer from currently active weights + ptr->mpBias = mpBias;
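//(the clone shares the parameter matrices through these pointers, while its correction buffers stay per-instance)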
//... + + ptr->mLearningRate = mLearningRate; + + return ptr; +} + + + +} //namespace + + + +#endif diff --git a/src/TNetLib/.svn/text-base/BlockArray.cc.svn-base b/src/TNetLib/.svn/text-base/BlockArray.cc.svn-base new file mode 100644 index 0000000..18a41d2 --- /dev/null +++ b/src/TNetLib/.svn/text-base/BlockArray.cc.svn-base @@ -0,0 +1,136 @@ + + +#include "BlockArray.h" +#include "Nnet.h" + + +namespace TNet +{ + + void + BlockArray:: + PropagateFnc(const Matrix<BaseFloat>& X, Matrix<BaseFloat>& Y) + { + SubMatrix<BaseFloat> colsX(X,0,1,0,1); //dummy dimensions + SubMatrix<BaseFloat> colsY(Y,0,1,0,1); //dummy dimensions + + int X_src_ori=0, Y_tgt_ori=0; + for(int i=0; i<mNBlocks; i++) { + //get the correct submatrices + int colsX_cnt=mBlocks[i]->GetNInputs(); + int colsY_cnt=mBlocks[i]->GetNOutputs(); + colsX = X.Range(0,X.Rows(),X_src_ori,colsX_cnt); + colsY = Y.Range(0,Y.Rows(),Y_tgt_ori,colsY_cnt); + + //propagate through the block (network) + mBlocks[i]->Propagate(colsX,colsY); + + //shift the origin coordinates + X_src_ori += colsX_cnt; + Y_tgt_ori += colsY_cnt; + } + + assert(X_src_ori == X.Cols()); + assert(Y_tgt_ori == Y.Cols()); + } + + + void + BlockArray:: + BackpropagateFnc(const Matrix<BaseFloat>& X, Matrix<BaseFloat>& Y) + { + KALDI_ERR << "Unimplemented"; + } + + + void + BlockArray:: + Update() + { + KALDI_ERR << "Unimplemented"; + } + + + void + BlockArray:: + ReadFromStream(std::istream& rIn) + { + if(mBlocks.size() > 0) { + KALDI_ERR << "Cannot read block vector, " + << "already filled by " + << mBlocks.size() + << " elements"; + } + + rIn >> std::ws >> mNBlocks; + if(mNBlocks < 1) { + KALDI_ERR << "Bad number of blocks:" << mNBlocks; + } + + //read all the blocks + std::string tag; + int block_id; + for(int i=0; i<mNBlocks; i++) { + //read tag <block> + rIn >> std::ws >> tag; + //make it lowercase + std::transform(tag.begin(), tag.end(), tag.begin(), tolower); + //check + if(tag!="<block>") { + KALDI_ERR << "<block> keyword expected"; + } + + //read block number + rIn >> std::ws >> block_id; + if(block_id != i+1) { + KALDI_ERR << "Expected block number:" << i+1 + << " read block number: " << block_id; + } + + //read the nnet + Network* p_nnet = new Network; + p_nnet->ReadNetwork(rIn); + if(p_nnet->Layers() == 0) { + KALDI_ERR << "Cannot read empty network into a block"; + } + + //add it to the vector + mBlocks.push_back(p_nnet); + } + + //check the declared dimensionality + int sum_inputs=0, sum_outputs=0; + for(int i=0; i<mNBlocks; i++) { + sum_inputs += mBlocks[i]->GetNInputs(); + sum_outputs += mBlocks[i]->GetNOutputs(); + } + if(sum_inputs != GetNInputs()) { + KALDI_ERR << "Non-matching number of INPUTS! Declared:" + << GetNInputs() + << " summed from blocks" + << sum_inputs; + } + if(sum_outputs != GetNOutputs()) { + KALDI_ERR << "Non-matching number of OUTPUTS!
Declared:" + << GetNOutputs() + << " summed from blocks" + << sum_outputs; + } + } + + + void + BlockArray:: + WriteToStream(std::ostream& rOut) + { + rOut << " " << mBlocks.size() << " "; + for(int i=0; i<mBlocks.size(); i++) { + rOut << "<block> " << i+1 << "\n"; + mBlocks[i]->WriteNetwork(rOut); + rOut << "<endblock>\n"; + } + } + + +} //namespace + diff --git a/src/TNetLib/.svn/text-base/BlockArray.h.svn-base b/src/TNetLib/.svn/text-base/BlockArray.h.svn-base new file mode 100644 index 0000000..e6a8657 --- /dev/null +++ b/src/TNetLib/.svn/text-base/BlockArray.h.svn-base @@ -0,0 +1,85 @@ +#ifndef _BLOCK_ARRAY_H_ +#define _BLOCK_ARRAY_H_ + + +#include "Component.h" + +#include "Matrix.h" +#include "Vector.h" + + +namespace TNet { + + class Network; + + class BlockArray : public Component + { + public: + + BlockArray(size_t nInputs, size_t nOutputs, Component *pPred); + ~BlockArray(); + + ComponentType GetType() const; + const char* GetName() const; + + void PropagateFnc(const Matrix<BaseFloat>& X, Matrix<BaseFloat>& Y); + void BackpropagateFnc(const Matrix<BaseFloat>& X, Matrix<BaseFloat>& Y); + + void Update(); + + void ReadFromStream(std::istream& rIn); + void WriteToStream(std::ostream& rOut); + + //:TODO: + Component* Clone() const { KALDI_ERR << "Unimplemented"; } + + protected: + std::vector<Network*> mBlocks; ///< vector with networks, one network is one block + size_t mNBlocks; + }; + + + + + //////////////////////////////////////////////////////////////////////////// + // INLINE FUNCTIONS + // BlockArray:: + inline + BlockArray:: + BlockArray(size_t nInputs, size_t nOutputs, Component *pPred) + : Component(nInputs, nOutputs, pPred), + mNBlocks(0) + { } + + + inline + BlockArray:: + ~BlockArray() + { + for(int i=0; i<mBlocks.size(); i++) { + delete mBlocks[i]; + } + mBlocks.clear(); + } + + inline Component::ComponentType + BlockArray:: + GetType() const + { + return Component::BLOCK_ARRAY; + } + + inline const char* + BlockArray:: + GetName() const + { + return "<blockarray>"; + } + + + +} //namespace + + + +#endif diff --git a/src/TNetLib/.svn/text-base/CRBEDctFeat.h.svn-base b/src/TNetLib/.svn/text-base/CRBEDctFeat.h.svn-base new file mode 100644 index 0000000..0984c36 --- /dev/null +++ b/src/TNetLib/.svn/text-base/CRBEDctFeat.h.svn-base @@ -0,0 +1,432 @@ +#ifndef _CUCRBEDCTFEATURES_H_ +#define _CUCRBEDCTFEATURES_H_ + + +#include "Component.h" +#include "Matrix.h" +#include "Vector.h" +#include "cblas.h" + + +namespace TNet { + + /** + * Expands the time context of the input features + * in N, out k*N, FrameOffset o_1,o_2,...,o_k + * FrameOffset example 11frames: -5 -4 -3 -2 -1 0 1 2 3 4 5 + */ + class Expand : public Component + { + public: + Expand(size_t nInputs, size_t nOutputs, Component* pPred) + : Component(nInputs,nOutputs,pPred) + { } + + ~Expand() + { } + + ComponentType GetType() const + { return EXPAND; } + + const char* GetName() const + { return "<expand>"; } + + Component* Clone() const + { + Expand* p = new Expand(GetNInputs(),GetNOutputs(),NULL); + p->mFrameOffset.Init(mFrameOffset.Dim()); + p->mFrameOffset.Copy(mFrameOffset); + return p; + } + + void ReadFromStream(std::istream& rIn) + { rIn >> mFrameOffset; } + + void WriteToStream(std::ostream& rOut) + { rOut << mFrameOffset; } + + protected: + void PropagateFnc(const Matrix<BaseFloat>& X, Matrix<BaseFloat>& Y) + { + assert(X.Cols()*mFrameOffset.Dim() == Y.Cols()); + assert(X.Rows() == Y.Rows()); + + for(size_t r=0;r<X.Rows();r++) { + for(size_t off=0;off<mFrameOffset.Dim();off++) { + int r_off = r + 
mFrameOffset[off]; + if(r_off < 0) r_off = 0; + if(r_off >= X.Rows()) r_off = X.Rows()-1; + memcpy(Y.pRowData(r)+off*X.Cols(),X.pRowData(r_off),sizeof(BaseFloat)*X.Cols()); + } + } + } + + void BackpropagateFnc(const Matrix<BaseFloat>& X, Matrix<BaseFloat>& Y) + { Error("__func__ Nonsense"); } + + protected: + Vector<int> mFrameOffset; + }; + + + + /** + * Rearrange the matrix columns according to the indices in mCopyFromIndices + */ + class Copy : public Component + { + public: + Copy(size_t nInputs, size_t nOutputs, Component* pPred) + : Component(nInputs,nOutputs,pPred) + { } + + ~Copy() + { } + + ComponentType GetType() const + { return COPY; } + + const char* GetName() const + { return "<copy>"; } + + Component* Clone() const + { + Copy* p = new Copy(GetNInputs(),GetNOutputs(),NULL); + p->mCopyFromIndices.Init(mCopyFromIndices.Dim()); + p->mCopyFromIndices.Copy(mCopyFromIndices); + return p; + } + + void ReadFromStream(std::istream& rIn) + { + Vector<int> vec; rIn >> vec; vec.Add(-1); + mCopyFromIndices.Init(vec.Dim()).Copy(vec); + } + + void WriteToStream(std::ostream& rOut) + { + Vector<int> vec(mCopyFromIndices); + vec.Add(1); rOut << vec; + } + + protected: + void PropagateFnc(const Matrix<BaseFloat>& X, Matrix<BaseFloat>& Y) + { + assert(mCopyFromIndices.Dim() == Y.Cols()); + for(int i=0; i<mCopyFromIndices.Dim();i++) { + assert(mCopyFromIndices[i] >= 0 && mCopyFromIndices[i] < X.Cols()); + } + + for(size_t r=0; r<X.Rows(); r++) { + for(size_t c=0; c<Y.Cols(); c++) { + Y(r,c) = X(r,mCopyFromIndices[c]); + } + } + } + + void BackpropagateFnc(const Matrix<BaseFloat>& X, Matrix<BaseFloat>& Y) + { Error("__func__ Nonsense"); } + + protected: + Vector<int> mCopyFromIndices; + }; + + class Transpose : public Component + { + public: + Transpose(size_t nInputs, size_t nOutputs, Component* pPred) + : Component(nInputs,nOutputs,pPred), mContext(0) + { } + + ~Transpose() + { } + + ComponentType GetType() const + { return TRANSPOSE; } + + const char* GetName() const + { return "<transpose>"; } + + Component* Clone() const + { + Transpose* p = new Transpose(GetNInputs(),GetNOutputs(),NULL); + p->mCopyFromIndices.Init(mCopyFromIndices.Dim()); + p->mCopyFromIndices.Copy(mCopyFromIndices); + p->mContext = mContext; + return p; + } + + void ReadFromStream(std::istream& rIn) + { + rIn >> std::ws >> mContext; + + if(GetNInputs() != GetNOutputs()) { + Error("Input dim must be same as output dim"); + } + + Vector<int> vec(GetNInputs()); + int channels = GetNInputs() / mContext; + for(int i=0, ch=0; ch<channels; ch++) { + for(int idx=ch; idx < (int)GetNInputs(); idx+=channels, i++) { + assert(i < (int)GetNInputs()); + vec[i] = idx; + } + } + + mCopyFromIndices.Init(vec.Dim()).Copy(vec); + } + + void WriteToStream(std::ostream& rOut) + { rOut << " " << mContext << "\n"; } + + protected: + void PropagateFnc(const Matrix<BaseFloat>& X, Matrix<BaseFloat>& Y) + { + assert(mCopyFromIndices.Dim() == Y.Cols()); + for(int i=0; i<mCopyFromIndices.Dim();i++) { + assert(mCopyFromIndices[i] >= 0 && mCopyFromIndices[i] < X.Cols()); + } + + for(size_t r=0; r<X.Rows(); r++) { + for(size_t c=0; c<Y.Cols(); c++) { + Y(r,c) = X(r,mCopyFromIndices[c]); + } + } + } + + void BackpropagateFnc(const Matrix<BaseFloat>& X, Matrix<BaseFloat>& Y) + { Error("__func__ Nonsense"); } + + protected: + int mContext; + Vector<int> mCopyFromIndices; + }; + + + /** + * BlockLinearity is used for the blockwise multiplication by + * DCT transform loaded from disk + */ + class BlockLinearity : public Component + { + public: + 
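//(each block of input columns is multiplied by the same small matrix, + // e.g. a per-band DCT loaded from the model file) +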
BlockLinearity(size_t nInputs, size_t nOutputs, Component* pPred) + : Component(nInputs,nOutputs,pPred) + { } + + ~BlockLinearity() + { } + + + ComponentType GetType() const + { return Component::BLOCK_LINEARITY; } + + const char* GetName() const + { return "<blocklinearity>"; } + + Component* Clone() const + { + BlockLinearity* p = new BlockLinearity(GetNInputs(),GetNOutputs(),NULL); + p->mBlockLinearity.Init(mBlockLinearity.Rows(),mBlockLinearity.Cols()); + p->mBlockLinearity.Copy(mBlockLinearity); + return p; + } + + void PropagateFnc(const Matrix<BaseFloat>& X, Matrix<BaseFloat>& Y) + { + assert(X.Rows() == Y.Rows()); + assert(X.Cols()%mBlockLinearity.Rows() == 0); + assert(Y.Cols()%mBlockLinearity.Cols() == 0); + assert(X.Cols()/mBlockLinearity.Rows() == Y.Cols()/mBlockLinearity.Cols()); + + int instN = X.Cols()/mBlockLinearity.Rows(); + for(int inst=0; inst<instN; inst++) { +#ifndef DOUBLEPRECISION + cblas_sgemm(CblasRowMajor, CblasNoTrans, CblasNoTrans, + X.Rows(), mBlockLinearity.Cols(), mBlockLinearity.Rows(), + 1.0, X.pData()+inst*mBlockLinearity.Rows(), X.Stride(), + mBlockLinearity.pData(), mBlockLinearity.Stride(), + 0.0, Y.pData()+inst*mBlockLinearity.Cols(), Y.Stride()); +#else + cblas_dgemm(CblasRowMajor, CblasNoTrans, CblasNoTrans, + X.Rows(), mBlockLinearity.Cols(), mBlockLinearity.Rows(), + 1.0, X.pData()+inst*mBlockLinearity.Rows(), X.Stride(), + mBlockLinearity.pData(), mBlockLinearity.Stride(), + 0.0, Y.pData()+inst*mBlockLinearity.Cols(), Y.Stride()); +#endif + } + } + + void BackpropagateFnc(const Matrix<BaseFloat>& X, Matrix<BaseFloat>& Y) + { Error("__func__ Not implemented"); } + + + void ReadFromStream(std::istream& rIn) + { + Matrix<BaseFloat> mat; + rIn >> mat; + Matrix<BaseFloat> trans(mat,TRANS); + mBlockLinearity.Init(trans.Rows(),trans.Cols()).Copy(trans); + + if((GetNOutputs() % mBlockLinearity.Cols() != 0) || + (GetNInputs() % mBlockLinearity.Rows() != 0) || + ((GetNOutputs() / mBlockLinearity.Cols()) != + (GetNInputs() / mBlockLinearity.Rows()))) + { + Error("BlockLinearity matrix dimensions must divide IO dims"); + } + } + + void WriteToStream(std::ostream& rOut) + { + Matrix<BaseFloat> trans(mBlockLinearity,TRANS); + rOut << trans; + } + + private: + Matrix<BaseFloat> mBlockLinearity; + }; + + + + class Bias : public Component + { + public: + Bias(size_t nInputs, size_t nOutputs, Component* pPred) + : Component(nInputs,nOutputs,pPred) + { } + + ~Bias() + { } + + + ComponentType GetType() const + { return Component::BIAS; } + + const char* GetName() const + { return "<bias>"; } + + Component* Clone() const + { + Bias* p = new Bias(GetNInputs(),GetNOutputs(),NULL); + p->mBias.Init(mBias.Dim()); + p->mBias.Copy(mBias); + return p; + } + + void PropagateFnc(const Matrix<BaseFloat>& X, Matrix<BaseFloat>& Y) + { + Y.Copy(X); + for(size_t r=0; r<X.Rows(); r++) { + for(size_t c=0; c<X.Cols(); c++) { + Y(r,c) += mBias[c]; + } + } + } + + void BackpropagateFnc(const Matrix<BaseFloat>& X, Matrix<BaseFloat>& Y) + { Y.Copy(X); } + + + void ReadFromStream(std::istream& rIn) + { rIn >> mBias; } + + void WriteToStream(std::ostream& rOut) + { rOut << mBias; } + + private: + Vector<BaseFloat> mBias; + }; + + + + class Window : public Component + { + public: + Window(size_t nInputs, size_t nOutputs, Component* pPred) + : Component(nInputs, nOutputs, pPred) + { } + + ~Window() + { } + + + ComponentType GetType() const + { return Component::WINDOW; } + + const char* GetName() const + { return "<window>"; } + + Component* Clone() const + { + Window* p = new 
Window(GetNInputs(),GetNOutputs(),NULL); + p->mWindow.Init(mWindow.Dim()); + p->mWindow.Copy(mWindow); + return p; + } + + + void PropagateFnc(const Matrix<BaseFloat>& X, Matrix<BaseFloat>& Y) + { Y.Copy(X); + for(size_t r=0; r<X.Rows(); r++) { + for(size_t c=0; c<X.Cols(); c++) { + Y(r,c) *= mWindow[c]; + } + } + } + + void BackpropagateFnc(const Matrix<BaseFloat>& X, Matrix<BaseFloat>& Y) + { Error("__func__ Not implemented"); } + + + void ReadFromStream(std::istream& rIn) + { rIn >> mWindow; } + + void WriteToStream(std::ostream& rOut) + { rOut << mWindow; } + + private: + Vector<BaseFloat> mWindow; + }; + + class Log : public Component + { + public: + Log(size_t nInputs, size_t nOutputs, Component* pPred) + : Component(nInputs, nOutputs, pPred) + { } + + ~Log() + { } + + + ComponentType GetType() const + { return Component::LOG; } + + const char* GetName() const + { return "<log>"; } + + Component* Clone() const + { return new Log(GetNInputs(),GetNOutputs(),NULL); } + + + void PropagateFnc(const Matrix<BaseFloat>& X, Matrix<BaseFloat>& Y) + { Y.Copy(X); Y.ApplyLog(); } + + void BackpropagateFnc(const Matrix<BaseFloat>& X, Matrix<BaseFloat>& Y) + { Error("__func__ Not implemented"); } + + + void ReadFromStream(std::istream& rIn) + { } + + void WriteToStream(std::ostream& rOut) + { } + + }; + +} + + +#endif + diff --git a/src/TNetLib/.svn/text-base/Cache.cc.svn-base b/src/TNetLib/.svn/text-base/Cache.cc.svn-base new file mode 100644 index 0000000..f498318 --- /dev/null +++ b/src/TNetLib/.svn/text-base/Cache.cc.svn-base @@ -0,0 +1,248 @@ + +#include <sys/time.h> + +#include "Cache.h" +#include "Matrix.h" +#include "Vector.h" + + +namespace TNet { + + Cache:: + Cache() + : mState(EMPTY), mIntakePos(0), mExhaustPos(0), mDiscarded(0), + mRandomized(false), mTrace(0) + { } + + Cache:: + ~Cache() + { } + + void + Cache:: + Init(size_t cachesize, size_t bunchsize, long int seed) + { + if((cachesize % bunchsize) != 0) { + KALDI_ERR << "Non divisible cachesize" << cachesize + << " by bunchsize" << bunchsize; + } + + mCachesize = cachesize; + mBunchsize = bunchsize; + + mState = EMPTY; + + mIntakePos = 0; + mExhaustPos = 0; + + mRandomized = false; + + if(seed == 0) { + //generate seed + struct timeval tv; + if (gettimeofday(&tv, 0) == -1) { + Error("gettimeofday does not work."); + exit(-1); + } + seed = (int)(tv.tv_sec) + (int)tv.tv_usec + (int)(tv.tv_usec*tv.tv_usec); + } + + srand48(seed); + + } + + void + Cache:: + AddData(const Matrix<BaseFloat>& rFeatures, const Matrix<BaseFloat>& rDesired) + { + assert(rFeatures.Rows() == rDesired.Rows()); + + //lazy buffers allocation + if(mFeatures.Rows() != mCachesize) { + mFeatures.Init(mCachesize,rFeatures.Cols()); + mDesired.Init(mCachesize,rDesired.Cols()); + } + + //warn if segment longer than half-cache + if(rFeatures.Rows() > mCachesize/2) { + std::ostringstream os; + os << "Too long segment and small feature cache! 
" + << " cachesize: " << mCachesize + << " segmentsize: " << rFeatures.Rows(); + Warning(os.str()); + } + + //change state + if(mState == EMPTY) { + if(mTrace&3) std::cout << "/" << std::flush; + mState = INTAKE; mIntakePos = 0; + + //check for leftover from previous segment + int leftover = mFeaturesLeftover.Rows(); + //check if leftover is not bigger than cachesize + if(leftover > mCachesize) { + std::ostringstream os; + os << "Too small feature cache: " << mCachesize + << ", truncating: " + << leftover - mCachesize << " frames from previous segment leftover"; + //Error(os.str()); + Warning(os.str()); + leftover = mCachesize; + } + //prefill cache with leftover + if(leftover > 0) { + memcpy(mFeatures.pData(),mFeaturesLeftover.pData(), + (mFeaturesLeftover.MSize() < mFeatures.MSize()? + mFeaturesLeftover.MSize() : mFeatures.MSize()) + ); + memcpy(mDesired.pData(),mDesiredLeftover.pData(), + (mDesiredLeftover.MSize() < mDesired.MSize()? + mDesiredLeftover.MSize() : mDesired.MSize()) + ); + mFeaturesLeftover.Destroy(); + mDesiredLeftover.Destroy(); + mIntakePos += leftover; + } + } + + assert(mState == INTAKE); + assert(rFeatures.Rows() == rDesired.Rows()); + if(mTrace&2) std::cout << "F" << std::flush; + + int cache_space = mCachesize - mIntakePos; + int feature_length = rFeatures.Rows(); + int fill_rows = (cache_space<feature_length)? cache_space : feature_length; + int leftover = feature_length - fill_rows; + + assert(cache_space > 0); + assert(mFeatures.Stride()==rFeatures.Stride()); + assert(mDesired.Stride()==rDesired.Stride()); + + //copy the data to cache + memcpy(mFeatures.pData()+mIntakePos*mFeatures.Stride(), + rFeatures.pData(), + fill_rows*mFeatures.Stride()*sizeof(BaseFloat)); + + memcpy(mDesired.pData()+mIntakePos*mDesired.Stride(), + rDesired.pData(), + fill_rows*mDesired.Stride()*sizeof(BaseFloat)); + + //copy leftovers + if(leftover > 0) { + mFeaturesLeftover.Init(leftover,mFeatures.Cols()); + mDesiredLeftover.Init(leftover,mDesired.Cols()); + + memcpy(mFeaturesLeftover.pData(), + rFeatures.pData()+fill_rows*rFeatures.Stride(), + mFeaturesLeftover.MSize()); + + memcpy(mDesiredLeftover.pData(), + rDesired.pData()+fill_rows*rDesired.Stride(), + mDesiredLeftover.MSize()); + } + + //update cursor + mIntakePos += fill_rows; + + //change state + if(mIntakePos == mCachesize) { + if(mTrace&3) std::cout << "\\" << std::flush; + mState = FULL; + } + } + + + + void + Cache:: + Randomize() + { + assert(mState == FULL || mState == INTAKE); + + if(mTrace&3) std::cout << "R" << std::flush; + + //lazy initialization of the output buffers + mFeaturesRandom.Init(mCachesize,mFeatures.Cols()); + mDesiredRandom.Init(mCachesize,mDesired.Cols()); + + //generate random series of integers + Vector<int> randmask(mIntakePos); + for(unsigned int i=0; i<mIntakePos; i++) { + randmask[i]=i; + } + int* ptr = randmask.pData(); + std::random_shuffle(ptr, ptr+mIntakePos, GenerateRandom); + + //randomize + for(int i=0; i<randmask.Dim(); i++) { + mFeaturesRandom[i].Copy(mFeatures[randmask[i]]); + mDesiredRandom[i].Copy(mDesired[randmask[i]]); + } + + mRandomized = true; + } + + void + Cache:: + GetBunch(Matrix<BaseFloat>& rFeatures, Matrix<BaseFloat>& rDesired) + { + if(mState == EMPTY) { + Error("GetBunch on empty cache!!!"); + } + + //change state if full... 
+ if(mState == FULL) { + if(mTrace&3) std::cout << "\\" << std::flush; + mState = EXHAUST; mExhaustPos = 0; + } + + //final cache is not completely filled + if(mState == INTAKE) { + if(mTrace&3) std::cout << "\\-LAST_CACHE\n" << std::flush; + mState = EXHAUST; mExhaustPos = 0; + } + + assert(mState == EXHAUST); + + //init the output + if(rFeatures.Rows()!=mBunchsize || rFeatures.Cols()!=mFeatures.Cols()) { + rFeatures.Init(mBunchsize,mFeatures.Cols()); + } + if(rDesired.Rows()!=mBunchsize || rDesired.Cols()!=mDesired.Cols()) { + rDesired.Init(mBunchsize,mDesired.Cols()); + } + + //copy the output + if(mRandomized) { + memcpy(rFeatures.pData(), + mFeaturesRandom.pData()+mExhaustPos*mFeatures.Stride(), + rFeatures.MSize()); + + memcpy(rDesired.pData(), + mDesiredRandom.pData()+mExhaustPos*mDesired.Stride(), + rDesired.MSize()); + } else { + memcpy(rFeatures.pData(), + mFeatures.pData()+mExhaustPos*mFeatures.Stride(), + rFeatures.MSize()); + + memcpy(rDesired.pData(), + mDesired.pData()+mExhaustPos*mDesired.Stride(), + rDesired.MSize()); + } + + + //update cursor + mExhaustPos += mBunchsize; + + //change state to EMPTY + if(mExhaustPos > mIntakePos-mBunchsize) { + //we don't have more complete bunches... + mDiscarded += mIntakePos - mExhaustPos; + + mState = EMPTY; + } + } + + +} diff --git a/src/TNetLib/.svn/text-base/Cache.h.svn-base b/src/TNetLib/.svn/text-base/Cache.h.svn-base new file mode 100644 index 0000000..800d92c --- /dev/null +++ b/src/TNetLib/.svn/text-base/Cache.h.svn-base @@ -0,0 +1,74 @@ +#ifndef _CUCACHE_H_ +#define _CUCACHE_H_ + +#include "Matrix.h" + +namespace TNet { + + + /** + * The feature-target pair cache + */ + class Cache { + typedef enum { EMPTY, INTAKE, FULL, EXHAUST } State; + public: + Cache(); + ~Cache(); + + /// Initialize the cache + void Init(size_t cachesize, size_t bunchsize, long int seed = 0); + + /// Add data to cache, returns number of added vectors + void AddData(const Matrix<BaseFloat>& rFeatures, const Matrix<BaseFloat>& rDesired); + /// Randomizes the cache + void Randomize(); + /// Get the bunch of training data + void GetBunch(Matrix<BaseFloat>& rFeatures, Matrix<BaseFloat>& rDesired); + + + /// Returns true if the cache was completely filled + bool Full() + { return (mState == FULL); } + + /// Returns true if the cache is empty + bool Empty() + { return (mState == EMPTY || mIntakePos < mBunchsize); } + + /// Number of discarded frames + int Discarded() + { return mDiscarded; } + + /// Set the trace message level + void Trace(int trace) + { mTrace = trace; } + + private: + + static long int GenerateRandom(int max) + { return lrand48() % max; } + + State mState; ///< Current state of the cache + + size_t mIntakePos; ///< Number of intaken vectors by AddData + size_t mExhaustPos; ///< Number of exhausted vectors by GetBunch + + size_t mCachesize; ///< Size of cache + size_t mBunchsize; ///< Size of bunch + int mDiscarded; ///< Number of discarded frames + + Matrix<BaseFloat> mFeatures; ///< Feature cache + Matrix<BaseFloat> mFeaturesRandom; ///< Feature cache + Matrix<BaseFloat> mFeaturesLeftover; ///< Feature cache + + Matrix<BaseFloat> mDesired; ///< Desired vector cache + Matrix<BaseFloat> mDesiredRandom; ///< Desired vector cache + Matrix<BaseFloat> mDesiredLeftover; ///< Desired vector cache + + bool mRandomized; + + int mTrace; + }; + +} + +#endif diff --git a/src/TNetLib/.svn/text-base/Component.h.svn-base b/src/TNetLib/.svn/text-base/Component.h.svn-base new file mode 100644 index 0000000..762451e --- /dev/null +++ 
b/src/TNetLib/.svn/text-base/Component.h.svn-base @@ -0,0 +1,387 @@ +#ifndef _NETWORK_COMPONENT_I_H +#define _NETWORK_COMPONENT_I_H + + +#include "Vector.h" +#include "Matrix.h" + +#include <iostream> +#include <stdexcept> + + +namespace TNet { + + + /** + * Basic element of the network, + * it is a box with defined inputs and outputs, + * and functions to refresh the outputs. + * + * It is able to compute the transformation function (forward pass) + * and the Jacobian function (backward pass), + * which are to be implemented in descendants + */ + class Component + { + public: + /// Types of the net components + typedef enum { + UPDATABLE_COMPONENT = 0x0100, + BIASED_LINEARITY, + SHARED_LINEARITY, + + ACT_FUN = 0x0200, + SOFTMAX, + SIGMOID, + BLOCK_SOFTMAX, + + OTHER = 0x0400, + EXPAND, + COPY, + TRANSPOSE, + BLOCK_LINEARITY, + WINDOW, + BIAS, + LOG, + + BLOCK_ARRAY, + } ComponentType; + + + ////////////////////////////////////////////////////////////// + // Constructor & Destructor + public: + Component(size_t nInputs, size_t nOutputs, Component *pPred); + virtual ~Component(); + + ////////////////////////////////////////////////////////////// + // Interface specification (public) + public: + /// Get Type Identification of the component + virtual ComponentType GetType() const = 0; + /// Get Type Label of the component + virtual const char* GetName() const = 0; + /// + virtual bool IsUpdatable() const + { return false; } + /// Clone the component + virtual Component* Clone() const = 0; + + /// Get size of input vectors + size_t GetNInputs() const; + /// Get size of output vectors + size_t GetNOutputs() const; + + /// IO Data getters + const Matrix<BaseFloat>& GetInput() const; + const Matrix<BaseFloat>& GetOutput() const; + const Matrix<BaseFloat>& GetErrorInput() const; + const Matrix<BaseFloat>& GetErrorOutput() const; + + /// Set input vector (bind with the preceding NetworkComponent) + void SetInput(const Matrix<BaseFloat>& rInput); + /// Set error input vector (bind with the following NetworkComponent) + void SetErrorInput(const Matrix<BaseFloat>& rErrorInput); + + /// Perform forward pass propagation Input->Output + void Propagate(); + /// Perform backward pass propagation ErrorInput->ErrorOutput + void Backpropagate(); + + /// Reads the component parameters from stream + virtual void ReadFromStream(std::istream& rIn) { } + /// Writes the component's parameters to stream + virtual void WriteToStream(std::ostream& rOut) { } + + + /////////////////////////////////////////////////////////////// + // Nonpublic member functions used to update data outputs + protected: + /// Forward pass transformation (to be implemented by descendants...) + virtual void PropagateFnc(const Matrix<BaseFloat>& X, Matrix<BaseFloat>& Y) = 0; + /// Backward pass transformation (to be implemented by descendants...)
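+ /// (here X is the error matrix arriving from the following component + /// and Y the error to be handed to the preceding one)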
+ virtual void BackpropagateFnc(const Matrix<BaseFloat>& X, Matrix<BaseFloat>& Y) = 0; + + + /////////////////////////////////////////////////////////////// + // data members + protected: + + size_t mNInputs; ///< Size of input vectors + size_t mNOutputs; ///< Size of output vectors + + const Matrix<BaseFloat>* mpInput; ///< inputs are NOT OWNED by component + const Matrix<BaseFloat>* mpErrorInput;///< inputs are NOT OWNED by component + + Matrix<BaseFloat> mOutput; ///< outputs are OWNED by component + Matrix<BaseFloat> mErrorOutput; ///< outputs are OWNED by component + + }; + + + /** + * Class UpdatableComponent is a box which has some + * parameters adjustable by learning + * + * you can set the learning rate, lock the params, + * and learn from each data observation + */ + class UpdatableComponent : public Component + { + ////////////////////////////////////////////////////////////// + // Constructor & Destructor + public: + UpdatableComponent(size_t nInputs, size_t nOutputs, Component *pPred); + virtual ~UpdatableComponent(); + + + ////////////////////////////////////////////////////////////// + // Interface specification (public) + public: + /// + virtual bool IsUpdatable() const + { return true; } + + /// calculate gradient + virtual void Gradient() = 0; + /// accumulate gradient from other components + virtual void AccuGradient(const UpdatableComponent& src, int thr, int thrN) = 0; + /// update weights, reset the accumulator + virtual void Update(int thr, int thrN) = 0; + + /// Sets the learning rate of gradient descent + void LearnRate(BaseFloat rate); + /// Gets the learning rate of gradient descent + BaseFloat LearnRate() const; + + void Momentum(BaseFloat mmt); + BaseFloat Momentum() const ; + + void Weightcost(BaseFloat cost); + BaseFloat Weightcost() const; + + void Bunchsize(size_t size); + size_t Bunchsize() const; + + protected: + BaseFloat mLearningRate; + BaseFloat mMomentum; + BaseFloat mWeightcost; + size_t mBunchsize; + }; + + + + + ////////////////////////////////////////////////////////////////////////// + // INLINE FUNCTIONS + // Component:: + inline + Component:: + Component(size_t nInputs, size_t nOutputs, Component *pPred) + : mNInputs(nInputs), mNOutputs(nOutputs), + mpInput(NULL), mpErrorInput(NULL), + mOutput(), mErrorOutput() + { + /* DOUBLE LINK the Components */ + if (pPred != NULL) { + SetInput(pPred->GetOutput()); + pPred->SetErrorInput(GetErrorOutput()); + } + } + + + inline + Component:: + ~Component() + { + ; + } + + inline void + Component:: + Propagate() + { + //initialize output buffer + if(mOutput.Rows() != GetInput().Rows() || mOutput.Cols() != GetNOutputs()) { + mOutput.Init(GetInput().Rows(),GetNOutputs()); + } + //do the dimensionality test + if(GetNInputs() != GetInput().Cols()) { + KALDI_ERR << "Non-matching INPUT dim!!! 
Network dim: " << GetNInputs() + << " Data dim: " << GetInput().Cols(); + } + //run transform + PropagateFnc(GetInput(),mOutput); + + } + + + inline void + Component:: + Backpropagate() + { + //re-initialize the output buffer + if(mErrorOutput.Rows() != GetErrorInput().Rows() || mErrorOutput.Cols() != GetNInputs()) { + mErrorOutput.Init(GetErrorInput().Rows(),GetNInputs()); + } + + //do the dimensionality test + assert(GetErrorInput().Cols() == mNOutputs); + assert(mErrorOutput.Cols() == mNInputs); + assert(mErrorOutput.Rows() == GetErrorInput().Rows()); + + //transform + BackpropagateFnc(GetErrorInput(),mErrorOutput); + + } + + + inline void + Component:: + SetInput(const Matrix<BaseFloat>& rInput) + { + mpInput = &rInput; + } + + + inline void + Component:: + SetErrorInput(const Matrix<BaseFloat>& rErrorInput) + { + mpErrorInput = &rErrorInput; + } + + + inline const Matrix<BaseFloat>& + Component:: + GetInput() const + { + if (NULL == mpInput) Error("mpInput is NULL"); + return *mpInput; + } + + inline const Matrix<BaseFloat>& + Component:: + GetOutput() const + { + return mOutput; + } + + inline const Matrix<BaseFloat>& + Component:: + GetErrorInput() const + { + if (NULL == mpErrorInput) Error("mpErrorInput is NULL"); + return *mpErrorInput; + } + + inline const Matrix<BaseFloat>& + Component:: + GetErrorOutput() const + { + return mErrorOutput; + } + + inline size_t + Component:: + GetNInputs() const + { + return mNInputs; + } + + inline size_t + Component:: + GetNOutputs() const + { + return mNOutputs; + } + + + + ////////////////////////////////////////////////////////////////////////// + // INLINE FUNCTIONS + // UpdatableComponent:: + + inline + UpdatableComponent:: + UpdatableComponent(size_t nInputs, size_t nOutputs, Component *pPred) + : Component(nInputs, nOutputs, pPred), + mLearningRate(0.0), mMomentum(0.0), mWeightcost(0.0), mBunchsize(0) + { + ; + } + + + inline + UpdatableComponent:: + ~UpdatableComponent() + { + ; + } + + + inline void + UpdatableComponent:: + LearnRate(BaseFloat rate) + { + mLearningRate = rate; + } + + inline BaseFloat + UpdatableComponent:: + LearnRate() const + { + return mLearningRate; + } + + + inline void + UpdatableComponent:: + Momentum(BaseFloat mmt) + { + mMomentum = mmt; + } + + inline BaseFloat + UpdatableComponent:: + Momentum() const + { + return mMomentum; + } + + + inline void + UpdatableComponent:: + Weightcost(BaseFloat cost) + { + mWeightcost = cost; + } + + inline BaseFloat + UpdatableComponent:: + Weightcost() const + { + return mWeightcost; + } + + + inline void + UpdatableComponent:: + Bunchsize(size_t size) + { + mBunchsize = size; + } + + inline size_t + UpdatableComponent:: + Bunchsize() const + { + return mBunchsize; + } + + +} // namespace TNet + + +#endif diff --git a/src/TNetLib/.svn/text-base/Makefile.svn-base b/src/TNetLib/.svn/text-base/Makefile.svn-base new file mode 100644 index 0000000..58ff988 --- /dev/null +++ b/src/TNetLib/.svn/text-base/Makefile.svn-base @@ -0,0 +1,29 @@ + +include ../tnet.mk + +INCLUDE = -I. 
-I../KaldiLib -I../STKLib/ + +all: libTNetLib.a + +libTNetLib.a: $(OBJ) + $(AR) ruv $@ $(OBJ) + $(RANLIB) $@ + +%.o : %.cc + $(CXX) -o $@ -c $< $(CFLAGS) $(CXXFLAGS) $(INCLUDE) + + + +.PHONY: clean doc depend +clean: + rm -f *.o *.a + +doc: + doxygen ../../doc/doxyfile_TNetLib + +depend: + $(CXX) -M $(CXXFLAGS) *.cc $(INCLUDE) > .depend.mk + +-include .depend.mk + + diff --git a/src/TNetLib/.svn/text-base/Mutex.cc.svn-base b/src/TNetLib/.svn/text-base/Mutex.cc.svn-base new file mode 100644 index 0000000..4ec956a --- /dev/null +++ b/src/TNetLib/.svn/text-base/Mutex.cc.svn-base @@ -0,0 +1,48 @@ + +#include <pthread.h> +#include <cerrno> + +#include "Error.h" +#include "Mutex.h" + +namespace TNet { + + +Mutex::Mutex() { + if(0 != pthread_mutex_init(&mutex_,NULL)) + KALDI_ERR << "Cannot initialize mutex"; +} + + +Mutex::~Mutex() { + if(0 != pthread_mutex_destroy(&mutex_)) + KALDI_ERR << "Cannot destroy mutex"; +} + + +void Mutex::Lock() { + if(0 != pthread_mutex_lock(&mutex_)) + KALDI_ERR << "Error on locking mutex"; +} + + +bool Mutex::TryLock() { + int ret = pthread_mutex_trylock(&mutex_); + switch (ret) { + case 0: return true; + case EBUSY: return false; + default: KALDI_ERR << "Error on try-locking mutex"; + } + return false; //not reached, silences the compiler +} + + +void Mutex::Unlock() { + if(0 != pthread_mutex_unlock(&mutex_)) + KALDI_ERR << "Error on unlocking mutex"; +} + + + +}//namespace TNet + diff --git a/src/TNetLib/.svn/text-base/Mutex.h.svn-base b/src/TNetLib/.svn/text-base/Mutex.h.svn-base new file mode 100644 index 0000000..ae2cfff --- /dev/null +++ b/src/TNetLib/.svn/text-base/Mutex.h.svn-base @@ -0,0 +1,34 @@ + +#include <pthread.h> + +namespace TNet { + +/** + * This class encapsulates a mutex to ensure + * exclusive access to some critical section + * which manipulates shared resources. + * + * The mutex must be unlocked from the + * SAME THREAD which locked it + */ +class Mutex { + public: + Mutex(); + ~Mutex(); + + void Lock(); + + /** + * Try to lock the mutex without waiting for it.
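+ * (wraps pthread_mutex_trylock; a mutex held elsewhere yields EBUSY, + * which is reported as false)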
+ * Returns: true when the lock was successful, + * false when the mutex was already locked + */ + bool TryLock(); + + void Unlock(); + + private: + pthread_mutex_t mutex_; +}; + +} //namespace TNet diff --git a/src/TNetLib/.svn/text-base/Nnet.cc.svn-base b/src/TNetLib/.svn/text-base/Nnet.cc.svn-base new file mode 100644 index 0000000..4b364ac --- /dev/null +++ b/src/TNetLib/.svn/text-base/Nnet.cc.svn-base @@ -0,0 +1,360 @@ + +#include <algorithm> +//#include <locale> +#include <cctype> + +#include "Nnet.h" +#include "CRBEDctFeat.h" +#include "BlockArray.h" + +namespace TNet { + + + + +void Network::Feedforward(const Matrix<BaseFloat>& in, Matrix<BaseFloat>& out, + size_t start_frm_ext, size_t end_frm_ext) { + //empty network: copy input to output + if(mNnet.size() == 0) { + if(out.Rows() != in.Rows() || out.Cols() != in.Cols()) { + out.Init(in.Rows(),in.Cols()); + } + out.Copy(in); + return; + } + + //short input: propagate in one block + if(in.Rows() < 5000) { + Propagate(in,out); + } else {//long input: propagate per parts + //initialize + out.Init(in.Rows(),GetNOutputs()); + Matrix<BaseFloat> tmp_in, tmp_out; + int done=0, block=1024; + //propagate first part + tmp_in.Init(block+end_frm_ext,in.Cols()); + tmp_in.Copy(in.Range(0,block+end_frm_ext,0,in.Cols())); + Propagate(tmp_in,tmp_out); + out.Range(0,block,0,tmp_out.Cols()).Copy( + tmp_out.Range(0,block,0,tmp_out.Cols()) + ); + done += block; + //propagate middle parts + while((done+2*block) < in.Rows()) { + tmp_in.Init(block+start_frm_ext+end_frm_ext,in.Cols()); + tmp_in.Copy(in.Range(done-start_frm_ext, block+start_frm_ext+end_frm_ext, 0,in.Cols())); + Propagate(tmp_in,tmp_out); + out.Range(done,block,0,tmp_out.Cols()).Copy( + tmp_out.Range(start_frm_ext,block,0,tmp_out.Cols()) + ); + done += block; + } + //propagate last part + tmp_in.Init(in.Rows()-done+start_frm_ext,in.Cols()); + tmp_in.Copy(in.Range(done-start_frm_ext,in.Rows()-done+start_frm_ext,0,in.Cols())); + Propagate(tmp_in,tmp_out); + out.Range(done,out.Rows()-done,0,out.Cols()).Copy( + tmp_out.Range(start_frm_ext,tmp_out.Rows()-start_frm_ext,0,tmp_out.Cols()) + ); + + done += tmp_out.Rows()-start_frm_ext; + assert(done == out.Rows()); + } +} + + +void Network::Propagate(const Matrix<BaseFloat>& in, Matrix<BaseFloat>& out) { + //empty network: copy input to output + if(mNnet.size() == 0) { + if(out.Rows() != in.Rows() || out.Cols() != in.Cols()) { + out.Init(in.Rows(),in.Cols()); + } + out.Copy(in); + return; + } + + //this will keep pointer to matrix 'in', for backprop + mNnet.front()->SetInput(in); + + //propagate + LayeredType::iterator it; + for(it=mNnet.begin(); it!=mNnet.end(); ++it) { + (*it)->Propagate(); + } + + //copy the output matrix + const Matrix<BaseFloat>& mat = mNnet.back()->GetOutput(); + if(out.Rows() != mat.Rows() || out.Cols() != mat.Cols()) { + out.Init(mat.Rows(),mat.Cols()); + } + out.Copy(mat); + +} + + +void Network::Backpropagate(const Matrix<BaseFloat>& globerr) { + //pass matrix to last component + mNnet.back()->SetErrorInput(globerr); + + // back-propagation: reversed order + LayeredType::reverse_iterator it; + for(it=mNnet.rbegin(); it!=mNnet.rend(); ++it) { + //first component does not backpropagate error (no predecessors) + if(*it != mNnet.front()) { + (*it)->Backpropagate(); + } + //compute gradient if updatable component + if((*it)->IsUpdatable()) { + UpdatableComponent& comp = dynamic_cast<UpdatableComponent&>(**it); + comp.Gradient(); //compute gradient + } + } +} + + +void Network::AccuGradient(const Network& src, int thr, int thrN) { +
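//walk this network and 'src' in lockstep; only updatable components + //accumulate gradients here (thr,thrN select each thread's row stripe) +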
LayeredType::iterator it; + LayeredType::const_iterator it2; + + for(it=mNnet.begin(), it2=src.mNnet.begin(); it!=mNnet.end(); ++it,++it2) { + if((*it)->IsUpdatable()) { + UpdatableComponent& comp = dynamic_cast<UpdatableComponent&>(**it); + const UpdatableComponent& comp2 = dynamic_cast<const UpdatableComponent&>(**it2); + comp.AccuGradient(comp2,thr,thrN); + } + } +} + + +void Network::Update(int thr, int thrN) { + LayeredType::iterator it; + + for(it=mNnet.begin(); it!=mNnet.end(); ++it) { + if((*it)->IsUpdatable()) { + UpdatableComponent& comp = dynamic_cast<UpdatableComponent&>(**it); + comp.Update(thr,thrN); + } + } +} + + +Network* Network::Clone() { + Network* net = new Network; + LayeredType::iterator it; + for(it = mNnet.begin(); it != mNnet.end(); ++it) { + //clone + net->mNnet.push_back((*it)->Clone()); + //connect network + if(net->mNnet.size() > 1) { + Component* last = *(net->mNnet.end()-1); + Component* prev = *(net->mNnet.end()-2); + last->SetInput(prev->GetOutput()); + prev->SetErrorInput(last->GetErrorOutput()); + } + } + + //copy the learning rate + //net->SetLearnRate(GetLearnRate()); + + return net; +} + + +void Network::ReadNetwork(const char* pSrc) { + std::ifstream in(pSrc); + if(!in.good()) { + Error(std::string("Error, cannot read model: ")+pSrc); + } + ReadNetwork(in); + in.close(); +} + + + +void Network::ReadNetwork(std::istream& rIn) { + //get the network elements from a factory + Component *pComp; + while(NULL != (pComp = ComponentFactory(rIn))) + mNnet.push_back(pComp); +} + + +void Network::WriteNetwork(const char* pDst) { + std::ofstream out(pDst); + if(!out.good()) { + Error(std::string("Error, cannot write model: ")+pDst); + } + WriteNetwork(out); + out.close(); +} + + +void Network::WriteNetwork(std::ostream& rOut) { + //dump all the components + LayeredType::iterator it; + for(it=mNnet.begin(); it!=mNnet.end(); ++it) { + ComponentDumper(rOut, **it); + } +} + + +Component* +Network:: +ComponentFactory(std::istream& rIn) +{ + rIn >> std::ws; + if(rIn.eof()) return NULL; + + Component* pRet=NULL; + Component* pPred=NULL; + + std::string componentTag; + size_t nInputs, nOutputs; + + rIn >> std::ws; + rIn >> componentTag; + if(componentTag == "") return NULL; //nothing left in the file + + //make it lowercase + std::transform(componentTag.begin(), componentTag.end(), + componentTag.begin(), tolower); + + //the 'endblock' tag terminates the network + if(componentTag == "<endblock>") return NULL; + + + if(componentTag[0] != '<' || componentTag[componentTag.size()-1] != '>') { + Error(std::string("Invalid component tag:")+componentTag); + } + + rIn >> std::ws; + rIn >> nOutputs; + rIn >> std::ws; + rIn >> nInputs; + assert(nInputs > 0 && nOutputs > 0); + + //make coupling with predecessor + if(mNnet.size() == 0) { + pPred = NULL; + } else { + pPred = mNnet.back(); + } + + //array with list of component tags + static const std::string TAGS[] = { + "<biasedlinearity>", + "<sharedlinearity>", + + "<sigmoid>", + "<softmax>", + "<blocksoftmax>", + + "<expand>", + "<copy>", + "<transpose>", + "<blocklinearity>", + "<bias>", + "<window>", + "<log>", + + "<blockarray>", + }; + + static const int n_tags = sizeof(TAGS) / sizeof(TAGS[0]); + int i = 0; + for(i=0; i<n_tags; i++) { + if(componentTag == TAGS[i]) break; + } + + //switch according to position in array TAGS + switch(i) { + case 0: pRet = new BiasedLinearity(nInputs,nOutputs,pPred); break; + case 1: pRet = new SharedLinearity(nInputs,nOutputs,pPred); break; + + case 2: pRet = new
Sigmoid(nInputs,nOutputs,pPred); break; + case 3: pRet = new Softmax(nInputs,nOutputs,pPred); break; + case 4: pRet = new BlockSoftmax(nInputs,nOutputs,pPred); break; + + case 5: pRet = new Expand(nInputs,nOutputs,pPred); break; + case 6: pRet = new Copy(nInputs,nOutputs,pPred); break; + case 7: pRet = new Transpose(nInputs,nOutputs,pPred); break; + case 8: pRet = new BlockLinearity(nInputs,nOutputs,pPred); break; + case 9: pRet = new Bias(nInputs,nOutputs,pPred); break; + case 10: pRet = new Window(nInputs,nOutputs,pPred); break; + case 11: pRet = new Log(nInputs,nOutputs,pPred); break; + + case 12: pRet = new BlockArray(nInputs,nOutputs,pPred); break; + + default: Error(std::string("Unknown Component tag:")+componentTag); + } + + //read params if it is updatable component + pRet->ReadFromStream(rIn); + //return + return pRet; +} + + +void +Network:: +ComponentDumper(std::ostream& rOut, Component& rComp) +{ + //use tags of all the components; or the identification codes + //array with list of component tags + static const Component::ComponentType TYPES[] = { + Component::BIASED_LINEARITY, + Component::SHARED_LINEARITY, + + Component::SIGMOID, + Component::SOFTMAX, + Component::BLOCK_SOFTMAX, + + Component::EXPAND, + Component::COPY, + Component::TRANSPOSE, + Component::BLOCK_LINEARITY, + Component::BIAS, + Component::WINDOW, + Component::LOG, + + Component::BLOCK_ARRAY, + }; + static const std::string TAGS[] = { + "<biasedlinearity>", + "<sharedlinearity>", + + "<sigmoid>", + "<softmax>", + "<blocksoftmax>", + + "<expand>", + "<copy>", + "<transpose>", + "<blocklinearity>", + "<bias>", + "<window>", + "<log>", + + "<blockarray>", + }; + static const int MAX = sizeof TYPES / sizeof TYPES[0]; + + int i; + for(i=0; i<MAX; ++i) { + if(TYPES[i] == rComp.GetType()) break; + } + if(i == MAX) Error("Unknown ComponentType"); + + //dump the component tag + rOut << TAGS[i] << " " + << rComp.GetNOutputs() << " " + << rComp.GetNInputs() << std::endl; + + //dump the parameters (if any) + rComp.WriteToStream(rOut); +} + + + + +} //namespace + diff --git a/src/TNetLib/.svn/text-base/Nnet.h.svn-base b/src/TNetLib/.svn/text-base/Nnet.h.svn-base new file mode 100644 index 0000000..12e2585 --- /dev/null +++ b/src/TNetLib/.svn/text-base/Nnet.h.svn-base @@ -0,0 +1,194 @@ +#ifndef _NETWORK_H_ +#define _NETWORK_H_ + +#include "Component.h" +#include "BiasedLinearity.h" +#include "SharedLinearity.h" +#include "Activation.h" + +#include "Vector.h" + +#include <vector> + + +namespace TNet { + +class Network +{ +////////////////////////////////////// +// Typedefs +typedef std::vector<Component*> LayeredType; + + ////////////////////////////////////// + // Disable copy construction and assignment + private: + Network(Network&); + Network& operator=(Network&); + + public: + // allow incomplete network creation + Network() + { } + + ~Network(); + + int Layers() const + { return mNnet.size(); } + + Component& Layer(int i) + { return *mNnet[i]; } + + const Component& Layer(int i) const + { return *mNnet[i]; } + + /// Feedforward the data per blocks, this needs less memory, + /// and allows to process very long files. 
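+ /// (Internally the input is walked in 1024-frame blocks, each padded
+ /// with start_frm_ext/end_frm_ext rows of context; e.g. with
+ /// end_frm_ext=5 the first block propagates 1024+5=1029 input rows.)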
+ /// It does not trim the *_frm_ext, but uses it + /// for concatenation of segments + void Feedforward(const Matrix<BaseFloat>& in, Matrix<BaseFloat>& out, + size_t start_frm_ext, size_t end_frm_ext); + /// forward the data to the output + void Propagate(const Matrix<BaseFloat>& in, Matrix<BaseFloat>& out); + /// backpropagate the error while calculating the gradient + void Backpropagate(const Matrix<BaseFloat>& globerr); + + /// accumulate the gradient from other networks + void AccuGradient(const Network& src, int thr, int thrN); + /// update weights, reset the accumulator + void Update(int thr, int thrN); + + Network* Clone(); ///< Clones the network + + void ReadNetwork(const char* pSrc); ///< read the network from file + void ReadNetwork(std::istream& rIn); ///< read the network from stream + void WriteNetwork(const char* pDst); ///< write network to file + void WriteNetwork(std::ostream& rOut); ///< write network to stream + + size_t GetNInputs() const; ///< Dimensionality of the input features + size_t GetNOutputs() const; ///< Dimensionality of the desired vectors + + void SetLearnRate(BaseFloat learnRate); ///< set the learning rate value + BaseFloat GetLearnRate(); ///< get the learning rate value + + void SetWeightcost(BaseFloat l2); ///< set the L2 regularization const + + void ResetBunchsize(); ///< reset the frame counter (needed for L2 regularization + void AccuBunchsize(const Network& src); ///< accumulate frame counts in bunch (needed in L2 regularization + + private: + /// Creates a component by reading from stream + Component* ComponentFactory(std::istream& In); + /// Dumps component into a stream + void ComponentDumper(std::ostream& rOut, Component& rComp); + + private: + LayeredType mNnet; ///< container with the network layers + +}; + + +////////////////////////////////////////////////////////////////////////// +// INLINE FUNCTIONS +// Network:: +inline Network::~Network() { + //delete all the components + LayeredType::iterator it; + for(it=mNnet.begin(); it!=mNnet.end(); ++it) { + delete *it; + } +} + + +inline size_t Network::GetNInputs() const { + assert(mNnet.size() > 0); + return mNnet.front()->GetNInputs(); +} + + +inline size_t +Network:: +GetNOutputs() const +{ + assert(mNnet.size() > 0); + return mNnet.back()->GetNOutputs(); +} + + + +inline void +Network:: +SetLearnRate(BaseFloat learnRate) +{ + LayeredType::iterator it; + for(it=mNnet.begin(); it!=mNnet.end(); ++it) { + if((*it)->IsUpdatable()) { + dynamic_cast<UpdatableComponent*>(*it)->LearnRate(learnRate); + } + } +} + + +inline BaseFloat +Network:: +GetLearnRate() +{ + //TODO - learn rates may differ layer to layer + assert(mNnet.size() > 0); + for(size_t i=0; i<mNnet.size(); i++) { + if(mNnet[i]->IsUpdatable()) { + return dynamic_cast<UpdatableComponent*>(mNnet[i])->LearnRate(); + } + } + Error("No updatable NetComponents"); + return -1; +} + + +inline void +Network:: +SetWeightcost(BaseFloat l2) +{ + LayeredType::iterator it; + for(it=mNnet.begin(); it!=mNnet.end(); ++it) { + if((*it)->IsUpdatable()) { + dynamic_cast<UpdatableComponent*>(*it)->Weightcost(l2); + } + } +} + + +inline void +Network:: +ResetBunchsize() +{ + LayeredType::iterator it; + for(it=mNnet.begin(); it!=mNnet.end(); ++it) { + if((*it)->IsUpdatable()) { + dynamic_cast<UpdatableComponent*>(*it)->Bunchsize(0); + } + } +} + +inline void +Network:: +AccuBunchsize(const Network& src) +{ + assert(Layers() == src.Layers()); + assert(Layers() > 0); + + for(int i=0; i<Layers(); i++) { + if(Layer(i).IsUpdatable()) { + UpdatableComponent& 
tgt_comp = dynamic_cast<UpdatableComponent&>(Layer(i)); + const UpdatableComponent& src_comp = dynamic_cast<const UpdatableComponent&>(src.Layer(i)); + tgt_comp.Bunchsize(tgt_comp.Bunchsize()+src_comp.GetOutput().Rows()); + } + } +} + + + +} //namespace + +#endif + + diff --git a/src/TNetLib/.svn/text-base/ObjFun.cc.svn-base b/src/TNetLib/.svn/text-base/ObjFun.cc.svn-base new file mode 100644 index 0000000..c899fb1 --- /dev/null +++ b/src/TNetLib/.svn/text-base/ObjFun.cc.svn-base @@ -0,0 +1,231 @@ + +#include "ObjFun.h" +#include "Error.h" + +#include <limits> + +namespace TNet { + + +ObjectiveFunction* ObjectiveFunction::Factory(ObjFunType type) { + ObjectiveFunction* ret = NULL; + switch(type) { + case MEAN_SQUARE_ERROR: ret = new MeanSquareError; break; + case CROSS_ENTROPY: ret = new CrossEntropy; break; + default: Error("Unknown ObjectiveFunction type"); + } + return ret; +} + + +/* + * MeanSquareError + */ +void MeanSquareError::Evaluate(const Matrix<BaseFloat>& net_out, const Matrix<BaseFloat>& target, Matrix<BaseFloat>* err) { + + //check dimensions + assert(net_out.Rows() == target.Rows()); + assert(net_out.Cols() == target.Cols()); + if(err->Rows() != net_out.Rows() || err->Cols() != net_out.Cols()) { + err->Init(net_out.Rows(),net_out.Cols()); + } + + //compute global gradient + err->Copy(net_out); + err->AddScaled(-1,target); + + //compute loss function + double sum = 0; + for(size_t r=0; r<err->Rows(); r++) { + for(size_t c=0; c<err->Cols(); c++) { + BaseFloat val = (*err)(r,c); + sum += val*val; + } + } + error_ += sum/2.0; + frames_ += net_out.Rows(); +} + + +std::string MeanSquareError::Report() { + std::stringstream ss; + ss << "Mse:" << error_ << " frames:" << frames_ + << " err/frm:" << error_/frames_ + << "\n"; + return ss.str(); +} + + +/* + * CrossEntropy + */ + +///Find maximum in float array +inline int FindMaxId(const BaseFloat* ptr, size_t N) { + BaseFloat mval = -1e20f; + int mid = -1; + for(size_t i=0; i<N; i++) { + if(ptr[i] > mval) { + mid = i; mval = ptr[i]; + } + } + return mid; +} + + +void +CrossEntropy::Evaluate(const Matrix<BaseFloat>& net_out, const Matrix<BaseFloat>& target, Matrix<BaseFloat>* err) +{ + //check dimensions + assert(net_out.Rows() == target.Rows()); + assert(net_out.Cols() == target.Cols()); + if(err->Rows() != net_out.Rows() || err->Cols() != net_out.Cols()) { + err->Init(net_out.Rows(),net_out.Cols()); + } + + //allocate confunsion buffers + if(confusion_mode_ != NO_CONF) { + if(confusion_.Rows() != target.Cols() || confusion_.Cols() != target.Cols()) { + confusion_.Init(target.Cols(),target.Cols()); + confusion_count_.Init(target.Cols()); + diag_confusion_.Init(target.Cols()); + } + } + + //compute global gradient (assuming on softmax input) + err->Copy(net_out); + err->AddScaled(-1,target); + + //collect max values + std::vector<size_t> max_target_id(target.Rows()); + std::vector<size_t> max_netout_id(target.Rows()); + //check correct classification + int corr = 0; + for(size_t r=0; r<net_out.Rows(); r++) { + int id_netout = FindMaxId(net_out[r].pData(),net_out.Cols()); + int id_target = FindMaxId(target[r].pData(),target.Cols()); + if(id_netout == id_target) corr++; + max_target_id[r] = id_target;//store the max value + max_netout_id[r] = id_netout; + } + + //compute loss function + double sumerr = 0; + for(size_t r=0; r<net_out.Rows(); r++) { + if(target(r,max_target_id[r]) == 1.0) { + //pick the max value..., rest is zero + BaseFloat val = log(net_out(r,max_target_id[r])); + if(val < -1e10f) val = -1e10f; + sumerr += val; + } else 
{
+      //process whole posterior vector
+      for(size_t c=0; c<net_out.Cols(); c++) {
+        if(target(r,c) != 0.0) {
+          BaseFloat val = target(r,c)*log(net_out(r,c));
+          if(val < -1e10f) val = -1e10f;
+          sumerr += val;
+        }
+      }
+    }
+  }
+
+  //accumulate confusion network
+  if(confusion_mode_ != NO_CONF) {
+    for(size_t r=0; r<net_out.Rows(); r++) {
+      int id_target = max_target_id[r];
+      int id_netout = max_netout_id[r];
+      switch(confusion_mode_) {
+        case MAX_CONF:
+          confusion_(id_target,id_netout) += 1;
+          break;
+        case SOFT_CONF:
+          confusion_[id_target].Add(net_out[r]);
+          break;
+        case DIAG_MAX_CONF:
+          diag_confusion_[id_target] += ((id_target==id_netout)?1:0);
+          break;
+        case DIAG_SOFT_CONF:
+          diag_confusion_[id_target] += net_out[r][id_target];
+          break;
+        default:
+          KALDI_ERR << "unknown confusion type" << confusion_mode_;
+      }
+      confusion_count_[id_target] += 1;
+    }
+  }
+
+  error_ -= sumerr;
+  frames_ += net_out.Rows();
+  corr_ += corr;
+}
+
+
+std::string CrossEntropy::Report() {
+  std::stringstream ss;
+  ss << "Xent:" << error_ << " frames:" << frames_
+     << " err/frm:" << error_/frames_
+     << " correct[" << 100.0*corr_/frames_ << "%]"
+     << "\n";
+
+  if(confusion_mode_ != NO_CONF) {
+    //read class tags
+    std::vector<std::string> tag;
+    {
+      std::ifstream ifs(output_label_map_);
+      assert(ifs.good());
+      std::string str;
+      while(ifs >> str) { //stop on read failure, avoids duplicating the last token
+        tag.push_back(str);
+      }
+    }
+    assert(confusion_count_.Dim() <= tag.size());
+
+    //print confusion matrix
+    if(confusion_mode_ == MAX_CONF || confusion_mode_ == SOFT_CONF) {
+      ss << "Row:label Col:hyp\n" << confusion_ << "\n";
+    }
+
+    //***print per-target accuracies
+    for(int i=0; i<confusion_count_.Dim(); i++) {
+      //get the numerator
+      BaseFloat numerator = 0.0;
+      switch (confusion_mode_) {
+        case MAX_CONF: case SOFT_CONF:
+          numerator = confusion_[i][i];
+          break;
+        case DIAG_MAX_CONF: case DIAG_SOFT_CONF:
+          numerator = diag_confusion_[i];
+          break;
+        default:
+          KALDI_ERR << "Unsupported confusion mode:" << confusion_mode_;
+      }
+      //add line to report
+      ss << std::setw(30) << tag[i] << " "
+         << std::setw(10) << 100.0*numerator/confusion_count_[i] << "%"
+         << " [" << numerator << "/" << confusion_count_[i] << "]\n";
+    } //***print per-target accuracies
+  } // != NO_CONF
+
+  return ss.str();
+}
+
+
+void CrossEntropy::MergeStats(const ObjectiveFunction& inst) {
+  const CrossEntropy& xent = dynamic_cast<const CrossEntropy&>(inst);
+  frames_ += xent.frames_; error_ += xent.error_; corr_ += xent.corr_;
+  //sum the confusion statistics
+  if(confusion_mode_ != NO_CONF) {
+    if(confusion_.Rows() != xent.confusion_.Rows()) {
+      confusion_.Init(xent.confusion_.Rows(),xent.confusion_.Cols());
+      confusion_count_.Init(xent.confusion_count_.Dim());
+      diag_confusion_.Init(xent.diag_confusion_.Dim());
+    }
+    confusion_.Add(xent.confusion_);
+    confusion_count_.Add(xent.confusion_count_);
+    diag_confusion_.Add(xent.diag_confusion_);
+  }
+}
+
+
+} // namespace TNet
diff --git a/src/TNetLib/.svn/text-base/ObjFun.h.svn-base b/src/TNetLib/.svn/text-base/ObjFun.h.svn-base
new file mode 100644
index 0000000..c458340
--- /dev/null
+++ b/src/TNetLib/.svn/text-base/ObjFun.h.svn-base
@@ -0,0 +1,160 @@
+#ifndef _TNET_OBJ_FUN_H
+#define _TNET_OBJ_FUN_H
+
+#include <cassert>
+#include <limits>
+#include <cmath>
+
+#include "Matrix.h"
+#include "Vector.h"
+
+namespace TNet {
+
+  /**
+   * General interface for objective functions
+   */
+  class ObjectiveFunction
+  {
+  public:
+    /// Enum with objective function types
+    typedef enum {
+      OBJ_FUN_I = 0x0300,
+      MEAN_SQUARE_ERROR,
+      CROSS_ENTROPY,
+    }
ObjFunType; + + public: + /// Factory for creating objective function instances + static ObjectiveFunction* Factory(ObjFunType type); + + ////////////////////////////////////////////////////////////// + // Interface specification + protected: + ObjectiveFunction() { }; /// constructor + public: + virtual ~ObjectiveFunction() { }; /// destructor + + virtual ObjFunType GetType() = 0; + virtual const char* GetName() = 0; + virtual ObjectiveFunction* Clone() = 0; + + ///calculate error of network output + virtual void Evaluate(const Matrix<BaseFloat>& net_out, const Matrix<BaseFloat>& target, Matrix<BaseFloat>* err) = 0; + + ///get the accumulated error + virtual double GetError() = 0; + ///the number of processed frames + virtual size_t GetFrames() = 0; + + ///report the error to string + virtual std::string Report() = 0; + + ///sum the frame counts from more instances + virtual void MergeStats(const ObjectiveFunction& inst) = 0; + }; + + + + /** + * Mean square error function + */ + class MeanSquareError : public ObjectiveFunction + { + public: + MeanSquareError() + : ObjectiveFunction(), frames_(0), error_(0) + { } + + ~MeanSquareError() + { } + + ObjFunType GetType() + { return MEAN_SQUARE_ERROR; } + + const char* GetName() + { return "<MeanSquareError>"; } + + ObjectiveFunction* Clone() + { return new MeanSquareError(*this); } + + void Evaluate(const Matrix<BaseFloat>& net_out, const Matrix<BaseFloat>& target, Matrix<BaseFloat>* err); + + size_t GetFrames() + { return frames_; } + + double GetError() + { return error_; } + + std::string Report(); + + void MergeStats(const ObjectiveFunction& inst) { + const MeanSquareError& mse = dynamic_cast<const MeanSquareError&>(inst); + frames_ += mse.frames_; error_ += mse.error_; + } + + private: + size_t frames_; + double error_; + + }; + + + /** + * Cross entropy error function + */ + class CrossEntropy : public ObjectiveFunction + { + public: + enum ConfusionMode { NO_CONF=0, MAX_CONF, SOFT_CONF, DIAG_MAX_CONF, DIAG_SOFT_CONF }; + + public: + CrossEntropy() + : ObjectiveFunction(), frames_(0), error_(0), corr_(0), confusion_mode_(NO_CONF), output_label_map_(NULL) + { } + + ~CrossEntropy() + { } + + ObjFunType GetType() + { return CROSS_ENTROPY; } + + const char* GetName() + { return "<cross_entropy>"; } + + ObjectiveFunction* Clone() + { return new CrossEntropy(*this); } + + void Evaluate(const Matrix<BaseFloat>& net_out, const Matrix<BaseFloat>& target, Matrix<BaseFloat>* err); + + size_t GetFrames() + { return frames_; } + + double GetError() + { return error_; } + + void SetConfusionMode(enum ConfusionMode m) + { confusion_mode_ = m; } + + void SetOutputLabelMap(const char* map) + { output_label_map_ = map; } + + std::string Report(); + + void MergeStats(const ObjectiveFunction& inst); + private: + size_t frames_; + double error_; + size_t corr_; + + ConfusionMode confusion_mode_; + Matrix<float> confusion_; + Vector<int> confusion_count_; + Vector<double> diag_confusion_; + const char* output_label_map_; + }; + + +} //namespace TNet + + +#endif diff --git a/src/TNetLib/.svn/text-base/Platform.h.svn-base b/src/TNetLib/.svn/text-base/Platform.h.svn-base new file mode 100644 index 0000000..66ebacb --- /dev/null +++ b/src/TNetLib/.svn/text-base/Platform.h.svn-base @@ -0,0 +1,397 @@ +#ifndef _TNET_PLATFORM_H +#define _TNET_PLATFORM_H + +#include "Thread.h" +#include "Matrix.h" + +#include "Features.h" +#include "Labels.h" + +#include "Cache.h" +#include "Nnet.h" +#include "ObjFun.h" + +#include "Mutex.h" +#include "Semaphore.h" +#include 
"Barrier.h" +#include "Thread.h" + +#include <vector> +#include <list> +#include <iterator> + +namespace TNet { + +class PlatformThread; + +class Platform { + +/* +* Variables to be initialized directly from the main function +*/ +public: + FeatureRepository feature_; + LabelRepository label_; + + Network nnet_transf_; + Network nnet_; + ObjectiveFunction* obj_fun_; + + int bunchsize_; + int cachesize_; + bool randomize_; + + int start_frm_ext_; + int end_frm_ext_; + + int trace_; + bool crossval_; + + long int seed_; + + /* + * Variables to be used internally during the multi-threaded training + */ + private: + Semaphore semaphore_read_; + + std::vector<std::list<Matrix<BaseFloat>*> > feature_buf_; + std::vector<std::list<Matrix<BaseFloat>*> > label_buf_; + std::vector<Mutex> mutex_buf_; + + std::vector<Network*> nnet_transf2_; + + std::vector<Cache> cache_; + + std::vector<Network*> nnet2_; + std::vector<ObjectiveFunction*> obj_fun2_; + std::vector<bool> sync_mask_; + + Barrier barrier_; + bool end_reading_; + std::vector<Timer> tim_; + std::vector<double> tim_accu_; + + int num_thr_; + Semaphore semaphore_endtrain_; + Semaphore semaphore_endtrain2_; + + public: + Mutex cout_mutex_; + + /* + * Methods + */ + public: + Platform() + : bunchsize_(0), cachesize_(0), randomize_(false), + start_frm_ext_(0), end_frm_ext_(0), trace_(0), + crossval_(false), seed_(0), + end_reading_(false), num_thr_(0) + { } + + ~Platform() + { + for(size_t i=0; i<nnet_transf2_.size(); i++) { + delete nnet_transf2_[i]; + } + for(size_t i=0; i<nnet2_.size(); i++) { + delete nnet2_[i]; + } + for(size_t i=0; i<obj_fun2_.size(); i++) { + delete obj_fun2_[i]; + } + } + + /// Run the training using num_threads threads + void RunTrain(int num_threads); + + private: + /// The data-reading thread + void ReadData(); + /// The training thread + void Thread(int thr); + + friend class PlatformThread; +}; + + + +/** + * Inherit Thread for the training threads + */ +class PlatformThread : public Thread { + public: + PlatformThread(Platform* pf) + : platform_(*pf) + { } + + private: + void Execute(void* arg) { + long long thr_id = reinterpret_cast<long long>(arg); + platform_.Thread(thr_id); + } + + private: + Platform& platform_; +}; + + + + + +void Platform::RunTrain(int num_thr) { + num_thr_ = num_thr; + + /* + * Initialize parallel training + */ + feature_buf_.resize(num_thr); + label_buf_.resize(num_thr); + mutex_buf_.resize(num_thr); + cache_.resize(num_thr); + sync_mask_.resize(num_thr); + barrier_.SetThreshold(num_thr); + + tim_.resize(num_thr); + tim_accu_.resize(num_thr,0.0); + + int bunchsize = bunchsize_/num_thr; + int cachesize = (cachesize_/num_thr/bunchsize)*bunchsize; + std::cout << "Bunchsize:" << bunchsize << "*" << num_thr << "=" << bunchsize*num_thr + << " Cachesize:" << cachesize << "*" << num_thr << "=" << cachesize*num_thr << "\n"; + for(int i=0; i<num_thr; i++) { + //clone transforms + nnet_transf2_.push_back(nnet_transf_.Clone()); + //create cache + cache_[i].Init(cachesize,bunchsize,seed_); + cache_[i].Trace(trace_); + //clone networks + nnet2_.push_back(nnet_.Clone()); + //clone objective function objects + obj_fun2_.push_back(obj_fun_->Clone()); + //enable threads to sync weights + sync_mask_[i] = true; + } + + /* + * Run training threads + */ + std::vector<PlatformThread*> threads; + for(intptr_t i=0; i<num_thr; i++) { + PlatformThread* t = new PlatformThread(this); + t->Start(reinterpret_cast<void*>(i)); + threads.push_back(t); + } + + /* + * Read the training data + */ + ReadData(); + + /* + * Wait 
for training to finish
+   */
+  semaphore_endtrain2_.Wait();
+
+}
+
+
+
+void Platform::ReadData() try {
+  cout_mutex_.Lock();
+  std::cout << "queuesize " << feature_.QueueSize() << "\n";
+  cout_mutex_.Unlock();
+
+  int thr = 0;
+  for(feature_.Rewind();!feature_.EndOfList();feature_.MoveNext()) {
+    Matrix<BaseFloat>* fea = new Matrix<BaseFloat>;
+    Matrix<BaseFloat>* lab = new Matrix<BaseFloat>;
+
+    feature_.ReadFullMatrix(*fea);
+    label_.GenDesiredMatrix(*lab,
+      fea->Rows()-start_frm_ext_-end_frm_ext_,
+      feature_.CurrentHeader().mSamplePeriod,
+      feature_.Current().Logical().c_str());
+
+
+    fea->CheckData(feature_.Current().Logical());
+
+    mutex_buf_[thr].Lock();
+    feature_buf_[thr].push_back(fea);
+    label_buf_[thr].push_back(lab);
+    mutex_buf_[thr].Unlock();
+
+    //suspend reading when the shortest buffer holds more than 20 matrices
+    if(thr == 0) {
+      int minsize=1e6;
+      for(size_t i=0; i<feature_buf_.size(); i++) {
+        int s = feature_buf_[i].size();
+        if(s < minsize) minsize = s;
+      }
+      if(minsize > 20) semaphore_read_.Wait();
+    }
+
+    thr = (thr+1) % num_thr_;
+  }
+
+  std::cout << "[Reading finished]\n" << std::flush;
+  end_reading_ = true;
+
+} catch (std::exception& rExc) {
+  std::cerr << "Exception thrown" << std::endl;
+  std::cerr << rExc.what() << std::endl;
+  exit(1);
+}
+
+void Platform::Thread(int thr_id) try {
+
+  const int thr = thr_id; //make the id const for safety!
+
+  while(1) {
+    //fill the cache
+    while(!cache_[thr].Full() && !(end_reading_ && (feature_buf_[thr].size() == 0))) {
+
+      if(feature_buf_[thr].size() <= 5) {
+        semaphore_read_.Post(); //wake the reader
+      }
+      if(feature_buf_[thr].size() == 0) {
+        cout_mutex_.Lock();
+        std::cout << "Thread" << thr << ", waiting for data\n";
+        cout_mutex_.Unlock();
+        sleep(1);
+      } else {
+        //get the matrices
+        mutex_buf_[thr].Lock();
+        Matrix<BaseFloat>* fea = feature_buf_[thr].front();
+        Matrix<BaseFloat>* lab = label_buf_[thr].front();
+        feature_buf_[thr].pop_front();
+        label_buf_[thr].pop_front();
+        mutex_buf_[thr].Unlock();
+
+        //transform the features
+        Matrix<BaseFloat> fea_transf;
+        nnet_transf2_[thr]->Propagate(*fea,fea_transf);
+
+        //trim the ext
+        SubMatrix<BaseFloat> fea_trim(
+          fea_transf,
+          start_frm_ext_,
+          fea_transf.Rows()-start_frm_ext_-end_frm_ext_,
+          0,
+          fea_transf.Cols()
+        );
+
+        //add to cache
+        cache_[thr].AddData(fea_trim,*lab);
+
+        delete fea; delete lab;
+      }
+    }
+
+    //no more data, end training...
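+    //(the cache is empty here only when the reader has finished and the
+    // buffers were drained; the reader throttling above is a low/high
+    // watermark scheme: consumers post semaphore_read_ once their buffer
+    // drops to <=5 matrices, the reader waits once every buffer holds >20)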
+ if(cache_[thr].Empty()) break; + + if(randomize_) { cache_[thr].Randomize(); } + + + //std::cout << "Thread" << thr << ", Cache#" << nr_cache++ << "\n"; + + //train from cache + Matrix<BaseFloat> fea2,lab2,out,err; + while(!cache_[thr].Empty()) { + cache_[thr].GetBunch(fea2,lab2); + nnet2_[thr]->Propagate(fea2,out); + obj_fun2_[thr]->Evaluate(out,lab2,&err); + + if(!crossval_) { + nnet2_[thr]->Backpropagate(err); + + tim_[thr].Start(); + barrier_.Wait();//*********/ + tim_[thr].End(); tim_accu_[thr] += tim_[thr].Val(); + + //sum the gradient and bunchsize + for(int i=0; i<num_thr_; i++) { + if(sync_mask_[i]) { + nnet_.AccuGradient(*nnet2_[i],thr,num_thr_); + if(thr == 0) nnet_.AccuBunchsize(*nnet2_[i]); + } + } + + tim_[thr].Start(); + barrier_.Wait();//*********/ + tim_[thr].End(); tim_accu_[thr] += tim_[thr].Val(); + + //update + nnet_.Update(thr,num_thr_); + + tim_[thr].Start(); + barrier_.Wait();//*********/ + tim_[thr].End(); tim_accu_[thr] += tim_[thr].Val(); + + //reset the bunchsize counter + if(thr == 0) nnet_.ResetBunchsize(); + } + } + + } + + std::cout << "Thread" << thr << " end of data\n"; + + //deactivate threads' update from summing + sync_mask_[thr] = false; + //increase number of finished threads + semaphore_endtrain_.Post(); + + //synchronize the updates of other threads + while(1) { + barrier_.Wait();//*********/ + if(semaphore_endtrain_.GetValue() == num_thr_) break; + + //sum the gradient and bunchsize + for(int i=0; i<num_thr_; i++) { + if(sync_mask_[i]) { + nnet_.AccuGradient(*nnet2_[i],thr,num_thr_); + if(thr == 0) nnet_.AccuBunchsize(*nnet2_[i]); + } + } + barrier_.Wait();//*********/ + //update + nnet_.Update(thr,num_thr_); + barrier_.Wait();//*********/ + //reset bunchsize counter + if(thr == 0) nnet_.ResetBunchsize(); + } + + //finally merge objfun stats + if(thr == 0) { + for(int i=0; i<num_thr_; i++) { + obj_fun_->MergeStats(*obj_fun2_[i]); + } + + cout_mutex_.Lock(); + std::cout << "Barrier waiting times per thread\n"; + std::copy(tim_accu_.begin(),tim_accu_.end(),std::ostream_iterator<double>(std::cout," ")); + std::cout << "\n"; + cout_mutex_.Unlock(); + } + + cout_mutex_.Lock(); + std::cout << "[Thread" << thr << " finished]\n"; + cout_mutex_.Unlock(); + + if(thr == 0) { + semaphore_endtrain2_.Post(); + } +} catch (std::exception& rExc) { + std::cerr << "Exception thrown" << std::endl; + std::cerr << rExc.what() << std::endl; + exit(1); +} + + + +}//namespace TNet + +#endif diff --git a/src/TNetLib/.svn/text-base/Semaphore.cc.svn-base b/src/TNetLib/.svn/text-base/Semaphore.cc.svn-base new file mode 100644 index 0000000..d149fb3 --- /dev/null +++ b/src/TNetLib/.svn/text-base/Semaphore.cc.svn-base @@ -0,0 +1,64 @@ + +#include "Semaphore.h" + +namespace TNet { + + Semaphore:: + Semaphore(int initValue) + { + mSemValue = initValue; + pthread_mutex_init(&mMutex, NULL); + pthread_cond_init(&mCond, NULL); + } + + Semaphore:: + ~Semaphore() + { + pthread_mutex_destroy(&mMutex); + pthread_cond_destroy(&mCond); + } + + int + Semaphore:: + TryWait() + { + pthread_mutex_lock(&mMutex); + if(mSemValue > 0) { + mSemValue--; + pthread_mutex_unlock(&mMutex); + return 0; + } + pthread_mutex_unlock(&mMutex); + return -1; + } + + void + Semaphore:: + Wait() + { + pthread_mutex_lock(&mMutex); + while(mSemValue <= 0) { + pthread_cond_wait(&mCond, &mMutex); + } + mSemValue--; + pthread_mutex_unlock(&mMutex); + } + + void + Semaphore:: + Post() + { + pthread_mutex_lock(&mMutex); + mSemValue++; + pthread_cond_signal(&mCond); + pthread_mutex_unlock(&mMutex); + } + + int + 
Semaphore:: + GetValue() + { return mSemValue; } + + + +} //namespace diff --git a/src/TNetLib/.svn/text-base/Semaphore.h.svn-base b/src/TNetLib/.svn/text-base/Semaphore.h.svn-base new file mode 100644 index 0000000..a28ee44 --- /dev/null +++ b/src/TNetLib/.svn/text-base/Semaphore.h.svn-base @@ -0,0 +1,26 @@ +#ifndef _SEMPAHORE_H_ +#define _SEMPAHORE_H_ + +#include <pthread.h> + +namespace TNet { + + class Semaphore { + public: + Semaphore(int initValue = 0); + ~Semaphore(); + + int TryWait(); + void Wait(); + void Post(); + int GetValue(); + + private: + int mSemValue; + pthread_mutex_t mMutex; + pthread_cond_t mCond; + + }; +} //namespace + +#endif diff --git a/src/TNetLib/.svn/text-base/SharedLinearity.cc.svn-base b/src/TNetLib/.svn/text-base/SharedLinearity.cc.svn-base new file mode 100644 index 0000000..108212c --- /dev/null +++ b/src/TNetLib/.svn/text-base/SharedLinearity.cc.svn-base @@ -0,0 +1,277 @@ + + +#include "SharedLinearity.h" +#include "cblas.h" + +namespace TNet { + +void +SharedLinearity:: +PropagateFnc(const Matrix<BaseFloat>& X, Matrix<BaseFloat>& Y) +{ + //precopy bias + for(int k=0; k<mNInstances; k++) { + for(size_t r=0; r<X.Rows(); r++) { + memcpy(Y.pRowData(r)+k*mpBias->Dim(),mpBias->pData(),mpBias->Dim()*sizeof(BaseFloat)); + } + } + + //multiply blockwise + for(int k=0; k<mNInstances; k++) { + SubMatrix<BaseFloat> xblock(X,0,X.Rows(),k*mpLinearity->Rows(),mpLinearity->Rows()); + SubMatrix<BaseFloat> yblock(Y,0,Y.Rows(),k*mpLinearity->Cols(),mpLinearity->Cols()); + yblock.BlasGemm(1.0,xblock,NO_TRANS,*mpLinearity,NO_TRANS,1.0); + } +} + + +void +SharedLinearity:: +BackpropagateFnc(const Matrix<BaseFloat>& X, Matrix<BaseFloat>& Y) +{ + for(int k=0; k<mNInstances; k++) { + SubMatrix<BaseFloat> xblock(X,0,X.Rows(),k*mpLinearity->Cols(),mpLinearity->Cols()); + SubMatrix<BaseFloat> yblock(Y,0,Y.Rows(),k*mpLinearity->Rows(),mpLinearity->Rows()); + yblock.BlasGemm(1.0,xblock,NO_TRANS,*mpLinearity,TRANS,1.0); + } +} + +#if 0 +void +SharedLinearity:: +AccuUpdate() +{ + BaseFloat N = 1; + /* + //Not part of the interface!!! + if(mGradDivFrm) { + N = static_cast<BaseFloat>(GetInput().Rows()); + } + */ + BaseFloat mmt_gain = static_cast<BaseFloat>(1.0/(1.0-mMomentum)); + N *= mmt_gain; //compensate higher gradient estimates due to momentum + + //compensate augmented dyn. 
range of gradient caused by multiple instances + N *= static_cast<BaseFloat>(mNInstances); + + const Matrix<BaseFloat>& X = GetInput().Data(); + const Matrix<BaseFloat>& E = GetErrorInput().Data(); + //get gradient of shared linearity + for(int k=0; k<mNInstances; k++) { + SubMatrix<BaseFloat> xblock(X,0,X.Rows(),k*mLinearity.Rows(),mLinearity.Rows()); + SubMatrix<BaseFloat> eblock(E,0,E.Rows(),k*mLinearity.Cols(),mLinearity.Cols()); + mLinearityCorrection.BlasGemm(1.0,xblock,TRANS,eblock,NO_TRANS,((k==0)?mMomentum:1.0f)); + } + + //get gradient of shared bias + mBiasCorrection.Scale(mMomentum); + for(int r=0; r<E.Rows(); r++) { + for(int c=0; c<E.Cols(); c++) { + mBiasCorrection[c%mBiasCorrection.Dim()] += E(r,c); + } + } + + //perform update + mLinearity.AddScaled(-mLearningRate/N,mLinearityCorrection); + mBias.AddScaled(-mLearningRate/N,mBiasCorrection); + + //regularization weight decay + mLinearity.AddScaled(-mLearningRate*mWeightcost,mLinearity); +} +#endif + +void +SharedLinearity:: +ReadFromStream(std::istream& rIn) +{ + //number of instances of shared weights in layer + rIn >> std::ws >> mNInstances; + if(mNInstances < 1) { + std::ostringstream os; + os << "Bad number of instances:" << mNInstances; + Error(os.str()); + } + if(GetNInputs() % mNInstances != 0 || GetNOutputs() % mNInstances != 0) { + std::ostringstream os; + os << "Number of Inputs/Outputs must be divisible by number of instances" + << " Inputs:" << GetNInputs() + << " Outputs" << GetNOutputs() + << " Intances:" << mNInstances; + Error(os.str()); + } + + //matrix is stored transposed as SNet does + BfMatrix transpose; + rIn >> transpose; + mLinearity = BfMatrix(transpose, TRANS); + //biases stored normally + rIn >> mBias; + + if(transpose.Cols()*transpose.Rows() == 0) { + Error("Missing linearity matrix in network file"); + } + if(mBias.Dim() == 0) { + Error("Missing bias vector in network file"); + } + + + if(mLinearity.Cols() != (GetNOutputs() / mNInstances) || + mLinearity.Rows() != (GetNInputs() / mNInstances) || + mBias.Dim() != (GetNOutputs() / mNInstances) + ){ + std::ostringstream os; + os << "Wrong dimensionalities of matrix/vector in network file\n" + << "Inputs:" << GetNInputs() + << " Outputs:" << GetNOutputs() + << "\n" + << "N-Instances:" << mNInstances + << "\n" + << "linearityCols:" << mLinearity.Cols() << "(" << mLinearity.Cols()*mNInstances << ")" + << " linearityRows:" << mLinearity.Rows() << "(" << mLinearity.Rows()*mNInstances << ")" + << " biasDims:" << mBias.Dim() << "(" << mBias.Dim()*mNInstances << ")" + << "\n"; + Error(os.str()); + } + + mLinearityCorrection.Init(mLinearity.Rows(),mLinearity.Cols()); + mBiasCorrection.Init(mBias.Dim()); +} + + +void +SharedLinearity:: +WriteToStream(std::ostream& rOut) +{ + rOut << mNInstances << std::endl; + //matrix is stored transposed as SNet does + BfMatrix transpose(mLinearity, TRANS); + rOut << transpose; + //biases stored normally + rOut << mBias; + rOut << std::endl; +} + + +void +SharedLinearity:: +Gradient() +{ + const Matrix<BaseFloat>& X = GetInput(); + const Matrix<BaseFloat>& E = GetErrorInput(); + //get gradient of shared linearity + for(int k=0; k<mNInstances; k++) { + SubMatrix<BaseFloat> xblock(X,0,X.Rows(),k*mpLinearity->Rows(),mpLinearity->Rows()); + SubMatrix<BaseFloat> eblock(E,0,E.Rows(),k*mpLinearity->Cols(),mpLinearity->Cols()); + mLinearityCorrection.BlasGemm(1.0,xblock,TRANS,eblock,NO_TRANS,((k==0)?0.0f:1.0f)); + } + + //get gradient of shared bias + mBiasCorrection.Set(0.0f); + for(int r=0; r<E.Rows(); r++) { + for(int c=0; 
c<E.Cols(); c++) { + mBiasCorrection[c%mBiasCorrection.Dim()] += E(r,c); + } + } +} + + +void +SharedLinearity:: +AccuGradient(const UpdatableComponent& src, int thr, int thrN) +{ + //cast the argument + const SharedLinearity& src_comp = dynamic_cast<const SharedLinearity&>(src); + + //allocate accumulators when needed + if(mLinearityCorrectionAccu.MSize() == 0) { + mLinearityCorrectionAccu.Init(mpLinearity->Rows(),mpLinearity->Cols()); + } + if(mBiasCorrectionAccu.MSize() == 0) { + mBiasCorrectionAccu.Init(mpBias->Dim()); + } + + + //assert the dimensions + /* + assert(mLinearityCorrection.Rows() == src_comp.mLinearityCorrection.Rows()); + assert(mLinearityCorrection.Cols() == src_comp.mLinearityCorrection.Cols()); + assert(mBiasCorrection.Dim() == src_comp.mBiasCorrection.Dim()); + */ + + //need to find out which rows to sum... + int div = mLinearityCorrection.Rows() / thrN; + int mod = mLinearityCorrection.Rows() % thrN; + + int origin = thr * div + ((mod > thr)? thr : mod); + int rows = div + ((mod > thr)? 1 : 0); + + //std::cout << "[S" << thr << "," << origin << "," << rows << "]" << std::flush; + + //create the matrix windows + const SubMatrix<BaseFloat> src_mat ( + src_comp.mLinearityCorrection, + origin, rows, + 0, mLinearityCorrection.Cols() + ); + SubMatrix<double> tgt_mat ( + mLinearityCorrectionAccu, + origin, rows, + 0, mLinearityCorrection.Cols() + ); + //sum the rows + Add(tgt_mat,src_mat); + + //first thread will always sum the bias correction and adds frame count + if(thr == 0) { + //std::cout << "[BS" << thr << "]" << std::flush; + Add(mBiasCorrectionAccu,src_comp.mBiasCorrection); + } +} + + +void +SharedLinearity:: +Update(int thr, int thrN) +{ + //need to find out which rows to sum... + int div = mLinearity.Rows() / thrN; + int mod = mLinearity.Rows() % thrN; + + int origin = thr * div + ((mod > thr)? thr : mod); + int rows = div + ((mod > thr)? 
1 : 0); + + //std::cout << "[P" << thr << "," << origin << "," << rows << "]" << std::flush; + + //get the matrix windows + SubMatrix<double> src_mat ( + mLinearityCorrectionAccu, + origin, rows, + 0, mLinearityCorrection.Cols() + ); + SubMatrix<BaseFloat> tgt_mat ( + mLinearity, + origin, rows, + 0, mLinearityCorrection.Cols() + ); + + //TODO perform L2 regularization + //tgt_mat.AddScaled(tgt_mat, -mWeightcost * num_frames); + + //update weights + AddScaled(tgt_mat, src_mat, -mLearningRate/static_cast<BaseFloat>(mNInstances)); + + //first thread always update bias + if(thr == 0) { + //std::cout << "[" << thr << "BP]" << std::flush; + AddScaled(mBias, mBiasCorrectionAccu, -mLearningRate/static_cast<BaseFloat>(mNInstances)); + } + + //reset the accumulators + src_mat.Zero(); + if(thr == 0) { + mBiasCorrectionAccu.Zero(); + } +} + + +} //namespace diff --git a/src/TNetLib/.svn/text-base/SharedLinearity.h.svn-base b/src/TNetLib/.svn/text-base/SharedLinearity.h.svn-base new file mode 100644 index 0000000..83feeee --- /dev/null +++ b/src/TNetLib/.svn/text-base/SharedLinearity.h.svn-base @@ -0,0 +1,103 @@ +#ifndef _CUSHARED_LINEARITY_H_ +#define _CUSHARED_LINEARITY_H_ + + +#include "Component.h" + +#include "Matrix.h" +#include "Vector.h" + + +namespace TNet { + +class SharedLinearity : public UpdatableComponent +{ + public: + SharedLinearity(size_t nInputs, size_t nOutputs, Component *pPred); + ~SharedLinearity(); + + ComponentType GetType() const + { return SHARED_LINEARITY; } + + const char* GetName() const + { return "<SharedLinearity>"; } + + Component* Clone() const; + + void PropagateFnc(const Matrix<BaseFloat>& X, Matrix<BaseFloat>& Y); + void BackpropagateFnc(const Matrix<BaseFloat>& X, Matrix<BaseFloat>& Y); + + void ReadFromStream(std::istream& rIn); + void WriteToStream(std::ostream& rOut); + + /// calculate gradient + void Gradient(); + /// accumulate gradient from other components + void AccuGradient(const UpdatableComponent& src, int thr, int thrN); + /// update weights, reset the accumulator + void Update(int thr, int thrN); + +protected: + Matrix<BaseFloat> mLinearity; ///< Matrix with neuron weights + Vector<BaseFloat> mBias; ///< Vector with biases + + Matrix<BaseFloat>* mpLinearity; + Vector<BaseFloat>* mpBias; + + Matrix<BaseFloat> mLinearityCorrection; ///< Matrix for linearity updates + Vector<BaseFloat> mBiasCorrection; ///< Vector for bias updates + + Matrix<double> mLinearityCorrectionAccu; ///< Accumulator for linearity updates + Vector<double> mBiasCorrectionAccu; ///< Accumulator for bias updates + + int mNInstances; +}; + + + + +//////////////////////////////////////////////////////////////////////////// +// INLINE FUNCTIONS +// SharedLinearity:: +inline +SharedLinearity:: +SharedLinearity(size_t nInputs, size_t nOutputs, Component *pPred) + : UpdatableComponent(nInputs, nOutputs, pPred), + mpLinearity(&mLinearity), mpBias(&mBias), + mNInstances(0) +{ } + + +inline +SharedLinearity:: +~SharedLinearity() +{ } + + +inline +Component* +SharedLinearity:: +Clone() const +{ + SharedLinearity* ptr = new SharedLinearity(GetNInputs(),GetNOutputs(),NULL); + ptr->mpLinearity = mpLinearity; + ptr->mpBias = mpBias; + + ptr->mLinearityCorrection.Init(mpLinearity->Rows(),mpLinearity->Cols()); + ptr->mBiasCorrection.Init(mpBias->Dim()); + + ptr->mNInstances = mNInstances; + + ptr->mLearningRate = mLearningRate; + + + return ptr; +} + + + +} //namespace + + + +#endif diff --git a/src/TNetLib/.svn/text-base/Thread.h.svn-base b/src/TNetLib/.svn/text-base/Thread.h.svn-base new file 
mode 100644
index 0000000..ba6d7ba
--- /dev/null
+++ b/src/TNetLib/Thread.h.svn-base
@@ -0,0 +1,58 @@
+#ifndef _TNET_THREAD_H
+#define _TNET_THREAD_H
+
+#include <pthread.h> //pthread_t and the pthread_* calls below
+#include <cstdlib>   //exit()
+#include <iostream>
+#include <exception>
+#include "Error.h"   //KALDI_ERR
+
+namespace TNet {
+
+class Thread {
+ public:
+  Thread()
+  { }
+  virtual ~Thread()
+  { }
+
+  int Start(void* arg);
+
+ protected:
+  static void* EntryPoint(void*);
+  virtual void Execute(void*) = 0; ///< Override this function
+  void* Arg() const { return arg_; }
+  void Arg(void* a) { arg_ = a; }
+
+ private:
+  pthread_t thread_id_;
+  void* arg_;
+};
+
+int Thread::Start(void* arg) {
+  Arg(arg); //store user data
+
+  int ret=0;
+  //create thread as detached (don't wait for it)
+  pthread_attr_t tattr;
+  ret |= pthread_attr_init(&tattr);
+  ret |= pthread_attr_setdetachstate(&tattr,PTHREAD_CREATE_DETACHED);
+  ret |= pthread_create(&thread_id_, &tattr, &Thread::EntryPoint, this);
+  if(ret != 0) KALDI_ERR << "Failed to create thread";
+  return ret;
+}
+
+/*static */
+void* Thread::EntryPoint(void* pthis) try {
+  Thread* pt = (Thread*)pthis;
+  pt->Execute(pt->Arg());
+  return NULL;
+} catch (std::exception& rExc) {
+  std::cerr << "Exception thrown" << std::endl;
+  std::cerr << rExc.what() << std::endl;
+  exit(1);
+}
+
+
+} //namespace TNet
+
+#endif
diff --git a/src/TNetLib/Activation.cc b/src/TNetLib/Activation.cc
new file mode 100644
index 0000000..8e84190
--- /dev/null
+++ b/src/TNetLib/Activation.cc
@@ -0,0 +1,138 @@
+
+#include "Activation.h"
+
+
+namespace TNet {
+
+void Sigmoid::PropagateFnc(const BfMatrix& X, BfMatrix& Y) {
+  //Y = 1/(1+e^{-X})
+  for(size_t r=0; r<X.Rows(); r++) {
+    for(size_t c=0; c<X.Cols(); c++) {
+      Y(r,c) = 1.0f/(1.0f+exp(-X(r,c)));
+    }
+  }
+}
+
+
+void Sigmoid::BackpropagateFnc(const Matrix<BaseFloat>& X, Matrix<BaseFloat>& Y) {
+  const Matrix<BaseFloat>& out = GetOutput();
+  //Y = OUT*(1-OUT)*X //derived from the sigmoid
+  for(size_t r=0; r<X.Rows(); r++) {
+    for(size_t c=0; c<X.Cols(); c++) {
+      Y(r,c) = X(r,c)*out(r,c)*(1.0f-out(r,c));
+    }
+  }
+}
+
+
+
+void Softmax::PropagateFnc(const BfMatrix& X, BfMatrix& Y) {
+  //Y_j = e^X_j / sum_i(e^X_i)
+  //
+  //  e^(X_j+c) / sum_i(e^(X_i+c))
+  //  = e^c.e^X_j / e^c.sum_i(e^X_i)
+  //  = e^X_j / sum_i(e^X_i)
+  //
+  size_t rows = X.Rows();
+  for(size_t i=0; i<rows; i++) {
+    BfSubVector y_i(Y[i]); //<< y_i gets pointer to i'th row of matrix Y
+    y_i.Copy(X[i]);
+    BaseFloat max = y_i.Max();
+    y_i.Subtract(max);
+    y_i.ApplyExp();
+    BaseFloat sum = y_i.Sum();
+    y_i.Scale(1.0f/sum);
+  }
+}
+
+
+void Softmax::BackpropagateFnc(const BfMatrix& X, BfMatrix& Y) {
+  //simply copy the error...,
+  Y.Copy(X);
+}
+
+
+void BlockSoftmax::ReadFromStream(std::istream& rIn) {
+  rIn >> mDim;
+  mDimOffset.Init(mDim.Dim()+1);
+
+  int off=0;
+  for(int i=0; i<mDim.Dim(); i++) {
+    mDimOffset[i]=off;
+    off += mDim[i];
+  }
+  mDimOffset[mDim.Dim()]=off;
+
+  if(off!=GetNOutputs()) {
+    KALDI_ERR << "Non-matching dimension of sum of softmaxes,"
+              << " the sum:" << off
+              << " GetNOutputs:" << GetNOutputs();
+  }
+}
+
+void BlockSoftmax::WriteToStream(std::ostream& rOut) {
+  rOut << mDim;
+}
+
+
+
+
+void BlockSoftmax::PropagateFnc(const BfMatrix& X, BfMatrix& Y) {
+  //Y_j = e^X_j / sum_i(e^X_i)
+  //
+  //  e^(X_j+c) / sum_i(e^(X_i+c))
+  //  = e^c.e^X_j / e^c.sum_i(e^X_i)
+  //  = e^X_j / sum_i(e^X_i)
+  //
+  size_t rows = X.Rows();
+  for(size_t i=0; i<rows; i++) {
+    BfSubVector y_i(Y[i]); //<< y_i gets pointer to i'th row of matrix Y
+    y_i.Copy(X[i]);
+    //BaseFloat max = y_i.Max();
+    //y_i.Subtract(max);
+    //y_i.ApplyExp();
+    //normalize separately on each softmax interval...
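+    //e.g. mDim={3,4}, mDimOffset={0,3,7} (illustrative dimensions): a
+    //7-dim row is sliced into x[0..2] and x[3..6], and each slice gets
+    //its own max-shift, exp and 1/sum scaling, so every block sums to 1.0: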
+ for(int j=0; j<mDim.Dim(); j++) { + BfSubVector y_i_smx_j(y_i.Range(mDimOffset[j],mDim[j])); + BaseFloat max = y_i_smx_j.Max(); + y_i_smx_j.Subtract(max); + y_i_smx_j.ApplyExp(); + BaseFloat sum = y_i_smx_j.Sum(); + y_i_smx_j.Scale(1.0f/sum); + } + } + +// X.CheckData("BlockSoftmax PropagateFnc X"); +// Y.CheckData("BlockSoftmax PropagateFnc Y"); +} + + +void BlockSoftmax::BackpropagateFnc(const BfMatrix& X, BfMatrix& Y) { + //set the output to zero + Y.Zero(); + //copy only parts of the error + //from softmax intervals which sum up to 0.0, not 1.0 + for(int i=0; i<X.Rows(); i++) { + for(int j=0; j<mDim.Dim(); j++) { + const BfSubVector x_i_smx_j(X[i].Range(mDimOffset[j],mDim[j])); + BaseFloat sum = x_i_smx_j.Sum(); + if(sum > -0.1 && sum < 0.1) { + BfSubVector y_i_smx_j(Y[i].Range(mDimOffset[j],mDim[j])); + y_i_smx_j.Copy(x_i_smx_j); + } else if (sum > 0.9 && sum < 1.1) { + ; //do nothing + } else { + KALDI_ERR << "Invalid sum: " << sum; + } + } + } + +// X.CheckData("BlockSoftmax BackpropagateFnc X"); +// Y.CheckData("BlockSoftmax BackpropagateFnc Y"); + +} + + + +} //namespace TNet + diff --git a/src/TNetLib/Activation.h b/src/TNetLib/Activation.h new file mode 100644 index 0000000..90263d0 --- /dev/null +++ b/src/TNetLib/Activation.h @@ -0,0 +1,104 @@ + +#ifndef _ACT_FUN_I_ +#define _ACT_FUN_I_ + + +#include "Component.h" + + +namespace TNet +{ + + /** + * Sigmoid activation function + */ + class Sigmoid : public Component + { + public: + Sigmoid(size_t nInputs, size_t nOutputs, Component *pPred) + : Component(nInputs,nOutputs,pPred) + { } + + ComponentType GetType() const + { return SIGMOID; } + + const char* GetName() const + { return "<sigmoid>"; } + + Component* Clone() const + { return new Sigmoid(GetNInputs(),GetNOutputs(),NULL); } + + protected: + void PropagateFnc(const BfMatrix& X, BfMatrix& Y); + void BackpropagateFnc(const BfMatrix& X, BfMatrix& Y); + }; + + + /** + * Softmax activation function + */ + class Softmax : public Component + { + public: + Softmax(size_t nInputs, size_t nOutputs, Component *pPred) + : Component(nInputs,nOutputs,pPred) + { } + + ComponentType GetType() const + { return SOFTMAX; } + + const char* GetName() const + { return "<softmax>"; } + + Component* Clone() const + { return new Softmax(GetNInputs(),GetNOutputs(),NULL); } + + protected: + void PropagateFnc(const BfMatrix& X, BfMatrix& Y); + void BackpropagateFnc(const BfMatrix& X, BfMatrix& Y); + }; + + + /** + * BlockSoftmax activation function. + * It is several softmaxes in one. + * The dimensions of softmaxes are given by integer vector. + * During backpropagation: + * If the derivatives sum up to 0, they are backpropagated. + * If the derivatives sup up to 1, they are discarded + * (like this we know that the softmax was 'inactive'). 
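+ * Example (illustrative): with mDim={3,4} and the target in the first
+ * block, the error y-t of block one sums to 0.0 and is backpropagated,
+ * while block two (all-zero targets) sums to 1.0 and is zeroed out.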
+ */ + class BlockSoftmax : public Component + { + public: + BlockSoftmax(size_t nInputs, size_t nOutputs, Component *pPred) + : Component(nInputs,nOutputs,pPred) + { } + + ComponentType GetType() const + { return BLOCK_SOFTMAX; } + + const char* GetName() const + { return "<blocksoftmax>"; } + + Component* Clone() const + { return new BlockSoftmax(*this); } + + void ReadFromStream(std::istream& rIn); + void WriteToStream(std::ostream& rOut); + + protected: + void PropagateFnc(const BfMatrix& X, BfMatrix& Y); + void BackpropagateFnc(const BfMatrix& X, BfMatrix& Y); + + private: + Vector<int> mDim; + Vector<int> mDimOffset; + }; + + + +} //namespace + + +#endif diff --git a/src/TNetLib/Barrier.cc b/src/TNetLib/Barrier.cc new file mode 100644 index 0000000..0170e04 --- /dev/null +++ b/src/TNetLib/Barrier.cc @@ -0,0 +1,143 @@ +/* + * barrier.c + * + * This file implements the "barrier" synchronization construct. + * + * A barrier causes threads to wait until a set of threads has + * all "reached" the barrier. The number of threads required is + * set when the barrier is initialized, and cannot be changed + * except by reinitializing. + * + * The barrier_init() and barrier_destroy() functions, + * respectively, allow you to initialize and destroy the + * barrier. + * + * The barrier_wait() function allows a thread to wait for a + * barrier to be completed. One thread (the one that happens to + * arrive last) will return from barrier_wait() with the status + * -1 on success -- others will return with 0. The special + * status makes it easy for the calling code to cause one thread + * to do something in a serial region before entering another + * parallel section of code. + */ +#include <pthread.h> +#include "Error.h" +#include "Barrier.h" + +namespace TNet { + +/* + * Initialize a barrier for use. + */ +Barrier::Barrier(int count) + : threshold_(count), counter_(count), cycle_(0) { + + if(0 != pthread_mutex_init(&mutex_, NULL)) + KALDI_ERR << "Cannot initialize mutex"; + + if(0 != pthread_cond_init(&cv_, NULL)) { + pthread_mutex_destroy(&mutex_); + KALDI_ERR << "Cannot initilize condv"; + } +} + +/* + * Destroy a barrier when done using it. + */ +Barrier::~Barrier() { + + if(0 != pthread_mutex_lock(&mutex_)) + KALDI_ERR << "Cannot lock mutex"; + + /* + * Check whether any threads are known to be waiting; report + * "BUSY" if so. + */ + if(counter_ != threshold_) { + pthread_mutex_unlock (&mutex_); + KALDI_ERR << "Cannot destroy barrier with waiting thread"; + } + + if(0 != pthread_mutex_unlock(&mutex_)) + KALDI_ERR << "Cannot unlock barrier"; + + /* + * If unable to destroy either 1003.1c synchronization + * object, halt + */ + if(0 != pthread_mutex_destroy(&mutex_)) + KALDI_ERR << "Cannot destroy mutex"; + + if(0 != pthread_cond_destroy(&cv_)) + KALDI_ERR << "Cannot destroy condv"; +} + + +void Barrier::SetThreshold(int thr) { + if(counter_ != threshold_) + KALDI_ERR << "Cannot set threshold, while a thread is waiting"; + + threshold_ = thr; counter_ = thr; +} + + + +/* + * Wait for all members of a barrier to reach the barrier. When + * the count (of remaining members) reaches 0, broadcast to wake + * all threads waiting. 
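+ * Typical use (sketch): the thread that receives -1 runs the serial
+ * section, the others just continue:
+ *   if(barrier.Wait() == -1) do_serial_part();
+ * where do_serial_part() stands for the caller's own serial code.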
+ */ +int Barrier::Wait() { + int status, cancel, tmp, cycle; + + if(threshold_ == 0) + KALDI_ERR << "Cannot wait when Threshold value was not set"; + + if(0 != pthread_mutex_lock(&mutex_)) + KALDI_ERR << "Cannot lock mutex"; + + cycle = cycle_; /* Remember which cycle we're on */ + + if(--counter_ == 0) { + cycle_ = !cycle_; + counter_ = threshold_; + status = pthread_cond_broadcast(&cv_); + /* + * The last thread into the barrier will return status + * -1 rather than 0, so that it can be used to perform + * some special serial code following the barrier. + */ + if(status == 0) status = -1; + } else { + /* + * Wait with cancellation disabled, because barrier_wait + * should not be a cancellation point. + */ + pthread_setcancelstate(PTHREAD_CANCEL_DISABLE, &cancel); + + /* + * Wait until the barrier's cycle changes, which means + * that it has been broadcast, and we don't want to wait + * anymore. + */ + while (cycle == cycle_) { + status = pthread_cond_wait(&cv_, &mutex_); + if (status != 0) break; + } + + pthread_setcancelstate(cancel, &tmp); + } + /* + * Ignore an error in unlocking. It shouldn't happen, and + * reporting it here would be misleading -- the barrier wait + * completed, after all, whereas returning, for example, + * EINVAL would imply the wait had failed. The next attempt + * to use the barrier *will* return an error, or hang, due + * to whatever happened to the mutex. + */ + pthread_mutex_unlock (&mutex_); + return status; /* error, -1 for waker, or 0 */ +} + + +}//namespace TNet diff --git a/src/TNetLib/Barrier.h b/src/TNetLib/Barrier.h new file mode 100644 index 0000000..a5849d2 --- /dev/null +++ b/src/TNetLib/Barrier.h @@ -0,0 +1,41 @@ +/* + * barrier.h + * + * This header file describes the "barrier" synchronization + * construct. The type barrier_t describes the full state of the + * barrier including the POSIX 1003.1c synchronization objects + * necessary. + * + * A barrier causes threads to wait until a set of threads has + * all "reached" the barrier. The number of threads required is + * set when the barrier is initialized, and cannot be changed + * except by reinitializing. + */ +#include <pthread.h> + +#ifndef barrier_h +#define barrier_h + +namespace TNet { + +/* + * Structure describing a barrier. 
+ */ +class Barrier { + public: + Barrier(int count=0); + ~Barrier(); + void SetThreshold(int thr); + int Wait(); + private: + pthread_mutex_t mutex_; /* Control access to barrier */ + pthread_cond_t cv_; /* wait for barrier */ + int threshold_; /* number of threads required */ + int counter_; /* current number of threads */ + int cycle_; /* alternate wait cycles (0 or 1) */ +}; + +}//namespace TNet + +#endif + diff --git a/src/TNetLib/BiasedLinearity.cc b/src/TNetLib/BiasedLinearity.cc new file mode 100644 index 0000000..b52aeb0 --- /dev/null +++ b/src/TNetLib/BiasedLinearity.cc @@ -0,0 +1,180 @@ + + +#include "BiasedLinearity.h" + + +namespace TNet { + + +void +BiasedLinearity:: +PropagateFnc(const Matrix<BaseFloat>& X, Matrix<BaseFloat>& Y) +{ + //y = b + x.A + + //precopy bias + size_t rows = X.Rows(); + for(size_t i=0; i<rows; i++) { + Y[i].Copy(*mpBias); + } + + //multiply matrix by matrix with mLinearity + Y.BlasGemm(1.0f, X, NO_TRANS, *mpLinearity, NO_TRANS, 1.0f); +} + + +void +BiasedLinearity:: +BackpropagateFnc(const Matrix<BaseFloat>& X, Matrix<BaseFloat>& Y) +{ + // e' = e.A^T + Y.Zero(); + Y.BlasGemm(1.0f, X, NO_TRANS, *mpLinearity, TRANS, 0.0f); +} + + + +void +BiasedLinearity:: +ReadFromStream(std::istream& rIn) +{ + //matrix is stored transposed as SNet does + Matrix<BaseFloat> transpose; + rIn >> transpose; + mLinearity = Matrix<BaseFloat>(transpose, TRANS); + //biases stored normally + rIn >> mBias; +} + + +void +BiasedLinearity:: +WriteToStream(std::ostream& rOut) +{ + //matrix is stored transposed as SNet does + Matrix<BaseFloat> transpose(mLinearity, TRANS); + rOut << transpose; + //biases stored normally + rOut << mBias; + rOut << std::endl; +} + + +void +BiasedLinearity:: +Gradient() +{ + //calculate gradient of weight matrix + mLinearityCorrection.Zero(); + mLinearityCorrection.BlasGemm(1.0f, GetInput(), TRANS, + GetErrorInput(), NO_TRANS, + 0.0f); + + //calculate gradient of bias + mBiasCorrection.Set(0.0f); + size_t rows = GetInput().Rows(); + for(size_t i=0; i<rows; i++) { + mBiasCorrection.Add(GetErrorInput()[i]); + } + + /* + //perform update + mLinearity.AddScaled(-mLearningRate, mLinearityCorrection); + mBias.AddScaled(-mLearningRate, mBiasCorrection); + */ +} + + +void +BiasedLinearity:: +AccuGradient(const UpdatableComponent& src, int thr, int thrN) { + //cast the argument + const BiasedLinearity& src_comp = dynamic_cast<const BiasedLinearity&>(src); + + //allocate accumulators when needed + if(mLinearityCorrectionAccu.MSize() == 0) { + mLinearityCorrectionAccu.Init(mLinearity.Rows(),mLinearity.Cols()); + } + if(mBiasCorrectionAccu.MSize() == 0) { + mBiasCorrectionAccu.Init(mBias.Dim()); + } + + //need to find out which rows to sum... + int div = mLinearityCorrection.Rows() / thrN; + int mod = mLinearityCorrection.Rows() % thrN; + + int origin = thr * div + ((mod > thr)? thr : mod); + int rows = div + ((mod > thr)? 1 : 0); + + //create the matrix windows + const SubMatrix<BaseFloat> src_mat ( + src_comp.mLinearityCorrection, + origin, rows, + 0, mLinearityCorrection.Cols() + ); + SubMatrix<double> tgt_mat ( + mLinearityCorrectionAccu, + origin, rows, + 0, mLinearityCorrection.Cols() + ); + //sum the rows + Add(tgt_mat,src_mat); + + //first thread will always sum the bias correction + if(thr == 0) { + Add(mBiasCorrectionAccu,src_comp.mBiasCorrection); + } + +} + + +void +BiasedLinearity:: +Update(int thr, int thrN) +{ + //need to find out which rows to sum... 
+ int div = mLinearity.Rows() / thrN; + int mod = mLinearity.Rows() % thrN; + + int origin = thr * div + ((mod > thr)? thr : mod); + int rows = div + ((mod > thr)? 1 : 0); + + //std::cout << "[P" << thr << "," << origin << "," << rows << "]" << std::flush; + + //get the matrix windows + SubMatrix<double> src_mat ( + mLinearityCorrectionAccu, + origin, rows, + 0, mLinearityCorrection.Cols() + ); + SubMatrix<BaseFloat> tgt_mat ( + mLinearity, + origin, rows, + 0, mLinearityCorrection.Cols() + ); + + + //update weights + AddScaled(tgt_mat, src_mat, -mLearningRate); + + //perform L2 regularization (weight decay) + BaseFloat L2_decay = -mLearningRate * mWeightcost * mBunchsize; + if(L2_decay != 0.0) { + tgt_mat.AddScaled(L2_decay, tgt_mat); + } + + //first thread always update bias + if(thr == 0) { + //std::cout << "[" << thr << "BP]" << std::flush; + AddScaled(mBias, mBiasCorrectionAccu, -mLearningRate); + } + + //reset the accumulators + src_mat.Zero(); + if(thr == 0) { + mBiasCorrectionAccu.Zero(); + } + +} + +} //namespace diff --git a/src/TNetLib/BiasedLinearity.h b/src/TNetLib/BiasedLinearity.h new file mode 100644 index 0000000..5018637 --- /dev/null +++ b/src/TNetLib/BiasedLinearity.h @@ -0,0 +1,92 @@ +#ifndef _BIASED_LINEARITY_H_ +#define _BIASED_LINEARITY_H_ + + +#include "Component.h" + +#include "Matrix.h" +#include "Vector.h" + + +namespace TNet { + +class BiasedLinearity : public UpdatableComponent +{ + public: + + BiasedLinearity(size_t nInputs, size_t nOutputs, Component *pPred); + ~BiasedLinearity() { } + + ComponentType GetType() const + { return BIASED_LINEARITY; } + + const char* GetName() const + { return "<BiasedLinearity>"; } + + Component* Clone() const; + + void PropagateFnc(const Matrix<BaseFloat>& X, Matrix<BaseFloat>& Y); + void BackpropagateFnc(const Matrix<BaseFloat>& X, Matrix<BaseFloat>& Y); + + void ReadFromStream(std::istream& rIn); + void WriteToStream(std::ostream& rOut); + + /// calculate gradient + void Gradient(); + /// accumulate gradient from other components + void AccuGradient(const UpdatableComponent& src, int thr, int thrN); + /// update weights, reset the accumulator + void Update(int thr, int thrN); + + protected: + Matrix<BaseFloat> mLinearity; ///< Matrix with neuron weights + Vector<BaseFloat> mBias; ///< Vector with biases + + const Matrix<BaseFloat>* mpLinearity; + const Vector<BaseFloat>* mpBias; + + Matrix<BaseFloat> mLinearityCorrection; ///< Matrix for linearity updates + Vector<BaseFloat> mBiasCorrection; ///< Vector for bias updates + + Matrix<double> mLinearityCorrectionAccu; ///< Matrix for summing linearity updates + Vector<double> mBiasCorrectionAccu; ///< Vector for summing bias updates + +}; + + + + +//////////////////////////////////////////////////////////////////////////// +// INLINE FUNCTIONS +// BiasedLinearity:: +inline +BiasedLinearity:: +BiasedLinearity(size_t nInputs, size_t nOutputs, Component *pPred) + : UpdatableComponent(nInputs, nOutputs, pPred), + mLinearity(), mBias(), //cloned instaces don't need this + mpLinearity(&mLinearity), mpBias(&mBias), + mLinearityCorrection(nInputs,nOutputs), mBiasCorrection(nOutputs), + mLinearityCorrectionAccu(), mBiasCorrectionAccu() //cloned instances don't need this +{ } + +inline +Component* +BiasedLinearity:: +Clone() const +{ + BiasedLinearity* ptr = new BiasedLinearity(GetNInputs(), GetNOutputs(), NULL); + ptr->mpLinearity = mpLinearity; //copy pointer from currently active weights + ptr->mpBias = mpBias; //... 
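+  //(all clones share the master's weights through these pointers; each
+  // thread gets private correction buffers but reads the same
+  // mLinearity/mBias owned by the original network)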
+
+  ptr->mLearningRate = mLearningRate;
+
+  return ptr;
+}
+
+
+
+} //namespace
+
+
+
+#endif
diff --git a/src/TNetLib/BlockArray.cc b/src/TNetLib/BlockArray.cc
new file mode 100644
index 0000000..18a41d2
--- /dev/null
+++ b/src/TNetLib/BlockArray.cc
@@ -0,0 +1,136 @@
+
+
+#include "BlockArray.h"
+#include "Nnet.h"
+
+
+namespace TNet
+{
+
+  void
+  BlockArray::
+  PropagateFnc(const Matrix<BaseFloat>& X, Matrix<BaseFloat>& Y)
+  {
+    SubMatrix<BaseFloat> colsX(X,0,1,0,1); //dummy dimensions
+    SubMatrix<BaseFloat> colsY(Y,0,1,0,1); //dummy dimensions
+
+    int X_src_ori=0, Y_tgt_ori=0;
+    for(int i=0; i<mNBlocks; i++) {
+      //get the correct submatrices
+      int colsX_cnt=mBlocks[i]->GetNInputs();
+      int colsY_cnt=mBlocks[i]->GetNOutputs();
+      colsX = X.Range(0,X.Rows(),X_src_ori,colsX_cnt);
+      colsY = Y.Range(0,Y.Rows(),Y_tgt_ori,colsY_cnt);
+
+      //propagate through the block (network)
+      mBlocks[i]->Propagate(colsX,colsY);
+
+      //shift the origin coordinates
+      X_src_ori += colsX_cnt;
+      Y_tgt_ori += colsY_cnt;
+    }
+
+    assert(X_src_ori == X.Cols());
+    assert(Y_tgt_ori == Y.Cols());
+  }
+
+
+  void
+  BlockArray::
+  BackpropagateFnc(const Matrix<BaseFloat>& X, Matrix<BaseFloat>& Y)
+  {
+    KALDI_ERR << "Unimplemented";
+  }
+
+
+  void
+  BlockArray::
+  Update()
+  {
+    KALDI_ERR << "Unimplemented";
+  }
+
+
+  void
+  BlockArray::
+  ReadFromStream(std::istream& rIn)
+  {
+    if(mBlocks.size() > 0) {
+      KALDI_ERR << "Cannot read block vector, "
+                << "already filled by "
+                << mBlocks.size()
+                << " elements";
+    }
+
+    rIn >> std::ws >> mNBlocks;
+    if(mNBlocks < 1) {
+      KALDI_ERR << "Bad number of blocks:" << mNBlocks;
+    }
+
+    //read all the blocks
+    std::string tag;
+    int block_id;
+    for(int i=0; i<mNBlocks; i++) {
+      //read tag <block>
+      rIn >> std::ws >> tag;
+      //make it lowercase
+      std::transform(tag.begin(), tag.end(), tag.begin(), tolower);
+      //check
+      if(tag!="<block>") {
+        KALDI_ERR << "<block> keyword expected";
+      }
+
+      //read block number
+      rIn >> std::ws >> block_id;
+      if(block_id != i+1) {
+        KALDI_ERR << "Expected block number:" << i+1
+                  << " read block number: " << block_id;
+      }
+
+      //read the nnet
+      Network* p_nnet = new Network;
+      p_nnet->ReadNetwork(rIn);
+      if(p_nnet->Layers() == 0) {
+        KALDI_ERR << "Cannot read empty network to a block";
+      }
+
+      //add it to the vector
+      mBlocks.push_back(p_nnet);
+    }
+
+    //check the declared dimensionality
+    int sum_inputs=0, sum_outputs=0;
+    for(int i=0; i<mNBlocks; i++) {
+      sum_inputs += mBlocks[i]->GetNInputs();
+      sum_outputs += mBlocks[i]->GetNOutputs();
+    }
+    if(sum_inputs != GetNInputs()) {
+      KALDI_ERR << "Non-matching number of INPUTS! Declared:"
+                << GetNInputs()
+                << " summed from blocks"
+                << sum_inputs;
+    }
+    if(sum_outputs != GetNOutputs()) {
+      KALDI_ERR << "Non-matching number of OUTPUTS!
Declared:" + << GetNOutputs() + << " summed from blocks" + << sum_outputs; + } + } + + + void + BlockArray:: + WriteToStream(std::ostream& rOut) + { + rOut << " " << mBlocks.size() << " "; + for(int i=0; i<mBlocks.size(); i++) { + rOut << "<block> " << i+1 << "\n"; + mBlocks[i]->WriteNetwork(rOut); + rOut << "<endblock>\n"; + } + } + + +} //namespace + diff --git a/src/TNetLib/BlockArray.h b/src/TNetLib/BlockArray.h new file mode 100644 index 0000000..e6a8657 --- /dev/null +++ b/src/TNetLib/BlockArray.h @@ -0,0 +1,85 @@ +#ifndef _BLOCK_ARRAY_H_ +#define _BLOCK_ARRAY_H_ + + +#include "Component.h" + +#include "Matrix.h" +#include "Vector.h" + + +namespace TNet { + + class Network; + + class BlockArray : public Component + { + public: + + BlockArray(size_t nInputs, size_t nOutputs, Component *pPred); + ~BlockArray(); + + ComponentType GetType() const; + const char* GetName() const; + + void PropagateFnc(const Matrix<BaseFloat>& X, Matrix<BaseFloat>& Y); + void BackpropagateFnc(const Matrix<BaseFloat>& X, Matrix<BaseFloat>& Y); + + void Update(); + + void ReadFromStream(std::istream& rIn); + void WriteToStream(std::ostream& rOut); + + //:TODO: + Component* Clone() const { KALDI_ERR << "Unimplemented"; } + + protected: + std::vector<Network*> mBlocks; ///< vector with networks, one network is one block + size_t mNBlocks; + }; + + + + + //////////////////////////////////////////////////////////////////////////// + // INLINE FUNCTIONS + // BlockArray:: + inline + BlockArray:: + BlockArray(size_t nInputs, size_t nOutputs, Component *pPred) + : Component(nInputs, nOutputs, pPred), + mNBlocks(0) + { } + + + inline + BlockArray:: + ~BlockArray() + { + for(int i=0; i<mBlocks.size(); i++) { + delete mBlocks[i]; + } + mBlocks.clear(); + } + + inline Component::ComponentType + BlockArray:: + GetType() const + { + return Component::BLOCK_ARRAY; + } + + inline const char* + BlockArray:: + GetName() const + { + return "<blockarray>"; + } + + + +} //namespace + + + +#endif diff --git a/src/TNetLib/CRBEDctFeat.h b/src/TNetLib/CRBEDctFeat.h new file mode 100644 index 0000000..0984c36 --- /dev/null +++ b/src/TNetLib/CRBEDctFeat.h @@ -0,0 +1,432 @@ +#ifndef _CUCRBEDCTFEATURES_H_ +#define _CUCRBEDCTFEATURES_H_ + + +#include "Component.h" +#include "Matrix.h" +#include "Vector.h" +#include "cblas.h" + + +namespace TNet { + + /** + * Expands the time context of the input features + * in N, out k*N, FrameOffset o_1,o_2,...,o_k + * FrameOffset example 11frames: -5 -4 -3 -2 -1 0 1 2 3 4 5 + */ + class Expand : public Component + { + public: + Expand(size_t nInputs, size_t nOutputs, Component* pPred) + : Component(nInputs,nOutputs,pPred) + { } + + ~Expand() + { } + + ComponentType GetType() const + { return EXPAND; } + + const char* GetName() const + { return "<expand>"; } + + Component* Clone() const + { + Expand* p = new Expand(GetNInputs(),GetNOutputs(),NULL); + p->mFrameOffset.Init(mFrameOffset.Dim()); + p->mFrameOffset.Copy(mFrameOffset); + return p; + } + + void ReadFromStream(std::istream& rIn) + { rIn >> mFrameOffset; } + + void WriteToStream(std::ostream& rOut) + { rOut << mFrameOffset; } + + protected: + void PropagateFnc(const Matrix<BaseFloat>& X, Matrix<BaseFloat>& Y) + { + assert(X.Cols()*mFrameOffset.Dim() == Y.Cols()); + assert(X.Rows() == Y.Rows()); + + for(size_t r=0;r<X.Rows();r++) { + for(size_t off=0;off<mFrameOffset.Dim();off++) { + int r_off = r + mFrameOffset[off]; + if(r_off < 0) r_off = 0; + if(r_off >= X.Rows()) r_off = X.Rows()-1; + 
memcpy(Y.pRowData(r)+off*X.Cols(),X.pRowData(r_off),sizeof(BaseFloat)*X.Cols()); + } + } + } + + void BackpropagateFnc(const Matrix<BaseFloat>& X, Matrix<BaseFloat>& Y) + { Error("__func__ Nonsense"); } + + protected: + Vector<int> mFrameOffset; + }; + + + + /** + * Rearrange the matrix columns according to the indices in mCopyFromIndices + */ + class Copy : public Component + { + public: + Copy(size_t nInputs, size_t nOutputs, Component* pPred) + : Component(nInputs,nOutputs,pPred) + { } + + ~Copy() + { } + + ComponentType GetType() const + { return COPY; } + + const char* GetName() const + { return "<copy>"; } + + Component* Clone() const + { + Copy* p = new Copy(GetNInputs(),GetNOutputs(),NULL); + p->mCopyFromIndices.Init(mCopyFromIndices.Dim()); + p->mCopyFromIndices.Copy(mCopyFromIndices); + return p; + } + + void ReadFromStream(std::istream& rIn) + { + Vector<int> vec; rIn >> vec; vec.Add(-1); + mCopyFromIndices.Init(vec.Dim()).Copy(vec); + } + + void WriteToStream(std::ostream& rOut) + { + Vector<int> vec(mCopyFromIndices); + vec.Add(1); rOut << vec; + } + + protected: + void PropagateFnc(const Matrix<BaseFloat>& X, Matrix<BaseFloat>& Y) + { + assert(mCopyFromIndices.Dim() == Y.Cols()); + for(int i=0; i<mCopyFromIndices.Dim();i++) { + assert(mCopyFromIndices[i] >= 0 && mCopyFromIndices[i] < X.Cols()); + } + + for(size_t r=0; r<X.Rows(); r++) { + for(size_t c=0; c<Y.Cols(); c++) { + Y(r,c) = X(r,mCopyFromIndices[c]); + } + } + } + + void BackpropagateFnc(const Matrix<BaseFloat>& X, Matrix<BaseFloat>& Y) + { Error("__func__ Nonsense"); } + + protected: + Vector<int> mCopyFromIndices; + }; + + class Transpose : public Component + { + public: + Transpose(size_t nInputs, size_t nOutputs, Component* pPred) + : Component(nInputs,nOutputs,pPred), mContext(0) + { } + + ~Transpose() + { } + + ComponentType GetType() const + { return TRANSPOSE; } + + const char* GetName() const + { return "<transpose>"; } + + Component* Clone() const + { + Transpose* p = new Transpose(GetNInputs(),GetNOutputs(),NULL); + p->mCopyFromIndices.Init(mCopyFromIndices.Dim()); + p->mCopyFromIndices.Copy(mCopyFromIndices); + p->mContext = mContext; + return p; + } + + void ReadFromStream(std::istream& rIn) + { + rIn >> std::ws >> mContext; + + if(GetNInputs() != GetNOutputs()) { + Error("Input dim must be same as output dim"); + } + + Vector<int> vec(GetNInputs()); + int channels = GetNInputs() / mContext; + for(int i=0, ch=0; ch<channels; ch++) { + for(int idx=ch; idx < (int)GetNInputs(); idx+=channels, i++) { + assert(i < (int)GetNInputs()); + vec[i] = idx; + } + } + + mCopyFromIndices.Init(vec.Dim()).Copy(vec); + } + + void WriteToStream(std::ostream& rOut) + { rOut << " " << mContext << "\n"; } + + protected: + void PropagateFnc(const Matrix<BaseFloat>& X, Matrix<BaseFloat>& Y) + { + assert(mCopyFromIndices.Dim() == Y.Cols()); + for(int i=0; i<mCopyFromIndices.Dim();i++) { + assert(mCopyFromIndices[i] >= 0 && mCopyFromIndices[i] < X.Cols()); + } + + for(size_t r=0; r<X.Rows(); r++) { + for(size_t c=0; c<Y.Cols(); c++) { + Y(r,c) = X(r,mCopyFromIndices[c]); + } + } + } + + void BackpropagateFnc(const Matrix<BaseFloat>& X, Matrix<BaseFloat>& Y) + { Error("__func__ Nonsense"); } + + protected: + int mContext; + Vector<int> mCopyFromIndices; + }; + + + /** + * BlockLinearity is used for the blockwise multiplication by + * DCT transform loaded from disk + */ + class BlockLinearity : public Component + { + public: + BlockLinearity(size_t nInputs, size_t nOutputs, Component* pPred) + : Component(nInputs,nOutputs,pPred) + 
{ } + + ~BlockLinearity() + { } + + + ComponentType GetType() const + { return Component::BLOCK_LINEARITY; } + + const char* GetName() const + { return "<blocklinearity>"; } + + Component* Clone() const + { + BlockLinearity* p = new BlockLinearity(GetNInputs(),GetNOutputs(),NULL); + p->mBlockLinearity.Init(mBlockLinearity.Rows(),mBlockLinearity.Cols()); + p->mBlockLinearity.Copy(mBlockLinearity); + return p; + } + + void PropagateFnc(const Matrix<BaseFloat>& X, Matrix<BaseFloat>& Y) + { + assert(X.Rows() == Y.Rows()); + assert(X.Cols()%mBlockLinearity.Rows() == 0); + assert(Y.Cols()%mBlockLinearity.Cols() == 0); + assert(X.Cols()/mBlockLinearity.Rows() == Y.Cols()/mBlockLinearity.Cols()); + + int instN = X.Cols()/mBlockLinearity.Rows(); + for(int inst=0; inst<instN; inst++) { +#ifndef DOUBLEPRECISION + cblas_sgemm(CblasRowMajor, CblasNoTrans, CblasNoTrans, + X.Rows(), mBlockLinearity.Cols(), mBlockLinearity.Rows(), + 1.0, X.pData()+inst*mBlockLinearity.Rows(), X.Stride(), + mBlockLinearity.pData(), mBlockLinearity.Stride(), + 0.0, Y.pData()+inst*mBlockLinearity.Cols(), Y.Stride()); +#else + cblas_dgemm(CblasRowMajor, CblasNoTrans, CblasNoTrans, + X.Rows(), mBlockLinearity.Cols(), mBlockLinearity.Rows(), + 1.0, X.pData()+inst*mBlockLinearity.Rows(), X.Stride(), + mBlockLinearity.pData(), mBlockLinearity.Stride(), + 0.0, Y.pData()+inst*mBlockLinearity.Cols(), Y.Stride()); +#endif + } + } + + void BackpropagateFnc(const Matrix<BaseFloat>& X, Matrix<BaseFloat>& Y) + { Error("__func__ Not implemented"); } + + + void ReadFromStream(std::istream& rIn) + { + Matrix<BaseFloat> mat; + rIn >> mat; + Matrix<BaseFloat> trans(mat,TRANS); + mBlockLinearity.Init(trans.Rows(),trans.Cols()).Copy(trans); + + if((GetNOutputs() % mBlockLinearity.Cols() != 0) || + (GetNInputs() % mBlockLinearity.Rows() != 0) || + ((GetNOutputs() / mBlockLinearity.Cols()) != + (GetNInputs() / mBlockLinearity.Rows()))) + { + Error("BlockLinearity matrix dimensions must divide IO dims"); + } + } + + void WriteToStream(std::ostream& rOut) + { + Matrix<BaseFloat> trans(mBlockLinearity,TRANS); + rOut << trans; + } + + private: + Matrix<BaseFloat> mBlockLinearity; + }; + + + + class Bias : public Component + { + public: + Bias(size_t nInputs, size_t nOutputs, Component* pPred) + : Component(nInputs,nOutputs,pPred) + { } + + ~Bias() + { } + + + ComponentType GetType() const + { return Component::BIAS; } + + const char* GetName() const + { return "<bias>"; } + + Component* Clone() const + { + Bias* p = new Bias(GetNInputs(),GetNOutputs(),NULL); + p->mBias.Init(mBias.Dim()); + p->mBias.Copy(mBias); + return p; + } + + void PropagateFnc(const Matrix<BaseFloat>& X, Matrix<BaseFloat>& Y) + { + Y.Copy(X); + for(size_t r=0; r<X.Rows(); r++) { + for(size_t c=0; c<X.Cols(); c++) { + Y(r,c) += mBias[c]; + } + } + } + + void BackpropagateFnc(const Matrix<BaseFloat>& X, Matrix<BaseFloat>& Y) + { Y.Copy(X); } + + + void ReadFromStream(std::istream& rIn) + { rIn >> mBias; } + + void WriteToStream(std::ostream& rOut) + { rOut << mBias; } + + private: + Vector<BaseFloat> mBias; + }; + + + + class Window : public Component + { + public: + Window(size_t nInputs, size_t nOutputs, Component* pPred) + : Component(nInputs, nOutputs, pPred) + { } + + ~Window() + { } + + + ComponentType GetType() const + { return Component::WINDOW; } + + const char* GetName() const + { return "<window>"; } + + Component* Clone() const + { + Window* p = new Window(GetNInputs(),GetNOutputs(),NULL); + p->mWindow.Init(mWindow.Dim()); + p->mWindow.Copy(mWindow); + return p; + } + 
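+ //(a sketch of the expected stream content, sizes hypothetical: the header
+ // "<window> 23 23" written by the network dumper is followed by a 23-dim
+ // vector w; propagation below then scales each column, Y(r,c) = X(r,c)*w(c))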
+ + void PropagateFnc(const Matrix<BaseFloat>& X, Matrix<BaseFloat>& Y) + { Y.Copy(X); + for(size_t r=0; r<X.Rows(); r++) { + for(size_t c=0; c<X.Cols(); c++) { + Y(r,c) *= mWindow[c]; + } + } + } + + void BackpropagateFnc(const Matrix<BaseFloat>& X, Matrix<BaseFloat>& Y) + { Error("__func__ Not implemented"); } + + + void ReadFromStream(std::istream& rIn) + { rIn >> mWindow; } + + void WriteToStream(std::ostream& rOut) + { rOut << mWindow; } + + private: + Vector<BaseFloat> mWindow; + }; + + class Log : public Component + { + public: + Log(size_t nInputs, size_t nOutputs, Component* pPred) + : Component(nInputs, nOutputs, pPred) + { } + + ~Log() + { } + + + ComponentType GetType() const + { return Component::LOG; } + + const char* GetName() const + { return "<log>"; } + + Component* Clone() const + { return new Log(GetNInputs(),GetNOutputs(),NULL); } + + + void PropagateFnc(const Matrix<BaseFloat>& X, Matrix<BaseFloat>& Y) + { Y.Copy(X); Y.ApplyLog(); } + + void BackpropagateFnc(const Matrix<BaseFloat>& X, Matrix<BaseFloat>& Y) + { Error("__func__ Not implemented"); } + + + void ReadFromStream(std::istream& rIn) + { } + + void WriteToStream(std::ostream& rOut) + { } + + }; + +} + + +#endif + diff --git a/src/TNetLib/Cache.cc b/src/TNetLib/Cache.cc new file mode 100644 index 0000000..f498318 --- /dev/null +++ b/src/TNetLib/Cache.cc @@ -0,0 +1,248 @@ + +#include <sys/time.h> + +#include "Cache.h" +#include "Matrix.h" +#include "Vector.h" + + +namespace TNet { + + Cache:: + Cache() + : mState(EMPTY), mIntakePos(0), mExhaustPos(0), mDiscarded(0), + mRandomized(false), mTrace(0) + { } + + Cache:: + ~Cache() + { } + + void + Cache:: + Init(size_t cachesize, size_t bunchsize, long int seed) + { + if((cachesize % bunchsize) != 0) { + KALDI_ERR << "Non divisible cachesize" << cachesize + << " by bunchsize" << bunchsize; + } + + mCachesize = cachesize; + mBunchsize = bunchsize; + + mState = EMPTY; + + mIntakePos = 0; + mExhaustPos = 0; + + mRandomized = false; + + if(seed == 0) { + //generate seed + struct timeval tv; + if (gettimeofday(&tv, 0) == -1) { + Error("gettimeofday does not work."); + exit(-1); + } + seed = (int)(tv.tv_sec) + (int)tv.tv_usec + (int)(tv.tv_usec*tv.tv_usec); + } + + srand48(seed); + + } + + void + Cache:: + AddData(const Matrix<BaseFloat>& rFeatures, const Matrix<BaseFloat>& rDesired) + { + assert(rFeatures.Rows() == rDesired.Rows()); + + //lazy buffers allocation + if(mFeatures.Rows() != mCachesize) { + mFeatures.Init(mCachesize,rFeatures.Cols()); + mDesired.Init(mCachesize,rDesired.Cols()); + } + + //warn if segment longer than half-cache + if(rFeatures.Rows() > mCachesize/2) { + std::ostringstream os; + os << "Too long segment and small feature cache! " + << " cachesize: " << mCachesize + << " segmentsize: " << rFeatures.Rows(); + Warning(os.str()); + } + + //change state + if(mState == EMPTY) { + if(mTrace&3) std::cout << "/" << std::flush; + mState = INTAKE; mIntakePos = 0; + + //check for leftover from previous segment + int leftover = mFeaturesLeftover.Rows(); + //check if leftover is not bigger than cachesize + if(leftover > mCachesize) { + std::ostringstream os; + os << "Too small feature cache: " << mCachesize + << ", truncating: " + << leftover - mCachesize << " frames from previous segment leftover"; + //Error(os.str()); + Warning(os.str()); + leftover = mCachesize; + } + //prefill cache with leftover + if(leftover > 0) { + memcpy(mFeatures.pData(),mFeaturesLeftover.pData(), + (mFeaturesLeftover.MSize() < mFeatures.MSize()? 
+ mFeaturesLeftover.MSize() : mFeatures.MSize()) + ); + memcpy(mDesired.pData(),mDesiredLeftover.pData(), + (mDesiredLeftover.MSize() < mDesired.MSize()? + mDesiredLeftover.MSize() : mDesired.MSize()) + ); + mFeaturesLeftover.Destroy(); + mDesiredLeftover.Destroy(); + mIntakePos += leftover; + } + } + + assert(mState == INTAKE); + assert(rFeatures.Rows() == rDesired.Rows()); + if(mTrace&2) std::cout << "F" << std::flush; + + int cache_space = mCachesize - mIntakePos; + int feature_length = rFeatures.Rows(); + int fill_rows = (cache_space<feature_length)? cache_space : feature_length; + int leftover = feature_length - fill_rows; + + assert(cache_space > 0); + assert(mFeatures.Stride()==rFeatures.Stride()); + assert(mDesired.Stride()==rDesired.Stride()); + + //copy the data to cache + memcpy(mFeatures.pData()+mIntakePos*mFeatures.Stride(), + rFeatures.pData(), + fill_rows*mFeatures.Stride()*sizeof(BaseFloat)); + + memcpy(mDesired.pData()+mIntakePos*mDesired.Stride(), + rDesired.pData(), + fill_rows*mDesired.Stride()*sizeof(BaseFloat)); + + //copy leftovers + if(leftover > 0) { + mFeaturesLeftover.Init(leftover,mFeatures.Cols()); + mDesiredLeftover.Init(leftover,mDesired.Cols()); + + memcpy(mFeaturesLeftover.pData(), + rFeatures.pData()+fill_rows*rFeatures.Stride(), + mFeaturesLeftover.MSize()); + + memcpy(mDesiredLeftover.pData(), + rDesired.pData()+fill_rows*rDesired.Stride(), + mDesiredLeftover.MSize()); + } + + //update cursor + mIntakePos += fill_rows; + + //change state + if(mIntakePos == mCachesize) { + if(mTrace&3) std::cout << "\\" << std::flush; + mState = FULL; + } + } + + + + void + Cache:: + Randomize() + { + assert(mState == FULL || mState == INTAKE); + + if(mTrace&3) std::cout << "R" << std::flush; + + //lazy initialization of the output buffers + mFeaturesRandom.Init(mCachesize,mFeatures.Cols()); + mDesiredRandom.Init(mCachesize,mDesired.Cols()); + + //generate random series of integers + Vector<int> randmask(mIntakePos); + for(unsigned int i=0; i<mIntakePos; i++) { + randmask[i]=i; + } + int* ptr = randmask.pData(); + std::random_shuffle(ptr, ptr+mIntakePos, GenerateRandom); + + //randomize + for(int i=0; i<randmask.Dim(); i++) { + mFeaturesRandom[i].Copy(mFeatures[randmask[i]]); + mDesiredRandom[i].Copy(mDesired[randmask[i]]); + } + + mRandomized = true; + } + + void + Cache:: + GetBunch(Matrix<BaseFloat>& rFeatures, Matrix<BaseFloat>& rDesired) + { + if(mState == EMPTY) { + Error("GetBunch on empty cache!!!"); + } + + //change state if full... 
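+ //(the cache cycles EMPTY -> INTAKE -> FULL -> EXHAUST -> EMPTY; a typical
+ // driver loop is: AddData() until Full(), Randomize(), then GetBunch()
+ // until Empty(), as done in Platform::Thread() below)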
+ if(mState == FULL) { + if(mTrace&3) std::cout << "\\" << std::flush; + mState = EXHAUST; mExhaustPos = 0; + } + + //final cache is not completely filled + if(mState == INTAKE) { + if(mTrace&3) std::cout << "\\-LAST_CACHE\n" << std::flush; + mState = EXHAUST; mExhaustPos = 0; + } + + assert(mState == EXHAUST); + + //init the output + if(rFeatures.Rows()!=mBunchsize || rFeatures.Cols()!=mFeatures.Cols()) { + rFeatures.Init(mBunchsize,mFeatures.Cols()); + } + if(rDesired.Rows()!=mBunchsize || rDesired.Cols()!=mDesired.Cols()) { + rDesired.Init(mBunchsize,mDesired.Cols()); + } + + //copy the output + if(mRandomized) { + memcpy(rFeatures.pData(), + mFeaturesRandom.pData()+mExhaustPos*mFeatures.Stride(), + rFeatures.MSize()); + + memcpy(rDesired.pData(), + mDesiredRandom.pData()+mExhaustPos*mDesired.Stride(), + rDesired.MSize()); + } else { + memcpy(rFeatures.pData(), + mFeatures.pData()+mExhaustPos*mFeatures.Stride(), + rFeatures.MSize()); + + memcpy(rDesired.pData(), + mDesired.pData()+mExhaustPos*mDesired.Stride(), + rDesired.MSize()); + } + + + //update cursor + mExhaustPos += mBunchsize; + + //change state to EMPTY + if(mExhaustPos > mIntakePos-mBunchsize) { + //we don't have more complete bunches... + mDiscarded += mIntakePos - mExhaustPos; + + mState = EMPTY; + } + } + + +} diff --git a/src/TNetLib/Cache.h b/src/TNetLib/Cache.h new file mode 100644 index 0000000..800d92c --- /dev/null +++ b/src/TNetLib/Cache.h @@ -0,0 +1,74 @@ +#ifndef _CUCACHE_H_ +#define _CUCACHE_H_ + +#include "Matrix.h" + +namespace TNet { + + + /** + * The feature-target pair cache + */ + class Cache { + typedef enum { EMPTY, INTAKE, FULL, EXHAUST } State; + public: + Cache(); + ~Cache(); + + /// Initialize the cache + void Init(size_t cachesize, size_t bunchsize, long int seed = 0); + + /// Add data to cache, returns number of added vectors + void AddData(const Matrix<BaseFloat>& rFeatures, const Matrix<BaseFloat>& rDesired); + /// Randomizes the cache + void Randomize(); + /// Get the bunch of training data + void GetBunch(Matrix<BaseFloat>& rFeatures, Matrix<BaseFloat>& rDesired); + + + /// Returns true if the cache was completely filled + bool Full() + { return (mState == FULL); } + + /// Returns true if the cache is empty + bool Empty() + { return (mState == EMPTY || mIntakePos < mBunchsize); } + + /// Number of discarded frames + int Discarded() + { return mDiscarded; } + + /// Set the trace message level + void Trace(int trace) + { mTrace = trace; } + + private: + + static long int GenerateRandom(int max) + { return lrand48() % max; } + + State mState; ///< Current state of the cache + + size_t mIntakePos; ///< Number of intaken vectors by AddData + size_t mExhaustPos; ///< Number of exhausted vectors by GetBunch + + size_t mCachesize; ///< Size of cache + size_t mBunchsize; ///< Size of bunch + int mDiscarded; ///< Number of discarded frames + + Matrix<BaseFloat> mFeatures; ///< Feature cache + Matrix<BaseFloat> mFeaturesRandom; ///< Feature cache + Matrix<BaseFloat> mFeaturesLeftover; ///< Feature cache + + Matrix<BaseFloat> mDesired; ///< Desired vector cache + Matrix<BaseFloat> mDesiredRandom; ///< Desired vector cache + Matrix<BaseFloat> mDesiredLeftover; ///< Desired vector cache + + bool mRandomized; + + int mTrace; + }; + +} + +#endif diff --git a/src/TNetLib/Component.h b/src/TNetLib/Component.h new file mode 100644 index 0000000..762451e --- /dev/null +++ b/src/TNetLib/Component.h @@ -0,0 +1,387 @@ +#ifndef _NETWORK_COMPONENT_I_H +#define _NETWORK_COMPONENT_I_H + + +#include "Vector.h" +#include 
"Matrix.h" + +#include <iostream> +#include <stdexcept> + + +namespace TNet { + + + /** + * Basic element of the network, + * it is a box with defined inputs and outputs, + * and functions to refresh outputs + * + * it is able to compute tranformation function (forward pass) + * and jacobian function (backward pass), + * which is to be implemented in descendents + */ + class Component + { + public: + /// Types of the net components + typedef enum { + UPDATABLE_COMPONENT = 0x0100, + BIASED_LINEARITY, + SHARED_LINEARITY, + + ACT_FUN = 0x0200, + SOFTMAX, + SIGMOID, + BLOCK_SOFTMAX, + + OTHER = 0x0400, + EXPAND, + COPY, + TRANSPOSE, + BLOCK_LINEARITY, + WINDOW, + BIAS, + LOG, + + BLOCK_ARRAY, + } ComponentType; + + + ////////////////////////////////////////////////////////////// + // Constructor & Destructor + public: + Component(size_t nInputs, size_t nOutputs, Component *pPred); + virtual ~Component(); + + ////////////////////////////////////////////////////////////// + // Interface specification (public) + public: + /// Get Type Identification of the component + virtual ComponentType GetType() const = 0; + /// Get Type Label of the component + virtual const char* GetName() const = 0; + /// + virtual bool IsUpdatable() const + { return false; } + /// Clone the component + virtual Component* Clone() const = 0; + + /// Get size of input vectors + size_t GetNInputs() const; + /// Get size of output vectors + size_t GetNOutputs() const; + + /// IO Data getters + const Matrix<BaseFloat>& GetInput() const; + const Matrix<BaseFloat>& GetOutput() const; + const Matrix<BaseFloat>& GetErrorInput() const; + const Matrix<BaseFloat>& GetErrorOutput() const; + + /// Set input vector (bind with the preceding NetworkComponent) + void SetInput(const Matrix<BaseFloat>& rInput); + /// Set error input vector (bind with the following NetworkComponent) + void SetErrorInput(const Matrix<BaseFloat>& rErrorInput); + + /// Perform forward pass propagateion Input->Output + void Propagate(); + /// Perform backward pass propagateion ErrorInput->ErrorOutput + void Backpropagate(); + + /// Reads the component parameters from stream + virtual void ReadFromStream(std::istream& rIn) { } + /// Writes the components parameters to stream + virtual void WriteToStream(std::ostream& rOut) { } + + + /////////////////////////////////////////////////////////////// + // Nonpublic member functions used to update data outputs + protected: + /// Forward pass transformation (to be implemented by descendents...) + virtual void PropagateFnc(const Matrix<BaseFloat>& X, Matrix<BaseFloat>& Y) = 0; + /// Backward pass transformation (to be implemented by descendents...) 
+ virtual void BackpropagateFnc(const Matrix<BaseFloat>& X, Matrix<BaseFloat>& Y) = 0; + + + /////////////////////////////////////////////////////////////// + // data members + protected: + + size_t mNInputs; ///< Size of input vectors + size_t mNOutputs; ///< Size of output vectors + + const Matrix<BaseFloat>* mpInput; ///< inputs are NOT OWNED by component + const Matrix<BaseFloat>* mpErrorInput;///< inputs are NOT OWNED by component + + Matrix<BaseFloat> mOutput; ///< outputs are OWNED by component + Matrix<BaseFloat> mErrorOutput; ///< outputs are OWNED by component + + }; + + + /** + * Class UpdatableComponent is a box which has some + * parameters adjustable by learning + * + * you can set the learning rate, lock the params, + * and learn from each data observation + */ + class UpdatableComponent : public Component + { + ////////////////////////////////////////////////////////////// + // Constructor & Destructor + public: + UpdatableComponent(size_t nInputs, size_t nOutputs, Component *pPred); + virtual ~UpdatableComponent(); + + + ////////////////////////////////////////////////////////////// + // Interface specification (public) + public: + /// + virtual bool IsUpdatable() const + { return true; } + + /// calculate gradient + virtual void Gradient() = 0; + /// accumulate gradient from other components + virtual void AccuGradient(const UpdatableComponent& src, int thr, int thrN) = 0; + /// update weights, reset the accumulator + virtual void Update(int thr, int thrN) = 0; + + /// Sets the learning rate of gradient descent + void LearnRate(BaseFloat rate); + /// Gets the learning rate of gradient descent + BaseFloat LearnRate() const; + + void Momentum(BaseFloat mmt); + BaseFloat Momentum() const ; + + void Weightcost(BaseFloat cost); + BaseFloat Weightcost() const; + + void Bunchsize(size_t size); + size_t Bunchsize() const; + + protected: + BaseFloat mLearningRate; + BaseFloat mMomentum; + BaseFloat mWeightcost; + size_t mBunchsize; + }; + + + + + ////////////////////////////////////////////////////////////////////////// + // INLINE FUNCTIONS + // Component:: + inline + Component:: + Component(size_t nInputs, size_t nOutputs, Component *pPred) + : mNInputs(nInputs), mNOutputs(nOutputs), + mpInput(NULL), mpErrorInput(NULL), + mOutput(), mErrorOutput() + { + /* DOUBLE LINK the Components */ + if (pPred != NULL) { + SetInput(pPred->GetOutput()); + pPred->SetErrorInput(GetErrorOutput()); + } + } + + + inline + Component:: + ~Component() + { + ; + } + + inline void + Component:: + Propagate() + { + //initialize output buffer + if(mOutput.Rows() != GetInput().Rows() || mOutput.Cols() != GetNOutputs()) { + mOutput.Init(GetInput().Rows(),GetNOutputs()); + } + //do the dimensionality test + if(GetNInputs() != GetInput().Cols()) { + KALDI_ERR << "Non-matching INPUT dim!!! 
Network dim: " << GetNInputs() + << " Data dim: " << GetInput().Cols(); + } + //run transform + PropagateFnc(GetInput(),mOutput); + + } + + + inline void + Component:: + Backpropagate() + { + //re-initialize the output buffer + if(mErrorOutput.Rows() != GetErrorInput().Rows() || mErrorOutput.Cols() != GetNInputs()) { + mErrorOutput.Init(GetErrorInput().Rows(),GetNInputs()); + } + + //do the dimensionality test + assert(GetErrorInput().Cols() == mNOutputs); + assert(mErrorOutput.Cols() == mNInputs); + assert(mErrorOutput.Rows() == GetErrorInput().Rows()); + + //transform + BackpropagateFnc(GetErrorInput(),mErrorOutput); + + } + + + inline void + Component:: + SetInput(const Matrix<BaseFloat>& rInput) + { + mpInput = &rInput; + } + + + inline void + Component:: + SetErrorInput(const Matrix<BaseFloat>& rErrorInput) + { + mpErrorInput = &rErrorInput; + } + + + inline const Matrix<BaseFloat>& + Component:: + GetInput() const + { + if (NULL == mpInput) Error("mpInput is NULL"); + return *mpInput; + } + + inline const Matrix<BaseFloat>& + Component:: + GetOutput() const + { + return mOutput; + } + + inline const Matrix<BaseFloat>& + Component:: + GetErrorInput() const + { + if (NULL == mpErrorInput) Error("mpErrorInput is NULL"); + return *mpErrorInput; + } + + inline const Matrix<BaseFloat>& + Component:: + GetErrorOutput() const + { + return mErrorOutput; + } + + inline size_t + Component:: + GetNInputs() const + { + return mNInputs; + } + + inline size_t + Component:: + GetNOutputs() const + { + return mNOutputs; + } + + + + ////////////////////////////////////////////////////////////////////////// + // INLINE FUNCTIONS + // UpdatableComponent:: + + inline + UpdatableComponent:: + UpdatableComponent(size_t nInputs, size_t nOutputs, Component *pPred) + : Component(nInputs, nOutputs, pPred), + mLearningRate(0.0), mMomentum(0.0), mWeightcost(0.0), mBunchsize(0) + { + ; + } + + + inline + UpdatableComponent:: + ~UpdatableComponent() + { + ; + } + + + inline void + UpdatableComponent:: + LearnRate(BaseFloat rate) + { + mLearningRate = rate; + } + + inline BaseFloat + UpdatableComponent:: + LearnRate() const + { + return mLearningRate; + } + + + inline void + UpdatableComponent:: + Momentum(BaseFloat mmt) + { + mMomentum = mmt; + } + + inline BaseFloat + UpdatableComponent:: + Momentum() const + { + return mMomentum; + } + + + inline void + UpdatableComponent:: + Weightcost(BaseFloat cost) + { + mWeightcost = cost; + } + + inline BaseFloat + UpdatableComponent:: + Weightcost() const + { + return mWeightcost; + } + + + inline void + UpdatableComponent:: + Bunchsize(size_t size) + { + mBunchsize = size; + } + + inline size_t + UpdatableComponent:: + Bunchsize() const + { + return mBunchsize; + } + + +} // namespace TNet + + +#endif diff --git a/src/TNetLib/Makefile b/src/TNetLib/Makefile new file mode 100644 index 0000000..58ff988 --- /dev/null +++ b/src/TNetLib/Makefile @@ -0,0 +1,29 @@ + +include ../tnet.mk + +INCLUDE = -I. 
-I../KaldiLib -I../STKLib/
+
+all: libTNetLib.a
+
+libTNetLib.a: $(OBJ)
+	$(AR) ruv $@ $(OBJ)
+	$(RANLIB) $@
+
+%.o : %.cc
+	$(CXX) -o $@ -c $< $(CFLAGS) $(CXXFLAGS) $(INCLUDE)
+
+
+
+.PHONY: clean doc depend
+clean:
+	rm -f *.o *.a
+
+doc:
+	doxygen ../../doc/doxyfile_TNetLib
+
+depend:
+	$(CXX) -M $(CXXFLAGS) *.cc $(INCLUDE) > .depend.mk
+
+-include .depend.mk
+
+
diff --git a/src/TNetLib/Mutex.cc b/src/TNetLib/Mutex.cc
new file mode 100644
index 0000000..4ec956a
--- /dev/null
+++ b/src/TNetLib/Mutex.cc
@@ -0,0 +1,48 @@
+
+#include <pthread.h>
+#include <cerrno>
+
+#include "Error.h"
+#include "Mutex.h"
+
+namespace TNet {
+
+
+Mutex::Mutex() {
+  if(0 != pthread_mutex_init(&mutex_,NULL))
+    KALDI_ERR << "Cannot initialize mutex";
+}
+
+
+Mutex::~Mutex() {
+  if(0 != pthread_mutex_destroy(&mutex_))
+    KALDI_ERR << "Cannot destroy mutex";
+}
+
+
+void Mutex::Lock() {
+  if(0 != pthread_mutex_lock(&mutex_))
+    KALDI_ERR << "Error on locking mutex";
+}
+
+
+bool Mutex::TryLock() {
+  //pthread_mutex_trylock returns immediately; plain pthread_mutex_lock
+  //would block and never produce EBUSY here
+  int ret = pthread_mutex_trylock(&mutex_);
+  switch (ret) {
+    case 0: return true;
+    case EBUSY: return false;
+    default: KALDI_ERR << "Error on try-locking mutex";
+  }
+  return 0;//make compiler not complain
+}
+
+
+void Mutex::Unlock() {
+  if(0 != pthread_mutex_unlock(&mutex_))
+    KALDI_ERR << "Error on unlocking mutex";
+}
+
+
+
+}//namespace TNet
+
diff --git a/src/TNetLib/Mutex.h b/src/TNetLib/Mutex.h
new file mode 100644
index 0000000..ae2cfff
--- /dev/null
+++ b/src/TNetLib/Mutex.h
@@ -0,0 +1,34 @@
+
+#include <pthread.h>
+
+namespace TNet {
+
+/**
+ * This class encapsulates a mutex to ensure
+ * exclusive access to some critical section
+ * which manipulates shared resources.
+ *
+ * The mutex must be unlocked from the
+ * SAME THREAD which locked it
+ */
+class Mutex {
+ public:
+  Mutex();
+  ~Mutex();
+
+  void Lock();
+
+  /**
+   * Try to lock the mutex without waiting for it.
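+   * A minimal guarded-section sketch (hypothetical caller code):
+   *   Mutex m;
+   *   if(m.TryLock()) { // returns immediately instead of blocking
+   *     // ... touch the shared state ...
+   *     m.Unlock();
+   *   }
+   *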
+   * Returns: true when the lock was successful,
+   *          false when the mutex was already locked
+   */
+  bool TryLock();
+
+  void Unlock();
+
+ private:
+  pthread_mutex_t mutex_;
+};
+
+} //namespace TNet
diff --git a/src/TNetLib/Nnet.cc b/src/TNetLib/Nnet.cc
new file mode 100644
index 0000000..4b364ac
--- /dev/null
+++ b/src/TNetLib/Nnet.cc
@@ -0,0 +1,360 @@
+
+#include <algorithm>
+//#include <locale>
+#include <cctype>
+
+#include "Nnet.h"
+#include "CRBEDctFeat.h"
+#include "BlockArray.h"
+
+namespace TNet {
+
+
+
+
+void Network::Feedforward(const Matrix<BaseFloat>& in, Matrix<BaseFloat>& out,
+                          size_t start_frm_ext, size_t end_frm_ext) {
+ //empty network: copy input to output
+ if(mNnet.size() == 0) {
+ if(out.Rows() != in.Rows() || out.Cols() != in.Cols()) {
+ out.Init(in.Rows(),in.Cols());
+ }
+ out.Copy(in);
+ return;
+ }
+
+ //short input: propagate in one block
+ if(in.Rows() < 5000) {
+ Propagate(in,out);
+ } else {//long input: propagate in parts
+ //initialize
+ out.Init(in.Rows(),GetNOutputs());
+ Matrix<BaseFloat> tmp_in, tmp_out;
+ int done=0, block=1024;
+ //propagate first part
+ tmp_in.Init(block+end_frm_ext,in.Cols());
+ tmp_in.Copy(in.Range(0,block+end_frm_ext,0,in.Cols()));
+ Propagate(tmp_in,tmp_out);
+ out.Range(0,block,0,tmp_out.Cols()).Copy(
+ tmp_out.Range(0,block,0,tmp_out.Cols())
+ );
+ done += block;
+ //propagate middle parts
+ while((done+2*block) < in.Rows()) {
+ tmp_in.Init(block+start_frm_ext+end_frm_ext,in.Cols());
+ tmp_in.Copy(in.Range(done-start_frm_ext, block+start_frm_ext+end_frm_ext, 0,in.Cols()));
+ Propagate(tmp_in,tmp_out);
+ out.Range(done,block,0,tmp_out.Cols()).Copy(
+ tmp_out.Range(start_frm_ext,block,0,tmp_out.Cols())
+ );
+ done += block;
+ }
+ //propagate last part
+ tmp_in.Init(in.Rows()-done+start_frm_ext,in.Cols());
+ tmp_in.Copy(in.Range(done-start_frm_ext,in.Rows()-done+start_frm_ext,0,in.Cols()));
+ Propagate(tmp_in,tmp_out);
+ out.Range(done,out.Rows()-done,0,out.Cols()).Copy(
+ tmp_out.Range(start_frm_ext,tmp_out.Rows()-start_frm_ext,0,tmp_out.Cols())
+ );
+
+ done += tmp_out.Rows()-start_frm_ext;
+ assert(done == out.Rows());
+ }
+}
+
+
+void Network::Propagate(const Matrix<BaseFloat>& in, Matrix<BaseFloat>& out) {
+ //empty network: copy input to output
+ if(mNnet.size() == 0) {
+ if(out.Rows() != in.Rows() || out.Cols() != in.Cols()) {
+ out.Init(in.Rows(),in.Cols());
+ }
+ out.Copy(in);
+ return;
+ }
+
+ //this will keep pointer to matrix 'in', for backprop
+ mNnet.front()->SetInput(in);
+
+ //propagate
+ LayeredType::iterator it;
+ for(it=mNnet.begin(); it!=mNnet.end(); ++it) {
+ (*it)->Propagate();
+ }
+
+ //copy the output matrix
+ const Matrix<BaseFloat>& mat = mNnet.back()->GetOutput();
+ if(out.Rows() != mat.Rows() || out.Cols() != mat.Cols()) {
+ out.Init(mat.Rows(),mat.Cols());
+ }
+ out.Copy(mat);
+
+}
+
+
+void Network::Backpropagate(const Matrix<BaseFloat>& globerr) {
+ //pass matrix to last component
+ mNnet.back()->SetErrorInput(globerr);
+
+ // back-propagation : reversed order,
+ LayeredType::reverse_iterator it;
+ for(it=mNnet.rbegin(); it!=mNnet.rend(); ++it) {
+ //first component does not backpropagate error (no predecessors)
+ if(*it != mNnet.front()) {
+ (*it)->Backpropagate();
+ }
+ //compute gradient if updatable component
+ if((*it)->IsUpdatable()) {
+ UpdatableComponent& comp = dynamic_cast<UpdatableComponent&>(**it);
+ comp.Gradient(); //compute gradient
+ }
+ }
+}
+
+
+void Network::AccuGradient(const Network& src, int thr, int thrN) {
+ LayeredType::iterator it;
+ LayeredType::const_iterator it2;
+
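+ //(walks this network and the worker's clone 'src' layer by layer; each
+ // updatable layer adds src's per-bunch gradient into the shared
+ // accumulators, with (thr,thrN) selecting the row-stripe owned by the
+ // calling thread, as in BiasedLinearity::Update)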
for(it=mNnet.begin(), it2=src.mNnet.begin(); it!=mNnet.end(); ++it,++it2) { + if((*it)->IsUpdatable()) { + UpdatableComponent& comp = dynamic_cast<UpdatableComponent&>(**it); + const UpdatableComponent& comp2 = dynamic_cast<const UpdatableComponent&>(**it2); + comp.AccuGradient(comp2,thr,thrN); + } + } +} + + +void Network::Update(int thr, int thrN) { + LayeredType::iterator it; + + for(it=mNnet.begin(); it!=mNnet.end(); ++it) { + if((*it)->IsUpdatable()) { + UpdatableComponent& comp = dynamic_cast<UpdatableComponent&>(**it); + comp.Update(thr,thrN); + } + } +} + + +Network* Network::Clone() { + Network* net = new Network; + LayeredType::iterator it; + for(it = mNnet.begin(); it != mNnet.end(); ++it) { + //clone + net->mNnet.push_back((*it)->Clone()); + //connect network + if(net->mNnet.size() > 1) { + Component* last = *(net->mNnet.end()-1); + Component* prev = *(net->mNnet.end()-2); + last->SetInput(prev->GetOutput()); + prev->SetErrorInput(last->GetErrorOutput()); + } + } + + //copy the learning rate + //net->SetLearnRate(GetLearnRate()); + + return net; +} + + +void Network::ReadNetwork(const char* pSrc) { + std::ifstream in(pSrc); + if(!in.good()) { + Error(std::string("Error, cannot read model: ")+pSrc); + } + ReadNetwork(in); + in.close(); +} + + + +void Network::ReadNetwork(std::istream& rIn) { + //get the network elements from a factory + Component *pComp; + while(NULL != (pComp = ComponentFactory(rIn))) + mNnet.push_back(pComp); +} + + +void Network::WriteNetwork(const char* pDst) { + std::ofstream out(pDst); + if(!out.good()) { + Error(std::string("Error, cannot write model: ")+pDst); + } + WriteNetwork(out); + out.close(); +} + + +void Network::WriteNetwork(std::ostream& rOut) { + //dump all the componetns + LayeredType::iterator it; + for(it=mNnet.begin(); it!=mNnet.end(); ++it) { + ComponentDumper(rOut, **it); + } +} + + +Component* +Network:: +ComponentFactory(std::istream& rIn) +{ + rIn >> std::ws; + if(rIn.eof()) return NULL; + + Component* pRet=NULL; + Component* pPred=NULL; + + std::string componentTag; + size_t nInputs, nOutputs; + + rIn >> std::ws; + rIn >> componentTag; + if(componentTag == "") return NULL; //nothing left in the file + + //make it lowercase + std::transform(componentTag.begin(), componentTag.end(), + componentTag.begin(), tolower); + + //the 'endblock' tag terminates the network + if(componentTag == "<endblock>") return NULL; + + + if(componentTag[0] != '<' || componentTag[componentTag.size()-1] != '>') { + Error(std::string("Invalid component tag:")+componentTag); + } + + rIn >> std::ws; + rIn >> nOutputs; + rIn >> std::ws; + rIn >> nInputs; + assert(nInputs > 0 && nOutputs > 0); + + //make coupling with predecessor + if(mNnet.size() == 0) { + pPred = NULL; + } else { + pPred = mNnet.back(); + } + + //array with list of component tags + static const std::string TAGS[] = { + "<biasedlinearity>", + "<sharedlinearity>", + + "<sigmoid>", + "<softmax>", + "<blocksoftmax>", + + "<expand>", + "<copy>", + "<transpose>", + "<blocklinearity>", + "<bias>", + "<window>", + "<log>", + + "<blockarray>", + }; + + static const int n_tags = sizeof(TAGS) / sizeof(TAGS[0]); + int i = 0; + for(i=0; i<n_tags; i++) { + if(componentTag == TAGS[i]) break; + } + + //switch according to position in array TAGS + switch(i) { + case 0: pRet = new BiasedLinearity(nInputs,nOutputs,pPred); break; + case 1: pRet = new SharedLinearity(nInputs,nOutputs,pPred); break; + + case 2: pRet = new Sigmoid(nInputs,nOutputs,pPred); break; + case 3: pRet = new Softmax(nInputs,nOutputs,pPred); 
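+ //(the component stream consumed by ComponentFactory is
+ // "<tag> <nOutputs> <nInputs>" followed by the component's own parameters;
+ // e.g. a hypothetical "<biasedlinearity> 1024 39" header line would be
+ // followed by the weight matrix and bias vector read in ReadFromStream())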
break; + case 4: pRet = new BlockSoftmax(nInputs,nOutputs,pPred); break; + + case 5: pRet = new Expand(nInputs,nOutputs,pPred); break; + case 6: pRet = new Copy(nInputs,nOutputs,pPred); break; + case 7: pRet = new Transpose(nInputs,nOutputs,pPred); break; + case 8: pRet = new BlockLinearity(nInputs,nOutputs,pPred); break; + case 9: pRet = new Bias(nInputs,nOutputs,pPred); break; + case 10: pRet = new Window(nInputs,nOutputs,pPred); break; + case 11: pRet = new Log(nInputs,nOutputs,pPred); break; + + case 12: pRet = new BlockArray(nInputs,nOutputs,pPred); break; + + default: Error(std::string("Unknown Component tag:")+componentTag); + } + + //read params if it is updatable component + pRet->ReadFromStream(rIn); + //return + return pRet; +} + + +void +Network:: +ComponentDumper(std::ostream& rOut, Component& rComp) +{ + //use tags of all the components; or the identification codes + //array with list of component tags + static const Component::ComponentType TYPES[] = { + Component::BIASED_LINEARITY, + Component::SHARED_LINEARITY, + + Component::SIGMOID, + Component::SOFTMAX, + Component::BLOCK_SOFTMAX, + + Component::EXPAND, + Component::COPY, + Component::TRANSPOSE, + Component::BLOCK_LINEARITY, + Component::BIAS, + Component::WINDOW, + Component::LOG, + + Component::BLOCK_ARRAY, + }; + static const std::string TAGS[] = { + "<biasedlinearity>", + "<sharedlinearity>", + + "<sigmoid>", + "<softmax>", + "<blocksoftmax>", + + "<expand>", + "<copy>", + "<transpose>", + "<blocklinearity>", + "<bias>", + "<window>", + "<log>", + + "<blockarray>", + }; + static const int MAX = sizeof TYPES / sizeof TYPES[0]; + + int i; + for(i=0; i<MAX; ++i) { + if(TYPES[i] == rComp.GetType()) break; + } + if(i == MAX) Error("Unknown ComponentType"); + + //dump the component tag + rOut << TAGS[i] << " " + << rComp.GetNOutputs() << " " + << rComp.GetNInputs() << std::endl; + + //dump the parameters (if any) + rComp.WriteToStream(rOut); +} + + + + +} //namespace + diff --git a/src/TNetLib/Nnet.h b/src/TNetLib/Nnet.h new file mode 100644 index 0000000..12e2585 --- /dev/null +++ b/src/TNetLib/Nnet.h @@ -0,0 +1,194 @@ +#ifndef _NETWORK_H_ +#define _NETWORK_H_ + +#include "Component.h" +#include "BiasedLinearity.h" +#include "SharedLinearity.h" +#include "Activation.h" + +#include "Vector.h" + +#include <vector> + + +namespace TNet { + +class Network +{ +////////////////////////////////////// +// Typedefs +typedef std::vector<Component*> LayeredType; + + ////////////////////////////////////// + // Disable copy construction and assignment + private: + Network(Network&); + Network& operator=(Network&); + + public: + // allow incomplete network creation + Network() + { } + + ~Network(); + + int Layers() const + { return mNnet.size(); } + + Component& Layer(int i) + { return *mNnet[i]; } + + const Component& Layer(int i) const + { return *mNnet[i]; } + + /// Feedforward the data per blocks, this needs less memory, + /// and allows to process very long files. 
+ /// It does not trim the *_frm_ext, but uses it + /// for concatenation of segments + void Feedforward(const Matrix<BaseFloat>& in, Matrix<BaseFloat>& out, + size_t start_frm_ext, size_t end_frm_ext); + /// forward the data to the output + void Propagate(const Matrix<BaseFloat>& in, Matrix<BaseFloat>& out); + /// backpropagate the error while calculating the gradient + void Backpropagate(const Matrix<BaseFloat>& globerr); + + /// accumulate the gradient from other networks + void AccuGradient(const Network& src, int thr, int thrN); + /// update weights, reset the accumulator + void Update(int thr, int thrN); + + Network* Clone(); ///< Clones the network + + void ReadNetwork(const char* pSrc); ///< read the network from file + void ReadNetwork(std::istream& rIn); ///< read the network from stream + void WriteNetwork(const char* pDst); ///< write network to file + void WriteNetwork(std::ostream& rOut); ///< write network to stream + + size_t GetNInputs() const; ///< Dimensionality of the input features + size_t GetNOutputs() const; ///< Dimensionality of the desired vectors + + void SetLearnRate(BaseFloat learnRate); ///< set the learning rate value + BaseFloat GetLearnRate(); ///< get the learning rate value + + void SetWeightcost(BaseFloat l2); ///< set the L2 regularization const + + void ResetBunchsize(); ///< reset the frame counter (needed for L2 regularization + void AccuBunchsize(const Network& src); ///< accumulate frame counts in bunch (needed in L2 regularization + + private: + /// Creates a component by reading from stream + Component* ComponentFactory(std::istream& In); + /// Dumps component into a stream + void ComponentDumper(std::ostream& rOut, Component& rComp); + + private: + LayeredType mNnet; ///< container with the network layers + +}; + + +////////////////////////////////////////////////////////////////////////// +// INLINE FUNCTIONS +// Network:: +inline Network::~Network() { + //delete all the components + LayeredType::iterator it; + for(it=mNnet.begin(); it!=mNnet.end(); ++it) { + delete *it; + } +} + + +inline size_t Network::GetNInputs() const { + assert(mNnet.size() > 0); + return mNnet.front()->GetNInputs(); +} + + +inline size_t +Network:: +GetNOutputs() const +{ + assert(mNnet.size() > 0); + return mNnet.back()->GetNOutputs(); +} + + + +inline void +Network:: +SetLearnRate(BaseFloat learnRate) +{ + LayeredType::iterator it; + for(it=mNnet.begin(); it!=mNnet.end(); ++it) { + if((*it)->IsUpdatable()) { + dynamic_cast<UpdatableComponent*>(*it)->LearnRate(learnRate); + } + } +} + + +inline BaseFloat +Network:: +GetLearnRate() +{ + //TODO - learn rates may differ layer to layer + assert(mNnet.size() > 0); + for(size_t i=0; i<mNnet.size(); i++) { + if(mNnet[i]->IsUpdatable()) { + return dynamic_cast<UpdatableComponent*>(mNnet[i])->LearnRate(); + } + } + Error("No updatable NetComponents"); + return -1; +} + + +inline void +Network:: +SetWeightcost(BaseFloat l2) +{ + LayeredType::iterator it; + for(it=mNnet.begin(); it!=mNnet.end(); ++it) { + if((*it)->IsUpdatable()) { + dynamic_cast<UpdatableComponent*>(*it)->Weightcost(l2); + } + } +} + + +inline void +Network:: +ResetBunchsize() +{ + LayeredType::iterator it; + for(it=mNnet.begin(); it!=mNnet.end(); ++it) { + if((*it)->IsUpdatable()) { + dynamic_cast<UpdatableComponent*>(*it)->Bunchsize(0); + } + } +} + +inline void +Network:: +AccuBunchsize(const Network& src) +{ + assert(Layers() == src.Layers()); + assert(Layers() > 0); + + for(int i=0; i<Layers(); i++) { + if(Layer(i).IsUpdatable()) { + UpdatableComponent& 
tgt_comp = dynamic_cast<UpdatableComponent&>(Layer(i)); + const UpdatableComponent& src_comp = dynamic_cast<const UpdatableComponent&>(src.Layer(i)); + tgt_comp.Bunchsize(tgt_comp.Bunchsize()+src_comp.GetOutput().Rows()); + } + } +} + + + +} //namespace + +#endif + + diff --git a/src/TNetLib/ObjFun.cc b/src/TNetLib/ObjFun.cc new file mode 100644 index 0000000..c899fb1 --- /dev/null +++ b/src/TNetLib/ObjFun.cc @@ -0,0 +1,231 @@ + +#include "ObjFun.h" +#include "Error.h" + +#include <limits> + +namespace TNet { + + +ObjectiveFunction* ObjectiveFunction::Factory(ObjFunType type) { + ObjectiveFunction* ret = NULL; + switch(type) { + case MEAN_SQUARE_ERROR: ret = new MeanSquareError; break; + case CROSS_ENTROPY: ret = new CrossEntropy; break; + default: Error("Unknown ObjectiveFunction type"); + } + return ret; +} + + +/* + * MeanSquareError + */ +void MeanSquareError::Evaluate(const Matrix<BaseFloat>& net_out, const Matrix<BaseFloat>& target, Matrix<BaseFloat>* err) { + + //check dimensions + assert(net_out.Rows() == target.Rows()); + assert(net_out.Cols() == target.Cols()); + if(err->Rows() != net_out.Rows() || err->Cols() != net_out.Cols()) { + err->Init(net_out.Rows(),net_out.Cols()); + } + + //compute global gradient + err->Copy(net_out); + err->AddScaled(-1,target); + + //compute loss function + double sum = 0; + for(size_t r=0; r<err->Rows(); r++) { + for(size_t c=0; c<err->Cols(); c++) { + BaseFloat val = (*err)(r,c); + sum += val*val; + } + } + error_ += sum/2.0; + frames_ += net_out.Rows(); +} + + +std::string MeanSquareError::Report() { + std::stringstream ss; + ss << "Mse:" << error_ << " frames:" << frames_ + << " err/frm:" << error_/frames_ + << "\n"; + return ss.str(); +} + + +/* + * CrossEntropy + */ + +///Find maximum in float array +inline int FindMaxId(const BaseFloat* ptr, size_t N) { + BaseFloat mval = -1e20f; + int mid = -1; + for(size_t i=0; i<N; i++) { + if(ptr[i] > mval) { + mid = i; mval = ptr[i]; + } + } + return mid; +} + + +void +CrossEntropy::Evaluate(const Matrix<BaseFloat>& net_out, const Matrix<BaseFloat>& target, Matrix<BaseFloat>* err) +{ + //check dimensions + assert(net_out.Rows() == target.Rows()); + assert(net_out.Cols() == target.Cols()); + if(err->Rows() != net_out.Rows() || err->Cols() != net_out.Cols()) { + err->Init(net_out.Rows(),net_out.Cols()); + } + + //allocate confunsion buffers + if(confusion_mode_ != NO_CONF) { + if(confusion_.Rows() != target.Cols() || confusion_.Cols() != target.Cols()) { + confusion_.Init(target.Cols(),target.Cols()); + confusion_count_.Init(target.Cols()); + diag_confusion_.Init(target.Cols()); + } + } + + //compute global gradient (assuming on softmax input) + err->Copy(net_out); + err->AddScaled(-1,target); + + //collect max values + std::vector<size_t> max_target_id(target.Rows()); + std::vector<size_t> max_netout_id(target.Rows()); + //check correct classification + int corr = 0; + for(size_t r=0; r<net_out.Rows(); r++) { + int id_netout = FindMaxId(net_out[r].pData(),net_out.Cols()); + int id_target = FindMaxId(target[r].pData(),target.Cols()); + if(id_netout == id_target) corr++; + max_target_id[r] = id_target;//store the max value + max_netout_id[r] = id_netout; + } + + //compute loss function + double sumerr = 0; + for(size_t r=0; r<net_out.Rows(); r++) { + if(target(r,max_target_id[r]) == 1.0) { + //pick the max value..., rest is zero + BaseFloat val = log(net_out(r,max_target_id[r])); + if(val < -1e10f) val = -1e10f; + sumerr += val; + } else { + //process whole posterior vect. 
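+ //(the accumulated loss is E = -sum_r sum_c t(r,c)*log(y(r,c)); for 1-of-N
+ // targets only the t==1 term contributes, which is the fast path above,
+ // and together with the softmax output this yields the simple gradient
+ // err = y - t computed at the top of this function)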
+ for(size_t c=0; c<net_out.Cols(); c++) {
+ if(target(r,c) != 0.0) {
+ BaseFloat val = target(r,c)*log(net_out(r,c));
+ if(val < -1e10f) val = -1e10f;
+ sumerr += val;
+ }
+ }
+ }
+ }
+
+ //accumulate the confusion matrix
+ if(confusion_mode_ != NO_CONF) {
+ for(size_t r=0; r<net_out.Rows(); r++) {
+ int id_target = max_target_id[r];
+ int id_netout = max_netout_id[r];
+ switch(confusion_mode_) {
+ case MAX_CONF:
+ confusion_(id_target,id_netout) += 1;
+ break;
+ case SOFT_CONF:
+ confusion_[id_target].Add(net_out[r]);
+ break;
+ case DIAG_MAX_CONF:
+ diag_confusion_[id_target] += ((id_target==id_netout)?1:0);
+ break;
+ case DIAG_SOFT_CONF:
+ diag_confusion_[id_target] += net_out[r][id_target];
+ break;
+ default:
+ KALDI_ERR << "Unknown confusion type:" << confusion_mode_;
+ }
+ confusion_count_[id_target] += 1;
+ }
+ }
+
+ error_ -= sumerr;
+ frames_ += net_out.Rows();
+ corr_ += corr;
+}
+
+
+std::string CrossEntropy::Report() {
+ std::stringstream ss;
+ ss << "Xent:" << error_ << " frames:" << frames_
+ << " err/frm:" << error_/frames_
+ << " correct[" << 100.0*corr_/frames_ << "%]"
+ << "\n";
+
+ if(confusion_mode_ != NO_CONF) {
+ //read class tags
+ std::vector<std::string> tag;
+ {
+ std::ifstream ifs(output_label_map_);
+ assert(ifs.good());
+ std::string str;
+ while(!ifs.eof()) {
+ ifs >> str;
+ tag.push_back(str);
+ }
+ }
+ assert(confusion_count_.Dim() <= tag.size());
+
+ //print confusion matrix
+ if(confusion_mode_ == MAX_CONF || confusion_mode_ == SOFT_CONF) {
+ ss << "Row:label Col:hyp\n" << confusion_ << "\n";
+ }
+
+ //***print per-target accuracies
+ for(int i=0; i<confusion_count_.Dim(); i++) {
+ //get the numerator
+ BaseFloat numerator = 0.0;
+ switch (confusion_mode_) {
+ case MAX_CONF: case SOFT_CONF:
+ numerator = confusion_[i][i];
+ break;
+ case DIAG_MAX_CONF: case DIAG_SOFT_CONF:
+ numerator = diag_confusion_[i];
+ break;
+ default:
+ KALDI_ERR << "Unsupported confusion mode:" << confusion_mode_;
+ }
+ //add line to report
+ ss << std::setw(30) << tag[i] << " "
+ << std::setw(10) << 100.0*numerator/confusion_count_[i] << "%"
+ << " [" << numerator << "/" << confusion_count_[i] << "]\n";
+ } //***print per-target accuracies
+ }// != NO_CONF
+
+ return ss.str();
+}
+
+
+void CrossEntropy::MergeStats(const ObjectiveFunction& inst) {
+ const CrossEntropy& xent = dynamic_cast<const CrossEntropy&>(inst);
+ frames_ += xent.frames_; error_ += xent.error_; corr_ += xent.corr_;
+ //sum the confusion statistics
+ if(confusion_mode_ != NO_CONF) {
+ if(confusion_.Rows() != xent.confusion_.Rows()) {
+ confusion_.Init(xent.confusion_.Rows(),xent.confusion_.Cols());
+ confusion_count_.Init(xent.confusion_count_.Dim());
+ diag_confusion_.Init(xent.diag_confusion_.Dim());
+ }
+ confusion_.Add(xent.confusion_);
+ confusion_count_.Add(xent.confusion_count_);
+ diag_confusion_.Add(xent.diag_confusion_);
+ }
+}
+
+
+} // namespace TNet
diff --git a/src/TNetLib/ObjFun.h b/src/TNetLib/ObjFun.h
new file mode 100644
index 0000000..c458340
--- /dev/null
+++ b/src/TNetLib/ObjFun.h
@@ -0,0 +1,160 @@
+#ifndef _TNET_OBJ_FUN_H
+#define _TNET_OBJ_FUN_H
+
+#include <cassert>
+#include <limits>
+#include <cmath>
+
+#include "Matrix.h"
+#include "Vector.h"
+
+namespace TNet {
+
+ /**
+ * General interface for objective functions
+ */
+ class ObjectiveFunction
+ {
+ public:
+ /// Enum with objective function types
+ typedef enum {
+ OBJ_FUN_I = 0x0300,
+ MEAN_SQUARE_ERROR,
+ CROSS_ENTROPY,
+ } ObjFunType;
+
+ public:
+ /// Factory for creating objective function instances
+ static ObjectiveFunction*
Factory(ObjFunType type); + + ////////////////////////////////////////////////////////////// + // Interface specification + protected: + ObjectiveFunction() { }; /// constructor + public: + virtual ~ObjectiveFunction() { }; /// destructor + + virtual ObjFunType GetType() = 0; + virtual const char* GetName() = 0; + virtual ObjectiveFunction* Clone() = 0; + + ///calculate error of network output + virtual void Evaluate(const Matrix<BaseFloat>& net_out, const Matrix<BaseFloat>& target, Matrix<BaseFloat>* err) = 0; + + ///get the accumulated error + virtual double GetError() = 0; + ///the number of processed frames + virtual size_t GetFrames() = 0; + + ///report the error to string + virtual std::string Report() = 0; + + ///sum the frame counts from more instances + virtual void MergeStats(const ObjectiveFunction& inst) = 0; + }; + + + + /** + * Mean square error function + */ + class MeanSquareError : public ObjectiveFunction + { + public: + MeanSquareError() + : ObjectiveFunction(), frames_(0), error_(0) + { } + + ~MeanSquareError() + { } + + ObjFunType GetType() + { return MEAN_SQUARE_ERROR; } + + const char* GetName() + { return "<MeanSquareError>"; } + + ObjectiveFunction* Clone() + { return new MeanSquareError(*this); } + + void Evaluate(const Matrix<BaseFloat>& net_out, const Matrix<BaseFloat>& target, Matrix<BaseFloat>* err); + + size_t GetFrames() + { return frames_; } + + double GetError() + { return error_; } + + std::string Report(); + + void MergeStats(const ObjectiveFunction& inst) { + const MeanSquareError& mse = dynamic_cast<const MeanSquareError&>(inst); + frames_ += mse.frames_; error_ += mse.error_; + } + + private: + size_t frames_; + double error_; + + }; + + + /** + * Cross entropy error function + */ + class CrossEntropy : public ObjectiveFunction + { + public: + enum ConfusionMode { NO_CONF=0, MAX_CONF, SOFT_CONF, DIAG_MAX_CONF, DIAG_SOFT_CONF }; + + public: + CrossEntropy() + : ObjectiveFunction(), frames_(0), error_(0), corr_(0), confusion_mode_(NO_CONF), output_label_map_(NULL) + { } + + ~CrossEntropy() + { } + + ObjFunType GetType() + { return CROSS_ENTROPY; } + + const char* GetName() + { return "<cross_entropy>"; } + + ObjectiveFunction* Clone() + { return new CrossEntropy(*this); } + + void Evaluate(const Matrix<BaseFloat>& net_out, const Matrix<BaseFloat>& target, Matrix<BaseFloat>* err); + + size_t GetFrames() + { return frames_; } + + double GetError() + { return error_; } + + void SetConfusionMode(enum ConfusionMode m) + { confusion_mode_ = m; } + + void SetOutputLabelMap(const char* map) + { output_label_map_ = map; } + + std::string Report(); + + void MergeStats(const ObjectiveFunction& inst); + private: + size_t frames_; + double error_; + size_t corr_; + + ConfusionMode confusion_mode_; + Matrix<float> confusion_; + Vector<int> confusion_count_; + Vector<double> diag_confusion_; + const char* output_label_map_; + }; + + +} //namespace TNet + + +#endif diff --git a/src/TNetLib/Platform.h b/src/TNetLib/Platform.h new file mode 100644 index 0000000..628b9cd --- /dev/null +++ b/src/TNetLib/Platform.h @@ -0,0 +1,402 @@ +#ifndef _TNET_PLATFORM_H +#define _TNET_PLATFORM_H + +/** + * \file Platform.h + * \brief DNN training class multicore version + */ + +#include "Thread.h" +#include "Matrix.h" + +#include "Features.h" +#include "Labels.h" + +#include "Cache.h" +#include "Nnet.h" +#include "ObjFun.h" + +#include "Mutex.h" +#include "Semaphore.h" +#include "Barrier.h" +#include "Thread.h" + +#include <vector> +#include <list> +#include <iterator> + +namespace 
TNet { + +class PlatformThread; + +class Platform { + +/* +* Variables to be initialized directly from the main function +*/ +public: + FeatureRepository feature_; ///< Features specified in the input arguments and script file + LabelRepository label_; ///< Labels specified in the lable map file + + Network nnet_transf_; ///< NNet transform + Network nnet_; ///< N network + ObjectiveFunction* obj_fun_; ///< Specified in the ObjectiveFunction + + int bunchsize_; + int cachesize_; + bool randomize_; + + int start_frm_ext_; + int end_frm_ext_; + + int trace_; + bool crossval_; + + long int seed_; + + /* + * Variables to be used internally during the multi-threaded training + */ + private: + Semaphore semaphore_read_; + + std::vector<std::list<Matrix<BaseFloat>*> > feature_buf_; + std::vector<std::list<Matrix<BaseFloat>*> > label_buf_; + std::vector<Mutex> mutex_buf_; + + std::vector<Network*> nnet_transf2_; + + std::vector<Cache> cache_; + + std::vector<Network*> nnet2_; + std::vector<ObjectiveFunction*> obj_fun2_; + std::vector<bool> sync_mask_; + + Barrier barrier_; + bool end_reading_; + std::vector<Timer> tim_; + std::vector<double> tim_accu_; + + int num_thr_; + Semaphore semaphore_endtrain_; + Semaphore semaphore_endtrain2_; + + public: + Mutex cout_mutex_; + + /* + * Methods + */ + public: + Platform() + : bunchsize_(0), cachesize_(0), randomize_(false), + start_frm_ext_(0), end_frm_ext_(0), trace_(0), + crossval_(false), seed_(0), + end_reading_(false), num_thr_(0) + { } + + ~Platform() + { + for(size_t i=0; i<nnet_transf2_.size(); i++) { + delete nnet_transf2_[i]; + } + for(size_t i=0; i<nnet2_.size(); i++) { + delete nnet2_[i]; + } + for(size_t i=0; i<obj_fun2_.size(); i++) { + delete obj_fun2_[i]; + } + } + + /// Run the training using num_threads threads + void RunTrain(int num_threads); + + private: + /// The data-reading thread + void ReadData(); + /// The training thread + void Thread(int thr); + + friend class PlatformThread; +}; + + + +/** + * Inherit Thread for the training threads + */ +class PlatformThread : public Thread { + public: + PlatformThread(Platform* pf) + : platform_(*pf) + { } + + private: + void Execute(void* arg) { + long long thr_id = reinterpret_cast<long long>(arg); + platform_.Thread(thr_id); + } + + private: + Platform& platform_; +}; + + + + + +void Platform::RunTrain(int num_thr) { + num_thr_ = num_thr; + + /* + * Initialize parallel training + */ + feature_buf_.resize(num_thr); + label_buf_.resize(num_thr); + mutex_buf_.resize(num_thr); + cache_.resize(num_thr); + sync_mask_.resize(num_thr); + barrier_.SetThreshold(num_thr); + + tim_.resize(num_thr); + tim_accu_.resize(num_thr,0.0); + + int bunchsize = bunchsize_/num_thr; + int cachesize = (cachesize_/num_thr/bunchsize)*bunchsize; + std::cout << "Bunchsize:" << bunchsize << "*" << num_thr << "=" << bunchsize*num_thr + << " Cachesize:" << cachesize << "*" << num_thr << "=" << cachesize*num_thr << "\n"; + for(int i=0; i<num_thr; i++) { + //clone transforms + nnet_transf2_.push_back(nnet_transf_.Clone()); + //create cache + cache_[i].Init(cachesize,bunchsize,seed_); + cache_[i].Trace(trace_); + //clone networks + nnet2_.push_back(nnet_.Clone()); + //clone objective function objects + obj_fun2_.push_back(obj_fun_->Clone()); + //enable threads to sync weights + sync_mask_[i] = true; + } + + /* + * Run training threads + */ + std::vector<PlatformThread*> threads; + for(intptr_t i=0; i<num_thr; i++) { + PlatformThread* t = new PlatformThread(this); + t->Start(reinterpret_cast<void*>(i)); + 
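+ //(the bunch/cache division above keeps the global training geometry:
+ // e.g. with bunchsize_=256, cachesize_=16384 and 4 threads, each thread
+ // trains on 64-frame bunches from a 4096-frame cache, i.e. 256 and 16384
+ // frames in total across all threads)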
+
+
+
+void Platform::ReadData() try {
+ cout_mutex_.Lock();
+ std::cout << "queuesize " << feature_.QueueSize() << "\n";
+ cout_mutex_.Unlock();
+
+ int thr = 0;
+ for(feature_.Rewind();!feature_.EndOfList();feature_.MoveNext()) {
+  Matrix<BaseFloat>* fea = new Matrix<BaseFloat>;
+  Matrix<BaseFloat>* lab = new Matrix<BaseFloat>;
+
+  feature_.ReadFullMatrix(*fea);
+  label_.GenDesiredMatrix(*lab,
+    fea->Rows()-start_frm_ext_-end_frm_ext_,
+    feature_.CurrentHeader().mSamplePeriod,
+    feature_.Current().Logical().c_str());
+
+  fea->CheckData(feature_.Current().Logical());
+
+  mutex_buf_[thr].Lock();
+  feature_buf_[thr].push_back(fea);
+  label_buf_[thr].push_back(lab);
+  mutex_buf_[thr].Unlock();
+
+  //suspend reading while even the shortest buffer holds more than 20 matrices
+  if(thr == 0) {
+   int minsize = 1000000;
+   for(size_t i=0; i<feature_buf_.size(); i++) {
+    int s = feature_buf_[i].size();
+    if(s < minsize) minsize = s;
+   }
+   if(minsize > 20) semaphore_read_.Wait();
+  }
+
+  thr = (thr+1) % num_thr_;
+ }
+
+ std::cout << "[Reading finished]\n" << std::flush;
+ end_reading_ = true;
+
+} catch (std::exception& rExc) {
+ std::cerr << "Exception thrown" << std::endl;
+ std::cerr << rExc.what() << std::endl;
+ exit(1);
+}
+
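Platform::Thread() below keeps the workers in lock-step with three barrier waits per bunch: after back-propagation (every thread-local gradient exists), after accumulation (nnet_ holds the complete sum), and after Update() (nobody races into the next bunch against half-updated weights). A stripped-down sketch of that protocol; TrainOneBunch is an illustrative name, not a library function:

   void TrainOneBunch(int thr) {
     // ...Propagate / Evaluate / Backpropagate on the thread-local clone...
     barrier_.Wait(); // 1: all thread-local gradients are ready
     // each thread sums its own disjoint slice of weight rows into nnet_,
     // so AccuGradient() needs no lock
     barrier_.Wait(); // 2: the summed gradient is complete
     nnet_.Update(thr, num_thr_); // again on disjoint row slices
     barrier_.Wait(); // 3: weights are consistent before the next bunch
   }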
+void Platform::Thread(int thr_id) try {
+
+ const int thr = thr_id; //make id const for safety!
+
+ while(1) {
+  //fill the cache
+  while(!cache_[thr].Full() && !(end_reading_ && (feature_buf_[thr].size() == 0))) {
+
+   if(feature_buf_[thr].size() <= 5) {
+    semaphore_read_.Post();//wake the reader
+   }
+   if(feature_buf_[thr].size() == 0) {
+    cout_mutex_.Lock();
+    std::cout << "Thread" << thr << ", waiting for data\n";
+    cout_mutex_.Unlock();
+    sleep(1);
+   } else {
+    //get the matrices
+    mutex_buf_[thr].Lock();
+    Matrix<BaseFloat>* fea = feature_buf_[thr].front();
+    Matrix<BaseFloat>* lab = label_buf_[thr].front();
+    feature_buf_[thr].pop_front();
+    label_buf_[thr].pop_front();
+    mutex_buf_[thr].Unlock();
+
+    //transform the features
+    Matrix<BaseFloat> fea_transf;
+    nnet_transf2_[thr]->Propagate(*fea,fea_transf);
+
+    //trim the start/end frame extensions
+    SubMatrix<BaseFloat> fea_trim(
+      fea_transf,
+      start_frm_ext_,
+      fea_transf.Rows()-start_frm_ext_-end_frm_ext_,
+      0,
+      fea_transf.Cols()
+    );
+
+    //add to cache
+    cache_[thr].AddData(fea_trim,*lab);
+
+    delete fea; delete lab;
+   }
+  }
+
+  //no more data, end training...
+  if(cache_[thr].Empty()) break;
+
+  if(randomize_) { cache_[thr].Randomize(); }
+
+  //std::cout << "Thread" << thr << ", Cache#" << nr_cache++ << "\n";
+
+  //train from cache
+  Matrix<BaseFloat> fea2,lab2,out,err;
+  while(!cache_[thr].Empty()) {
+   cache_[thr].GetBunch(fea2,lab2);
+   nnet2_[thr]->Propagate(fea2,out);
+   obj_fun2_[thr]->Evaluate(out,lab2,&err);
+
+   if(!crossval_) {
+    nnet2_[thr]->Backpropagate(err);
+
+    tim_[thr].Start();
+    barrier_.Wait(); //1: all per-thread gradients are ready
+    tim_[thr].End(); tim_accu_[thr] += tim_[thr].Val();
+
+    //sum the gradient and bunchsize
+    for(int i=0; i<num_thr_; i++) {
+     if(sync_mask_[i]) {
+      nnet_.AccuGradient(*nnet2_[i],thr,num_thr_);
+      if(thr == 0) nnet_.AccuBunchsize(*nnet2_[i]);
+     }
+    }
+
+    tim_[thr].Start();
+    barrier_.Wait(); //2: the summed gradient is complete
+    tim_[thr].End(); tim_accu_[thr] += tim_[thr].Val();
+
+    //update
+    nnet_.Update(thr,num_thr_);
+
+    tim_[thr].Start();
+    barrier_.Wait(); //3: the weights are consistent again
+    tim_[thr].End(); tim_accu_[thr] += tim_[thr].Val();
+
+    //reset the bunchsize counter
+    if(thr == 0) nnet_.ResetBunchsize();
+   }
+  }
+
+ }
+
+ std::cout << "Thread" << thr << " end of data\n";
+
+ //exclude this thread's gradients from further summing
+ sync_mask_[thr] = false;
+ //increase the number of finished threads
+ semaphore_endtrain_.Post();
+
+ //keep hitting the barriers so the remaining threads can finish their updates
+ while(1) {
+  barrier_.Wait();
+  if(semaphore_endtrain_.GetValue() == num_thr_) break;
+
+  //sum the gradient and bunchsize
+  for(int i=0; i<num_thr_; i++) {
+   if(sync_mask_[i]) {
+    nnet_.AccuGradient(*nnet2_[i],thr,num_thr_);
+    if(thr == 0) nnet_.AccuBunchsize(*nnet2_[i]);
+   }
+  }
+  barrier_.Wait();
+  //update
+  nnet_.Update(thr,num_thr_);
+  barrier_.Wait();
+  //reset bunchsize counter
+  if(thr == 0) nnet_.ResetBunchsize();
+ }
+
+ //finally merge objfun stats
+ if(thr == 0) {
+  for(int i=0; i<num_thr_; i++) {
+   obj_fun_->MergeStats(*obj_fun2_[i]);
+  }
+
+  cout_mutex_.Lock();
+  std::cout << "Barrier waiting times per thread\n";
+  std::copy(tim_accu_.begin(),tim_accu_.end(),std::ostream_iterator<double>(std::cout," "));
+  std::cout << "\n";
+  cout_mutex_.Unlock();
+ }
+
+ cout_mutex_.Lock();
+ std::cout << "[Thread" << thr << " finished]\n";
+ cout_mutex_.Unlock();
+
+ if(thr == 0) {
+  semaphore_endtrain2_.Post();
+ }
+} catch (std::exception& rExc) {
+ std::cerr << "Exception thrown" << std::endl;
+ std::cerr << rExc.what() << std::endl;
+ exit(1);
+}
+
+
+
+}//namespace TNet
+
+#endif
diff --git a/src/TNetLib/Semaphore.cc b/src/TNetLib/Semaphore.cc
new file mode 100644
index 0000000..d149fb3
--- /dev/null
+++ b/src/TNetLib/Semaphore.cc
@@ -0,0 +1,64 @@
+
+#include "Semaphore.h"
+
+namespace TNet {
+
+ Semaphore::
+ Semaphore(int initValue)
+ {
+  mSemValue = initValue;
+  pthread_mutex_init(&mMutex, NULL);
+  pthread_cond_init(&mCond, NULL);
+ }
+
+ Semaphore::
+ ~Semaphore()
+ {
+  pthread_mutex_destroy(&mMutex);
+  pthread_cond_destroy(&mCond);
+ }
+
+ int
+ Semaphore::
+ TryWait()
+ {
+  pthread_mutex_lock(&mMutex);
+  if(mSemValue > 0) {
+   mSemValue--;
+   pthread_mutex_unlock(&mMutex);
+   return 0;
+  }
+  pthread_mutex_unlock(&mMutex);
+  return -1;
+ }
+
+ void
+ Semaphore::
+ Wait()
+ {
+  pthread_mutex_lock(&mMutex);
+  while(mSemValue <= 0) {
+   pthread_cond_wait(&mCond, &mMutex);
+  }
+  mSemValue--;
+  pthread_mutex_unlock(&mMutex);
+ }
+
+ void
+ Semaphore::
+ Post()
+ {
+  pthread_mutex_lock(&mMutex);
+  mSemValue++;
+  pthread_cond_signal(&mCond);
+  pthread_mutex_unlock(&mMutex);
+ }
+
+ int
+ Semaphore::
+ GetValue()
+ { return mSemValue; } //note: unlocked read, suitable only for advisory checks
+
+
+
+} //namespace
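Semaphore is a counting semaphore built from a pthread mutex and condition variable; GetValue() reads the counter without taking the lock, which is adequate only for the advisory checks Platform uses it for. A minimal usage sketch (hypothetical producer/consumer, not from the source):

   TNet::Semaphore sem(0);   // counter starts at zero
   // consumer thread:
   sem.Wait();               // blocks until the counter is positive, then decrements
   // producer thread:
   sem.Post();               // increments the counter and wakes one waiter
   // non-blocking attempt:
   if(sem.TryWait() == 0) { /* acquired */ } else { /* would have blocked */ }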
diff --git a/src/TNetLib/Semaphore.h b/src/TNetLib/Semaphore.h
new file mode 100644
index 0000000..a28ee44
--- /dev/null
+++ b/src/TNetLib/Semaphore.h
@@ -0,0 +1,26 @@
+#ifndef _SEMAPHORE_H_
+#define _SEMAPHORE_H_
+
+#include <pthread.h>
+
+namespace TNet {
+
+ class Semaphore {
+  public:
+   Semaphore(int initValue = 0);
+   ~Semaphore();
+
+   int TryWait();
+   void Wait();
+   void Post();
+   int GetValue();
+
+  private:
+   int mSemValue;
+   pthread_mutex_t mMutex;
+   pthread_cond_t mCond;
+
+ };
+} //namespace
+
+#endif
diff --git a/src/TNetLib/SharedLinearity.cc b/src/TNetLib/SharedLinearity.cc
new file mode 100644
index 0000000..108212c
--- /dev/null
+++ b/src/TNetLib/SharedLinearity.cc
@@ -0,0 +1,277 @@
+
+
+#include "SharedLinearity.h"
+#include "cblas.h"
+
+namespace TNet {
+
+void
+SharedLinearity::
+PropagateFnc(const Matrix<BaseFloat>& X, Matrix<BaseFloat>& Y)
+{
+ //precopy bias
+ for(int k=0; k<mNInstances; k++) {
+  for(size_t r=0; r<X.Rows(); r++) {
+   memcpy(Y.pRowData(r)+k*mpBias->Dim(),mpBias->pData(),mpBias->Dim()*sizeof(BaseFloat));
+  }
+ }
+
+ //multiply blockwise
+ for(int k=0; k<mNInstances; k++) {
+  SubMatrix<BaseFloat> xblock(X,0,X.Rows(),k*mpLinearity->Rows(),mpLinearity->Rows());
+  SubMatrix<BaseFloat> yblock(Y,0,Y.Rows(),k*mpLinearity->Cols(),mpLinearity->Cols());
+  yblock.BlasGemm(1.0,xblock,NO_TRANS,*mpLinearity,NO_TRANS,1.0);
+ }
+}
+
+
+void
+SharedLinearity::
+BackpropagateFnc(const Matrix<BaseFloat>& X, Matrix<BaseFloat>& Y)
+{
+ for(int k=0; k<mNInstances; k++) {
+  SubMatrix<BaseFloat> xblock(X,0,X.Rows(),k*mpLinearity->Cols(),mpLinearity->Cols());
+  SubMatrix<BaseFloat> yblock(Y,0,Y.Rows(),k*mpLinearity->Rows(),mpLinearity->Rows());
+  yblock.BlasGemm(1.0,xblock,NO_TRANS,*mpLinearity,TRANS,1.0);
+ }
+}
+
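Both passes above treat their input as mNInstances side-by-side blocks that share a single weight matrix: the forward pass computes Y_k = X_k W + b for every block k, the backward pass computes Y_k = X_k W^T. A dimension sketch with hypothetical sizes, not from the source:

   // Assume W is 100x50, the bias has 50 dims, and mNInstances = 3:
   //   input  X: Rows x 300  (three 100-column blocks, one per instance)
   //   output Y: Rows x 150  (three 50-column blocks, same W and b each)
   // PropagateFnc first memcpy's b into every 50-column output block,
   // then runs one GEMM per block: Y_k = X_k * W + Y_k  (k = 0,1,2)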
+#if 0
+void
+SharedLinearity::
+AccuUpdate()
+{
+ BaseFloat N = 1;
+ /*
+ //Not part of the interface!!!
+ if(mGradDivFrm) {
+  N = static_cast<BaseFloat>(GetInput().Rows());
+ }
+ */
+ BaseFloat mmt_gain = static_cast<BaseFloat>(1.0/(1.0-mMomentum));
+ N *= mmt_gain; //compensate higher gradient estimates due to momentum
+
+ //compensate the augmented dynamic range of the gradient caused by multiple instances
+ N *= static_cast<BaseFloat>(mNInstances);
+
+ const Matrix<BaseFloat>& X = GetInput().Data();
+ const Matrix<BaseFloat>& E = GetErrorInput().Data();
+ //get gradient of shared linearity
+ for(int k=0; k<mNInstances; k++) {
+  SubMatrix<BaseFloat> xblock(X,0,X.Rows(),k*mLinearity.Rows(),mLinearity.Rows());
+  SubMatrix<BaseFloat> eblock(E,0,E.Rows(),k*mLinearity.Cols(),mLinearity.Cols());
+  mLinearityCorrection.BlasGemm(1.0,xblock,TRANS,eblock,NO_TRANS,((k==0)?mMomentum:1.0f));
+ }
+
+ //get gradient of shared bias
+ mBiasCorrection.Scale(mMomentum);
+ for(int r=0; r<E.Rows(); r++) {
+  for(int c=0; c<E.Cols(); c++) {
+   mBiasCorrection[c%mBiasCorrection.Dim()] += E(r,c);
+  }
+ }
+
+ //perform update
+ mLinearity.AddScaled(-mLearningRate/N,mLinearityCorrection);
+ mBias.AddScaled(-mLearningRate/N,mBiasCorrection);
+
+ //regularization weight decay
+ mLinearity.AddScaled(-mLearningRate*mWeightcost,mLinearity);
+}
+#endif
+
+void
+SharedLinearity::
+ReadFromStream(std::istream& rIn)
+{
+ //number of instances of shared weights in layer
+ rIn >> std::ws >> mNInstances;
+ if(mNInstances < 1) {
+  std::ostringstream os;
+  os << "Bad number of instances:" << mNInstances;
+  Error(os.str());
+ }
+ if(GetNInputs() % mNInstances != 0 || GetNOutputs() % mNInstances != 0) {
+  std::ostringstream os;
+  os << "Number of Inputs/Outputs must be divisible by number of instances"
+     << " Inputs:" << GetNInputs()
+     << " Outputs:" << GetNOutputs()
+     << " Instances:" << mNInstances;
+  Error(os.str());
+ }
+
+ //the matrix is stored transposed, as SNet does
+ BfMatrix transpose;
+ rIn >> transpose;
+ mLinearity = BfMatrix(transpose, TRANS);
+ //biases are stored normally
+ rIn >> mBias;
+
+ if(transpose.Cols()*transpose.Rows() == 0) {
+  Error("Missing linearity matrix in network file");
+ }
+ if(mBias.Dim() == 0) {
+  Error("Missing bias vector in network file");
+ }
+
+ if(mLinearity.Cols() != (GetNOutputs() / mNInstances) ||
+    mLinearity.Rows() != (GetNInputs() / mNInstances) ||
+    mBias.Dim() != (GetNOutputs() / mNInstances)
+ ){
+  std::ostringstream os;
+  os << "Wrong dimensionalities of matrix/vector in network file\n"
+     << "Inputs:" << GetNInputs()
+     << " Outputs:" << GetNOutputs()
+     << "\n"
+     << "N-Instances:" << mNInstances
+     << "\n"
+     << "linearityCols:" << mLinearity.Cols() << "(" << mLinearity.Cols()*mNInstances << ")"
+     << " linearityRows:" << mLinearity.Rows() << "(" << mLinearity.Rows()*mNInstances << ")"
+     << " biasDims:" << mBias.Dim() << "(" << mBias.Dim()*mNInstances << ")"
+     << "\n";
+  Error(os.str());
+ }
+
+ mLinearityCorrection.Init(mLinearity.Rows(),mLinearity.Cols());
+ mBiasCorrection.Init(mBias.Dim());
+}
+
+
+void
+SharedLinearity::
+WriteToStream(std::ostream& rOut)
+{
+ rOut << mNInstances << std::endl;
+ //the matrix is stored transposed, as SNet does
+ BfMatrix transpose(mLinearity, TRANS);
+ rOut << transpose;
+ //biases are stored normally
+ rOut << mBias;
+ rOut << std::endl;
+}
+
+
+void
+SharedLinearity::
+Gradient()
+{
+ const Matrix<BaseFloat>& X = GetInput();
+ const Matrix<BaseFloat>& E = GetErrorInput();
+ //get gradient of shared linearity
+ for(int k=0; k<mNInstances; k++) {
+  SubMatrix<BaseFloat> xblock(X,0,X.Rows(),k*mpLinearity->Rows(),mpLinearity->Rows());
+  SubMatrix<BaseFloat> eblock(E,0,E.Rows(),k*mpLinearity->Cols(),mpLinearity->Cols());
+  mLinearityCorrection.BlasGemm(1.0,xblock,TRANS,eblock,NO_TRANS,((k==0)?0.0f:1.0f));
+ }
+
+ //get gradient of shared bias
+ mBiasCorrection.Set(0.0f);
+ for(int r=0; r<E.Rows(); r++) {
+  for(int c=0; c<E.Cols(); c++) {
+   mBiasCorrection[c%mBiasCorrection.Dim()] += E(r,c);
+  }
+ }
+}
+
+
+void
+SharedLinearity::
+AccuGradient(const UpdatableComponent& src, int thr, int thrN)
+{
+ //cast the argument
+ const SharedLinearity& src_comp = dynamic_cast<const SharedLinearity&>(src);
+
+ //allocate accumulators when needed
+ if(mLinearityCorrectionAccu.MSize() == 0) {
+  mLinearityCorrectionAccu.Init(mpLinearity->Rows(),mpLinearity->Cols());
+ }
+ if(mBiasCorrectionAccu.MSize() == 0) {
+  mBiasCorrectionAccu.Init(mpBias->Dim());
+ }
+
+ //assert the dimensions
+ /*
+ assert(mLinearityCorrection.Rows() == src_comp.mLinearityCorrection.Rows());
+ assert(mLinearityCorrection.Cols() == src_comp.mLinearityCorrection.Cols());
+ assert(mBiasCorrection.Dim() == src_comp.mBiasCorrection.Dim());
+ */
+
+ //split the rows into contiguous per-thread slices;
+ //the first `mod` threads take one extra row
+ int div = mLinearityCorrection.Rows() / thrN;
+ int mod = mLinearityCorrection.Rows() % thrN;
+
+ int origin = thr * div + ((mod > thr)? thr : mod);
+ int rows = div + ((mod > thr)? 1 : 0);
+
+ //std::cout << "[S" << thr << "," << origin << "," << rows << "]" << std::flush;
+
+ //create the matrix windows
+ const SubMatrix<BaseFloat> src_mat (
+   src_comp.mLinearityCorrection,
+   origin, rows,
+   0, mLinearityCorrection.Cols()
+ );
+ SubMatrix<double> tgt_mat (
+   mLinearityCorrectionAccu,
+   origin, rows,
+   0, mLinearityCorrection.Cols()
+ );
+ //sum the rows
+ Add(tgt_mat,src_mat);
+
+ //the first thread also sums the bias correction
+ if(thr == 0) {
+  //std::cout << "[BS" << thr << "]" << std::flush;
+  Add(mBiasCorrectionAccu,src_comp.mBiasCorrection);
+ }
+}
+
+
+void
+SharedLinearity::
+Update(int thr, int thrN)
+{
+ //same contiguous per-thread row slices as in AccuGradient()
+ int div = mLinearity.Rows() / thrN;
+ int mod = mLinearity.Rows() % thrN;
+
+ int origin = thr * div + ((mod > thr)? thr : mod);
+ int rows = div + ((mod > thr)? 1 : 0);
+
+ //std::cout << "[P" << thr << "," << origin << "," << rows << "]" << std::flush;
+
+ //get the matrix windows
+ SubMatrix<double> src_mat (
+   mLinearityCorrectionAccu,
+   origin, rows,
+   0, mLinearityCorrection.Cols()
+ );
+ SubMatrix<BaseFloat> tgt_mat (
+   mLinearity,
+   origin, rows,
+   0, mLinearityCorrection.Cols()
+ );
+
+ //TODO perform L2 regularization
+ //tgt_mat.AddScaled(tgt_mat, -mWeightcost * num_frames);
+
+ //update weights
+ AddScaled(tgt_mat, src_mat, -mLearningRate/static_cast<BaseFloat>(mNInstances));
+
+ //the first thread always updates the bias
+ if(thr == 0) {
+  //std::cout << "[" << thr << "BP]" << std::flush;
+  AddScaled(mBias, mBiasCorrectionAccu, -mLearningRate/static_cast<BaseFloat>(mNInstances));
+ }
+
+ //reset the accumulators
+ src_mat.Zero();
+ if(thr == 0) {
+  mBiasCorrectionAccu.Zero();
+ }
+}
+
+
+} //namespace
diff --git a/src/TNetLib/SharedLinearity.h b/src/TNetLib/SharedLinearity.h
new file mode 100644
index 0000000..83feeee
--- /dev/null
+++ b/src/TNetLib/SharedLinearity.h
@@ -0,0 +1,103 @@
+#ifndef _SHARED_LINEARITY_H_
+#define _SHARED_LINEARITY_H_
+
+
+#include "Component.h"
+
+#include "Matrix.h"
+#include "Vector.h"
+
+
+namespace TNet {
+
+class SharedLinearity : public UpdatableComponent
+{
+ public:
+  SharedLinearity(size_t nInputs, size_t nOutputs, Component *pPred);
+  ~SharedLinearity();
+
+  ComponentType GetType() const
+  { return SHARED_LINEARITY; }
+
+  const char* GetName() const
+  { return "<SharedLinearity>"; }
+
+  Component* Clone() const;
+
+  void PropagateFnc(const Matrix<BaseFloat>& X, Matrix<BaseFloat>& Y);
+  void BackpropagateFnc(const Matrix<BaseFloat>& X, Matrix<BaseFloat>& Y);
+
+  void ReadFromStream(std::istream& rIn);
+  void WriteToStream(std::ostream& rOut);
+
+  /// calculate gradient
+  void Gradient();
+  /// accumulate gradient from other components
+  void AccuGradient(const UpdatableComponent& src, int thr, int thrN);
+  /// update weights, reset the accumulator
+  void Update(int thr, int thrN);
+
+protected:
+  Matrix<BaseFloat> mLinearity; ///< Matrix with neuron weights
+  Vector<BaseFloat> mBias;      ///< Vector with biases
+
+  Matrix<BaseFloat>* mpLinearity; ///< Points to mLinearity, or to the original's weights in a clone
+  Vector<BaseFloat>* mpBias;      ///< Points to mBias, or to the original's biases in a clone
+
+  Matrix<BaseFloat> mLinearityCorrection; ///< Matrix for linearity updates
+  Vector<BaseFloat> mBiasCorrection;      ///< Vector for bias updates
+
+  Matrix<double> mLinearityCorrectionAccu; ///< Accumulator for linearity updates
+  Vector<double> mBiasCorrectionAccu;      ///< Accumulator for bias updates
+
+  int mNInstances;
+};
+
+
+
+
+////////////////////////////////////////////////////////////////////////////
+// INLINE FUNCTIONS
+// SharedLinearity::
+inline
+SharedLinearity::
+SharedLinearity(size_t nInputs, size_t nOutputs, Component *pPred)
+ : UpdatableComponent(nInputs, nOutputs, pPred),
+   mpLinearity(&mLinearity), mpBias(&mBias),
+   mNInstances(0)
+{ }
+
+
+inline
+SharedLinearity::
+~SharedLinearity()
+{ }
+
+
+inline
+Component*
+SharedLinearity::
+Clone() const
+{
+ //clones share the weights and bias through the pointers,
+ //but each gets its own correction buffers
+ SharedLinearity* ptr = new SharedLinearity(GetNInputs(),GetNOutputs(),NULL);
+ ptr->mpLinearity = mpLinearity;
+ ptr->mpBias = mpBias;
+
+ ptr->mLinearityCorrection.Init(mpLinearity->Rows(),mpLinearity->Cols());
+ ptr->mBiasCorrection.Init(mpBias->Dim());
+
+ ptr->mNInstances = mNInstances;
+
+ ptr->mLearningRate = mLearningRate;
+
+ return ptr;
+}
+
+
+
+} //namespace
+
+
+
+#endif
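AccuGradient() and Update() divide the weight rows across threads with the same div/mod arithmetic, so every thread owns a contiguous, disjoint slice and the first `mod` threads take one extra row. A worked example with hypothetical sizes:

   // 10 rows, thrN = 3 threads  =>  div = 3, mod = 1
   // thr 0: origin = 0*3 + 0 = 0,  rows = 4  ->  rows [0,4)
   // thr 1: origin = 1*3 + 1 = 4,  rows = 3  ->  rows [4,7)
   // thr 2: origin = 2*3 + 1 = 7,  rows = 3  ->  rows [7,10)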
diff --git a/src/TNetLib/Thread.h b/src/TNetLib/Thread.h
new file mode 100644
index 0000000..ba6d7ba
--- /dev/null
+++ b/src/TNetLib/Thread.h
@@ -0,0 +1,53 @@
+#ifndef _TNET_THREAD_H
+#define _TNET_THREAD_H
+
+#include <pthread.h>
+
+#include <cstdlib>
+#include <exception>
+#include <iostream>
+
+namespace TNet {
+
+class Thread {
+ public:
+  Thread()
+  { }
+  virtual ~Thread()
+  { }
+
+  int Start(void* arg);
+
+ protected:
+  static void* EntryPoint(void*);
+  virtual void Execute(void*) = 0; ///< Override this function
+  void* Arg() const { return arg_; }
+  void Arg(void* a) { arg_ = a; }
+
+ private:
+  pthread_t thread_id_;
+  void* arg_;
+};
+
+//defined in the header, hence inline to avoid multiple definitions
+inline int Thread::Start(void* arg) {
+ Arg(arg); // store user data
+
+ int ret=0;
+ //create thread as detached (don't wait for it)
+ pthread_attr_t tattr;
+ ret |= pthread_attr_init(&tattr);
+ ret |= pthread_attr_setdetachstate(&tattr,PTHREAD_CREATE_DETACHED);
+ ret |= pthread_create(&thread_id_, &tattr, &Thread::EntryPoint, this);
+ if(ret != 0) KALDI_ERR << "Failed to create thread"; //error macro from KaldiLib
+ return ret;
+}
+
+/*static*/
+inline void* Thread::EntryPoint(void* pthis) try {
+ Thread* pt = (Thread*)pthis;
+ pt->Execute(pt->Arg());
+ return NULL;
+} catch (std::exception& rExc) {
+ std::cerr << "Exception thrown" << std::endl;
+ std::cerr << rExc.what() << std::endl;
+ exit(1);
+}
+
+
+} //namespace TNet
+
+#endif
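Thread is used by subclassing: override Execute() and pass the argument to Start(), which hands it back through the static EntryPoint trampoline. Since the thread is created detached there is no join; callers synchronize by other means (Platform uses semaphores). A minimal sketch; the Worker class is illustrative, not from the source:

   class Worker : public TNet::Thread {
     void Execute(void* arg) {
       long long id = reinterpret_cast<long long>(arg);
       // ...thread body, e.g. Platform::Thread(id) in the code above...
     }
   };

   Worker w;
   w.Start(reinterpret_cast<void*>(7LL)); // detached; returns immediately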