Merge pull request #8 from dhuth/master

w/ python test suite
author: Derick Huth <derickhuth@gmail.com> 2016-02-10 11:13:08 -0700
committer: Derick Huth <derickhuth@gmail.com> 2016-02-10 11:13:08 -0700
commit: 1dd03ee01bff2a70e758ce984476527f3ff42c68 (patch)
tree: 9731867c7019ec9b6ee111c8fa9f92a92119b5ec
parent: 4631ad76927d433da5d55c3c373a1dfd0f74c9d4 (diff)
parent: d68532f2f3ba332199f84818cb047d69a3f33588 (diff)
download: chill-1dd03ee01bff2a70e758ce984476527f3ff42c68.tar.gz
chill-1dd03ee01bff2a70e758ce984476527f3ff42c68.tar.bz2
chill-1dd03ee01bff2a70e758ce984476527f3ff42c68.zip
232 files changed, 9973 insertions, 93 deletions
diff --git a/Makefile.am b/Makefile.am
index 65463c3..ab2a5cc 100644
--- a/Makefile.am
+++ b/Makefile.am
@@ -4,7 +4,7 @@ build_date = "\"`date +%m/%d/%Y`\""
 ## Core Libraries ##
 core_libs     = -lm -lrose -lrt -lutil -lomega -lcodegen -ldl
 core_libs    += -lboost_date_time -lboost_filesystem -lboost_program_options
-core_libs    += -lboost_regex -lboost_system -lboost_thread -lboost_wave
+core_libs    += -lboost_regex -lboost_system -lboost_wave -lboost_iostreams
 
 core_libdirs  = -Lomega/code_gen/obj -Lomega/omega_lib/obj
 core_libdirs += -L$(ROSEHOME)/lib -L$(BOOSTHOME)/lib
@@ -12,7 +12,8 @@ core_libdirs += -L$(ROSEHOME)/lib -L$(BOOSTHOME)/lib
 
 
 ## Core Includes ##
-core_includes  = -Iomega/include -I$(ROSEHOME)/include
+core_includes  = -Iomega/include
+core_includes += -I$(ROSEHOME)/include
 core_includes += -I$(BOOSTHOME)/include
 
 
diff --git a/Makefile.in b/Makefile.in
index 444d53d..796ef49 100644
--- a/Makefile.in
+++ b/Makefile.in
@@ -315,7 +315,6 @@ LEX_OUTPUT_ROOT = @LEX_OUTPUT_ROOT@
 LIBOBJS = @LIBOBJS@
 LIBS = @LIBS@
 LTLIBOBJS = @LTLIBOBJS@
-LUAHOME = @LUAHOME@
 MAKEINFO = @MAKEINFO@
 MKDIR_P = @MKDIR_P@
 OBJEXT = @OBJEXT@
@@ -369,7 +368,6 @@ htmldir = @htmldir@
 includedir = @includedir@
 infodir = @infodir@
 install_sh = @install_sh@
-interface_lang = @interface_lang@
 libdir = @libdir@
 libexecdir = @libexecdir@
 localedir = @localedir@
@@ -395,16 +393,14 @@ target_alias = @target_alias@
 top_build_prefix = @top_build_prefix@
 top_builddir = @top_builddir@
 top_srcdir = @top_srcdir@
-use_lua = @use_lua@
-use_python = @use_python@
 SUBDIRS = omega/omega_lib/obj omega/code_gen/obj
 build_date = "\"`date +%m/%d/%Y`\""
 core_libs = -lm -lrose -lrt -lutil -lomega -lcodegen -ldl \
 	-lboost_date_time -lboost_filesystem -lboost_program_options \
-	-lboost_regex -lboost_system -lboost_thread -lboost_wave
+	-lboost_regex -lboost_system -lboost_wave -lboost_iostreams
 core_libdirs = -Lomega/code_gen/obj -Lomega/omega_lib/obj \
 	-L$(ROSEHOME)/lib -L$(BOOSTHOME)/lib
-core_includes = -Iomega/include -I$(ROSEHOME)/include \
+core_includes = -Iomega/include -I$(ROSEHOME)/include/rose \
 	-I$(BOOSTHOME)/include
 
 # Core #
diff --git a/aclocal.m4 b/aclocal.m4
index 1f1b272..6acbc5f 100644
--- a/aclocal.m4
+++ b/aclocal.m4
@@ -573,8 +573,7 @@ to "yes", and re-run configure.
 END
     AC_MSG_ERROR([Your 'rm' program is bad, sorry.])
   fi
-fi
-])
+fi])
 
 dnl Hook into '_AC_COMPILER_EXEEXT' early to learn its expansion.  Do not
 dnl add the conditional right here, as _AC_COMPILER_EXEEXT may be further
diff --git a/chill_run.cc b/chill_run.cc
index d33819b..59cd6e5 100644
--- a/chill_run.cc
+++ b/chill_run.cc
@@ -12,7 +12,7 @@
 #include <stdlib.h>
 #include <string.h>
 
-#include "chill_env.hh"
+//#include "chill_env.hh"
 
 #include "loop.hh"
 #include <omega.h>
diff --git a/configure b/configure
index 79b947a..df8d785 100755
--- a/configure
+++ b/configure
@@ -591,10 +591,6 @@ LUA_OPT_FALSE
 LUA_OPT_TRUE
 PYTHON_OPT_FALSE
 PYTHON_OPT_TRUE
-use_lua
-use_python
-LUAHOME
-interface_lang
 OMEGAHOME
 BOOSTHOME
 ROSEHOME
@@ -720,8 +716,6 @@ enable_coverage
 with_rose
 with_boost
 with_omega
-with_python
-with_lua
 '
       ac_precious_vars='build_alias
 host_alias
@@ -1376,11 +1370,6 @@ Optional Packages:
 
       --with-omega            set omega home
 
-      --with-python           set python as the interface languge
-
-
-      --with-lua              set lua as the interface langauge.
-
 Some influential environment variables:
   CC          C compiler command
   CFLAGS      C compiler flags
@@ -2560,7 +2549,6 @@ END
   fi
 fi
 
-
 #AC_SUBST([prefix],["$(pwd)/bin"])
 
 ### Use the C++ compiler for linking (C is default) ###
@@ -5378,75 +5366,43 @@ else
 fi
 
 
+#AC_ARG_WITH([python],[
+#    AS_HELP_STRING([--with-python],[set python as the interface languge])
+#    ],[
+#    AC_SUBST([interface_lang],[python])
+#    ],[
+#    AC_SUBST([interface_lang],[python])])
 
-# Check whether --with-python was given.
-if test "${with_python+set}" = set; then :
-  withval=$with_python;
-    interface_lang=python
-
-
-else
-
-    interface_lang=default
-
-fi
-
-
-
-# Check whether --with-lua was given.
-if test "${with_lua+set}" = set; then :
-  withval=$with_lua;
-    LUAHOME="${LUAHOME}"
-
-    interface_lang=lua
-
-
-else
-
-    LUAHOME="${LUAHOME}"
-
-fi
-
+#AC_ARG_WITH([lua],[
+#    AS_HELP_STRING([--with-lua],[set lua as the interface langauge.])],[
+#    AC_SUBST([LUAHOME], ["${LUAHOME}"])
+#    AC_SUBST([interface_lang], [lua])
+#    ],[
+#    AC_SUBST([LUAHOME], ["${LUAHOME}"])])
 
 #AC_ARG_WITH([interface],[
 #    AS_HELP_STRING([--with-interface],[select interface language])],[
 #    AC_SUBST([interface_lang],[$withval])],[
 #    AC_SUBST([interface_lang],[default])])
 
-if test "x$interface_lang" == xdefault; then :
-
-    if test "x$enable_cuda" == xyes; then :
-
-        use_python=yes
-
-        use_lua=no
-
-
-else
-
-        use_python=yes
-
-        use_lua=no
-
-fi
-fi
-
-if test "x$interface_lang" == xlua; then :
-
-    use_python=no
-
-    use_lua=yes
-
-fi
-if test "x$interface_lang" == xpython; then :
-
-    use_python=yes
-
-    use_lua=no
-
-fi
-
- if test "x$use_python" == xyes; then
+#AS_IF([test "x$interface_lang" == xdefault],[
+#    AS_IF([test "x$enable_cuda" == xyes],[
+#        AC_SUBST([use_python],[yes])
+#        AC_SUBST([use_lua],[no])
+#        AC_SUBST([interface_lang],[python])
+#        ],[
+#        AC_SUBST([use_python],[yes])
+#        AC_SUBST([use_lua],[no])
+#        AC_SUBST([interface_lang],[python])])],[])
+
+#AS_IF([test "x$interface_lang" == xlua],[
+#    AC_SUBST([use_python],[no])
+#    AC_SUBST([use_lua],[yes])],[])
+#AS_IF([test "x$interface_lang" == xpython],[
+#    AC_SUBST([use_python],[yes])
+#    AC_SUBST([use_lua],[no])],[])
+
+ if test 1 = 1; then
   PYTHON_OPT_TRUE=
   PYTHON_OPT_FALSE='#'
 else
@@ -5454,7 +5410,7 @@ else
   PYTHON_OPT_FALSE=
 fi
 
- if test "x$use_lua" == xyes; then
+ if test 1 = 0; then
   LUA_OPT_TRUE=
   LUA_OPT_FALSE='#'
 else
diff --git a/configure.ac b/configure.ac
index c8296b4..1dfedbc 100644
--- a/configure.ac
+++ b/configure.ac
@@ -90,9 +90,11 @@ AS_IF([test "x$interface_lang" == xdefault],[
     AS_IF([test "x$enable_cuda" == xyes],[
         AC_SUBST([use_python],[yes])
         AC_SUBST([use_lua],[no])
+        AC_SUBST([interface_lang],[python])
         ],[
         AC_SUBST([use_python],[yes])
-        AC_SUBST([use_lua],[no])])],[])
+        AC_SUBST([use_lua],[no])
+        AC_SUBST([interface_lang],[python])])],[])
 
 AS_IF([test "x$interface_lang" == xlua],[
     AC_SUBST([use_python],[no])
@@ -101,8 +103,8 @@ AS_IF([test "x$interface_lang" == xpython],[
     AC_SUBST([use_python],[yes])
     AC_SUBST([use_lua],[no])],[])
 
-AM_CONDITIONAL([PYTHON_OPT],[test "x$use_python" == xyes])
-AM_CONDITIONAL([LUA_OPT],[test "x$use_lua" == xyes])
+AM_CONDITIONAL([PYTHON_OPT],[test "x$use_python" = xyes])
+AM_CONDITIONAL([LUA_OPT],[test "x$use_lua" = xyes])
 
 AC_CONFIG_HEADERS([include/config.h])
 AC_CONFIG_FILES([Makefile])
diff --git a/omega/aclocal.m4 b/omega/aclocal.m4
index ec90bbe..a2aaad2 100644
--- a/omega/aclocal.m4
+++ b/omega/aclocal.m4
@@ -633,8 +633,7 @@ to "yes", and re-run configure.
 END
     AC_MSG_ERROR([Your 'rm' program is bad, sorry.])
   fi
-fi
-])
+fi])
 
 dnl Hook into '_AC_COMPILER_EXEEXT' early to learn its expansion.  Do not
 dnl add the conditional right here, as _AC_COMPILER_EXEEXT may be further
diff --git a/omega/code_gen/obj/Makefile.in b/omega/code_gen/obj/Makefile.in
index a350281..876e7e9 100644
--- a/omega/code_gen/obj/Makefile.in
+++ b/omega/code_gen/obj/Makefile.in
@@ -269,7 +269,7 @@ libcodegen_a_SOURCES = ../src/codegen.cc ../src/CG_stringBuilder.cc \
 	../src/CG.cc ../src/CG_utils.cc ../src/rose_attributes.cc \
 	../src/CG_roseRepr.cc ../src/CG_roseBuilder.cc
 libcodegen_a_CFLAGS = -Wno-write-strings
-libcodegen_a_CPPFLAGS = -I../include -I../../omega_lib/include -I$(ROSEHOME)/include -I$(BOOSTHOME)/include
+libcodegen_a_CPPFLAGS = -I../include -I../../omega_lib/include -I$(ROSEHOME)/include/rose -I$(BOOSTHOME)/include
 all: all-am
 
 .SUFFIXES:
diff --git a/omega/configure b/omega/configure
index 3a9175c..20a4dfd 100755
--- a/omega/configure
+++ b/omega/configure
@@ -2513,7 +2513,6 @@ END
   fi
 fi
 
-
 ### Use the C++ compiler for linking (C is default) ###
 CCLD="$""(CXX)"
 
diff --git a/test-chill/Makefile b/test-chill/Makefile
new file mode 100644
index 0000000..d7238e8
--- /dev/null
+++ b/test-chill/Makefile
@@ -0,0 +1,210 @@
+###               ###
+### SVN variables ###
+###               ###
+SVN_USER=dhuth
+
+###                       ###
+### Notification          ###
+### (not implemented yet) ###
+NOTIFY_ON_FAILURE=False
+
+
+
+### Derived variables from config ###
+CHILLHOME?=$(STAGING_DIR)/chill
+SVN_CHILL=svn+ssh://$(SVN_USER)@shell.cs.utah.edu/uusoc/facility/res/hallresearch/svn_repo/resRepo/projects/chill
+SVN_CHILL_DEV=$(SVN_CHILL)/branches/cuda-chill-rose
+SVN_CHILL_RELEASE=$(SVN_CHILL)/release
+CHILL_DEV_SRC=$(STAGING_DIR)/chill-dev
+CHILL_RELEASE_SRC=$(STAGING_DIR)/chill-release
+OMEGAHOME?=$(STAGING_DIR)/omega
+SVN_OMEGA=svn+ssh://$(SVN_USER)@shell.cs.utah.edu/uusoc/facility/res/hallresearch/svn_repo/resRepo/projects/omega
+SVN_OMEGA_DEV=$(SVN_OMEGA)/branches/cuda-omega-rose
+SVN_OMEGA_RELEASE=$(SVN_OMEGA)/release
+OMEGA_DEV_SRC=$(STAGING_DIR)/omega-dev
+OMEGA_RELEASE_SRC=$(STAGING_DIR)/omega-release
+
+
+### Staging ###
+STAGING_DIR=$(shell pwd)/.staging
+STAGING_DIR_BIN=$(STAGING_DIR)/bin
+STAGING_DIR_WD=$(STAGING_DIR)/wd
+
+
+### Local ###
+UNIT_TEST_DIR=$(shell pwd)/unit-tests/
+CHILL_DEV_TESTCASE_DIR=$(shell pwd)/test-cases/chill
+CHILL_DEV_TESTCASES_SCRIPT=$(shell find $(CHILL_DEV_TESTCASE_DIR) -name "*.script")
+CHILL_DEV_TESTCASES_STDOUT=$(patsubst %.script,%.stdout,$(CHILL_DEV_TESTCASES_SCRIPT))
+
+### Python environment variables ###
+PYTHON_2:=$(shell which python)
+PYTHON_3:=$(shell which python3)
+
+ifneq ($(PYTHON_3),)
+PYTHON_3_VERSION=$(shell $(PYTHON_3) -c "import sysconfig; print(sysconfig.get_config_var('VERSION'))")
+endif
+PYTHON_2_VERSION=$(shell $(PYTHON_2) -c "import sysconfig; print(sysconfig.get_config_var('VERSION'))")
+PYTHON_VERSION=$(firstword $(PYTHON_3_VERSION) $(PYTHON_2_VERSION))
+PYTHON=$(shell which python$(PYTHON_VERSION))
+### ---------------------------- ###
+
+
+EXPORT=export CHILL_DEV_SRC=$(CHILL_DEV_SRC); \
+       export CHILL_RELEASE_SRC=$(CHILL_RELEASE_SRC); \
+       export OMEGA_DEV_SRC=$(OMEGA_DEV_SRC); \
+       export OMEGA_RELEASE_SRC=$(OMEGA_RELEASE_SRC); \
+       export STAGING_DIR_BIN=$(STAGING_DIR_BIN); \
+       export STAGING_DIR_WD=$(STAGING_DIR_WD);
+
+### dump environment ###
+# define quiet to shut this part up #
+ifndef quiet
+$(info notify on failure?          $(NOTIFY_ON_FAILURE))
+$(info staging directory           $(STAGING_DIR))
+$(info binary directory            $(STAGING_DIR_BIN))
+$(info working directory           $(STAGING_DIR_WD))
+$(info omega home                  $(OMEGAHOME))
+$(info chill home                  $(CHILLHOME))
+$(info chill svn dev repo          $(SVN_CHILL_DEV))
+$(info chill svn release repo      $(SVN_CHILL_RELEASE))
+$(info chill dev src               $(CHILL_DEV_SRC))
+$(info chill release src           $(CHILL_RELEASE_SRC))
+$(info omega svn dev repo          $(SVN_OMEGA_DEV))
+$(info omega svn release repo      $(SVN_OMEGA_RELEASE))
+$(info omega dev src               $(OMEGA_DEV_SRC))
+$(info omega release src           $(OMEGA_RELEASE_SRC))
+$(info python                      $(PYTHON))
+$(info unit tests                  $(UNIT_TEST_DIR))
+#$(info chill-dev test cases        $(CHILL_DEV_TESTCASES_SCRIPT))
+#$(info chill-dev test case stdouts $(CHILL_DEV_TESTCASES_STDOUT))
+endif
+### ----------------- ###
+
+DIRTY_EXTS=pyc o log pickle
+DIRTY_FILES=$(foreach de,$(DIRTY_EXTS),$(shell find . -name "*.$(de)"))
+DIRTY_DIRS=$(shell find . -name '__pycache__' -and -type d) $(STAGING_DIR) pylang coverage_report
+
+CORE_TESTS:=_extract util gcov _cpp_validate_env cpp_validate test __main__
+OMEGA_TESTS:=omega
+CHILL_TESTS:=chill
+
+CORE_TESTS:=$(addsuffix .py,$(addprefix unit-tests/test_,$(CORE_TESTS)))
+OMEGA_TESTS:=$(addsuffix .py,$(addprefix unit-tests/test_,$(OMEGA_TESTS)))
+CHILL_TESTS:=$(addsuffix .py,$(addprefix unit-tests/test_,$(CHILL_TESTS)))
+
+### The all target ###
+.PHONY: all
+all:
+	$(MAKE) clean quiet=1
+	$(MAKE) install quiet=1
+
+
+### This will install the chill_test module ###
+.PHONY: install
+install: pylang
+	$(PYTHON) makeparser.py
+	#TODO: maybe run a setup or something
+
+
+
+### This will uninstall the chill_test module ###
+.PHONY: uninstall
+uninstall:
+	#TODO: can python modules be uninstalled?
+
+
+
+### Simply removes all files listed in DIRTY_FILES ###
+.PHONY: clean
+clean:
+	rm -rf $(DIRTY_FILES)
+	rm -rf $(DIRTY_DIRS)
+
+
+pylang:
+	git clone https://github.com/dhuth/pylang.git pylang-tmp
+	$(PYTHON) pylang-tmp/make_grammar_parsers.py
+	cp -r pylang-tmp/pylang pylang
+	rm -rf pylang-tmp
+
+### Test the test harness ###
+.PHONY: test
+test: $(STAGING_DIR_BIN) $(OMEGA_DEV_SRC) $(OMEGA_RELEASE_SRC) $(CHILL_DEV_SRC) $(CHILL_RELEASE_SRC)
+	@echo "-----------------------------------------------------------"
+	@echo "Note: This target tests the test suite it's self, not chill"
+	@echo "To test chill, run python -m testchill ..."
+	@echo "-----------------------------------------------------------"
+	- $(EXPORT) $(PYTHON) -m unittest $(OMEGA_TESTS) $(CORE_TESTS) $(CHILL_TESTS)
+	@ rm -rf $(STAGING_DIR)
+
+
+.PHONY: test-chill
+test-chill: $(STAGING_DIR_BIN) $(OMEGA_DEV_SRC) $(OMEGA_RELEASE_SRC) $(CHILL_DEV_SRC) $(CHILL_RELEASE_SRC)
+	- $(EXPORT) $(PYTHON) -m unittest $(OMEGA_TESTS) $(CHILL_TESTS)
+	@ rm -rf $(STAGING_DIR)
+
+
+.PHONY: test-omega
+test-omega: $(STAGING_DIR_BIN) $(OMEGA_DEV_SRC) $(OMEGA_RELEASE_SRC)
+	- $(EXPORT) $(PYTHON) -m unittest $(OMEGA_TESTS)
+	@ rm -rf $(STAGING_DIR)
+
+
+.PHONY: test-core
+test-core: $(STAGING_DIR_BIN) $(OMEGA_DEV_SRC) $(OMEGA_RELEASE_SRC) $(CHILL_DEV_SRC) $(CHILL_RELEASE_SRC) make-omega
+	- $(EXPORT) $(PYTHON) -m unittest $(CORE_TESTS)
+	@ rm -rf $(STAGING_DIR)
+
+
+.PHONY:
+test-core-%: $(STAGING_DIR_BIN)
+	- $(EXPORT) $(PYTHON) -m unittest unit-tests/test_$*.py
+
+
+.PHONY: debug
+debug:
+	@### NOTHING ###
+
+
+### benchmarking (don't use if you're not me) ###
+$(CHILL_DEV_TESTCASES_STDOUT): %.stdout: %.script
+	$(EXPORT) cd $(STAGING_DIR_WD); $(STAGING_DIR_BIN)/chill $< > $@
+
+
+.PHONY: benchmark-dev
+benchmark-dev: test-chill $(CHILL_DEV_TESTCASES_STDOUT)
+	# do nothing
+
+
+### checking out and making directories ###
+$(STAGING_DIR_BIN):
+	mkdir -p $(STAGING_DIR_BIN)
+	mkdir -p $(STAGING_DIR_WD)
+
+$(CHILL_DEV_SRC): $(OMEGA_DEV_SRC) $(STAGING_DIR_BIN)
+	svn export $(SVN_CHILL_DEV) $(CHILL_DEV_SRC)
+
+$(CHILL_RELEASE_SRC): $(OMEGA_RELEASE_SRC) $(STAGING_DIR_BIN)
+	svn export $(SVN_CHILL_RELEASE) $(CHILL_RELEASE_SRC)
+
+$(OMEGA_DEV_SRC): $(STAGING_DIR_BIN)
+	svn export $(SVN_OMEGA_DEV) $(OMEGA_DEV_SRC)
+	#cd $(OMEGA_DEV_SRC); $(MAKE) depend
+	#cd $(OMEGA_DEV_SRC); $(MAKE)
+
+$(OMEGA_RELEASE_SRC): $(STAGING_DIR_BIN)
+	svn export $(SVN_OMEGA_RELEASE) $(OMEGA_RELEASE_SRC)
+	#cd $(OMEGA_RELEASE_SRC); $(MAKE) depend
+	#cd $(OMEGA_RELEASE_SRC): $(MAKE)
+
+.PHONY: make-omega
+make-omega:
+	cd $(OMEGA_DEV_SRC); $(MAKE) depend
+	cd $(OMEGA_DEV_SRC); $(MAKE)
+	cd $(OMEGA_RELEASE_SRC); $(MAKE) depend
+	cd $(OMEGA_RELEASE_SRC); $(MAKE)
+
+#$(STAGING_DIR):
+#	mkdir -p $(STAGING_DIR)
+
diff --git a/test-chill/README.md b/test-chill/README.md
new file mode 100644
index 0000000..e35ff68
--- /dev/null
+++ b/test-chill/README.md
@@ -0,0 +1,149 @@
+# testchill
+
+## Description  
+TODO: better description  
+testchill is a Python module that runs a series of tests to aid in the development and maintenance of CHiLL.
+testchill tests that chill compiles successfully, that scripts can be run without error, and that they generate compilable code.
+It can also optionally test optimized code for correctness and provide code coverage.  
+
+
+## Running testchill  
+
+testchill is a Python module, and can be run like any other Python module:  
+`python -m testchill <suite-args>* <sub-command> <sub-command-args>*`  
+
+The most basic subcommand that runs the testsuite is [`local`](#-local-chill-home-). `local` runs a set of tests
+on the parent chill source directory.
+
+`python -m testchill [-O <path-to-omega>] local [-C <path-to-chill>]` If the environment variable $OMEGAHOME is set, the `-O` argument can be omitted.
+
+### Arguments common to all sub commands (with the exception of `local`):  
+- `-w <working-directory>, --working-dir <working-directory>`
+
+   Sets the working directory where testchill will compile and run test scripts. If not set, the current working  directory will be used.
+
+- `-R <rose-home>, --rose-home <rose-home>`
+
+   Set ROSEHOME environment variable for building omega. If not set, the current ROSEHOME environment variable will be used.
+
+- `-C <chill directory>, --chill-home <chill-home>`
+
+   Set the path to chill. If not set, the current CHILLHOME environment variable will be used.
+
+- `-O <omega directory>, --omega-home <omega-home>`
+
+   Set the path to omega. If not set, the current OMEGAHOME environment variable will be used.
+
+- `-b <binary directory>, --binary-dir <binary directory>`
+
+   Set the directory where all chill binary files will be placed after being compiled. The chill directory will be used by default.
+
+### Subcommands for running individual tests:  
+- <h4> `build-chill-testcase ...`
+
+   Build chill. It will fail if the build process returns non zero.  
+   Optional arguments:  
+   - `-v {release | dev}` or `--chill-branch {release | dev}`
+   
+     `release` will build the old release version, and `dev` will build the current development version.  
+     `dev` is used by default.
+   
+   - `-u | -c` or `--target-cuda | --target-c`
+   
+     `-c` will build chill, and `-u` will build cuda-chill.  
+     `-c` is used by default.
+   
+   - `-i {script | lua | python}` or `--interface-lang {script | lua | python}`
+   
+     Set the interface language chill will be built for.  
+     `script` will build chill with the original chill script language.  
+     `lua` will build chill with lua as the interface language.  
+     `python` will build chill with python as the interface language.  
+     By default, `script` is used for chill and `lua` is used for cuda-chill.  
+   
+   - `--build-coverage | --no-build-coverage`
+   
+     `--build-coverage` will build chill to work with gcov.  
+     `--no-build-coverage` will build chill normally.  
+     It is on by default.  
+   
+- <h4> `chill-testcase <chill-script> <chill-src> ...`
+
+   Run a chill test script.  
+   Arguments:  
+   - `chill-script`
+     
+     Path to the script file.  
+     
+   - `chill-src`
+     
+     Path to the source file.  
+     
+   Optional arguments:
+   - `-v {release | dev}` or `--chill-branch {release | dev}`
+   
+     `release` will run scripts as the old release version, and `dev` will run them  as the current development version.  
+     `dev` is used by default.
+   
+   - `-u | -c` or `--target-cuda | --target-c`
+   
+     `-c` will run chill, and `-u` will run cuda-chill.  
+     `-c` is used by default.
+   
+   - `-i {script | lua | python}` or `--interface-lang {script | lua | python}`
+   
+     Set the interface language chill will be run with.  
+     `script` will run chill with the original chill script language.  
+     `lua` will run chill with lua as the interface language.  
+     `python` will run chill with python as the interface language.  
+     By default, `script` is used for chill and `lua` is used for cuda-chill.  
+     
+   - `--compile-src | --no-compile-src`
+     
+     Turns source compilation test on or off. If on, the source file will be compiled prior to transformation.  
+     On by default.  
+     
+   - `--run-script | --no-run-script`
+     
+     If on, the script file will be run.  
+     On by default.  
+     
+   - `--compile-gensrc | --no-compile-gensrc`
+     
+     If on, the generated source file will be compiled.  
+     On by default.  
+     
+   - `--check-run-script | --no-check-run-script`
+     
+     If on, the generated object file will be run. If there are any validation tests, each one will be compiled and run.  
+     On by default.  
+     
+   - `--test-coverage | --no-test-coverage`
+     
+     If on, coverage data will be compiled during the run-script test.  
+     On by default.  
+   
+- <h4> `batch <batch-file>`
+   
+   Run a test case list (*.tclist) file. Each line consists of a subcommand to be passed to testchill (including additional `batch` commands).  
+   Arguments:
+   - `<batch-file>`
+     
+     Path to a test case list file.
+   
+- <h4> `local <chill-home> ...`
+  
+  Compile and test a local chill source directory.  
+  Arguments:
+  - `<chill-home>`
+    
+    Path to chill.  
+  
+  Optional arguments:  
+  - `-v {release | dev}` or `--chill-branch {release | dev}`
+   
+     `release` will run scripts as the old release version, and `dev` will run them  as the current development version.  
+     `dev` is used by default.
+  
+
+
diff --git a/test-chill/coverage.py b/test-chill/coverage.py
new file mode 100644
index 0000000..8ff5af8
--- /dev/null
+++ b/test-chill/coverage.py
@@ -0,0 +1,64 @@
+import argparse
+import pickle
+
+
+def loadcov(filename = 'coverage.pickle'):
+    with open(filename, 'rb') as f:  # pickle streams are binary; text mode fails on Python 3
+        return pickle.load(f)  # NOTE: unpickling runs arbitrary code; only load files you produced
+
+
+def lines(covset, filename):
+    for line in covset.coverage_by_file[filename].lines:
+        yield line.lineno, line.count(), line.code
+
+
+def nonexecuted(covset, filename):
+    return filter(lambda line: line[1] == 0, lines(covset, filename))
+
+
+def commented(covset, filename):
+    return filter(lambda line: line[1] is None, lines(covset, filename))
+
+
+def linerange(lineiter, minline, maxline):
+    return filter(lambda line: line[0] >= minline and line[0] <= maxline, lineiter)
+
+
+def print_nonexec(argsns, cov):
+    if argsns.filename is None:
+        covlist = list((k, len(list(nonexecuted(cov, k)))) for k in cov.filenames)
+        covlist = sorted(covlist, key=lambda i: i[1])
+        for i in reversed(range(len(covlist))):
+            print('{}: {}'.format(covlist[i][0].ljust(24), covlist[i][1]))
+    else:
+        minline, maxline = map(int,argsns.linerange)
+        for lineno, count, code in linerange(nonexecuted(cov, argsns.filename), minline, maxline):
+            print('{}: {}'.format(str(lineno).rjust(5), code))
+
+
+def print_full_nonexec(argsns, cov):
+    for filename in cov.filenames:
+        with open('coverage_report/' + filename + '.txt', 'w') as f:
+            minline, maxline = map(int,argsns.linerange)
+            for lineno, count, code in linerange(nonexecuted(cov, filename), minline, maxline):
+                f.write('{}: {}\n'.format(str(lineno).rjust(5),code))
+
+
+def make_argparser():
+    arg_parser = argparse.ArgumentParser('coverage.py')
+    cmd_parser_set = arg_parser.add_subparsers()
+    nonexec_cmd = cmd_parser_set.add_parser('nonexec')
+    nonexec_cmd.add_argument('-f', dest='filename', default=None)
+    nonexec_cmd.add_argument('-r', dest='linerange', nargs=2, default=(0, 120000), metavar='STARTLINE ENDLINE')
+    nonexec_cmd.set_defaults(func=print_nonexec)
+    full_nonexec_cmd = cmd_parser_set.add_parser('full_nonexec')
+    full_nonexec_cmd.add_argument('-r', dest='linerange', nargs=2, default=(0, 120000), metavar='STARTLINE ENDLINE')
+    full_nonexec_cmd.set_defaults(func=print_full_nonexec)
+    return arg_parser
+    
+
+if __name__ == '__main__':
+    argsns = make_argparser().parse_args()
+    cov = loadcov()
+    argsns.func(argsns, cov)
+
diff --git a/test-chill/makeparser.py b/test-chill/makeparser.py
new file mode 100644
index 0000000..5e41489
--- /dev/null
+++ b/test-chill/makeparser.py
@@ -0,0 +1,9 @@
+import pylang.parser
+import pickle
+
+if __name__ == '__main__':
+    gstream = open('testchill/cpp_validate/grammar.txt', 'r')
+    env = dict()
+    exec('from testchill._cpp_validate_env import *', None, env)
+    parser = pylang.parser.generate(gstream, env)
+    pickle.dump(parser, open('testchill/cpp_validate/parser.pickle', 'wb'), 2)
diff --git a/test-chill/test-cases/chill-lua.tclist b/test-chill/test-cases/chill-lua.tclist
new file mode 100644
index 0000000..39bd140
--- /dev/null
+++ b/test-chill/test-cases/chill-lua.tclist
@@ -0,0 +1,19 @@
+
+#chill-testcase  test-cases/chill/test_distribute.lua    test-cases/chill/mm.c  --check-run-script
+#chill-testcase  test-cases/chill/test_fuse.lua          test-cases/chill/mm.c  --check-run-script
+#chill-testcase  test-cases/chill/test_known.lua         test-cases/chill/mm.c  --check-run-script
+#chill-testcase  test-cases/chill/test_original.lua      test-cases/chill/mm.c  --check-run-script
+#chill-testcase  test-cases/chill/test_peel.lua          test-cases/chill/mm.c  --check-run-script
+#chill-testcase  test-cases/chill/test_permute.lua       test-cases/chill/mm.c  --check-run-script
+#chill-testcase  test-cases/chill/test_print_code.lua    test-cases/chill/mm.c  --check-run-script
+#chill-testcase  test-cases/chill/test_print_dep.lua     test-cases/chill/mm.c  --check-run-script
+#chill-testcase  test-cases/chill/test_print_space.lua   test-cases/chill/mm.c  --check-run-script
+#chill-testcase  test-cases/chill/test_reverse.lua       test-cases/chill/mm.c  --check-run-script
+#chill-testcase  test-cases/chill/test_scale.lua         test-cases/chill/mm.c  --check-run-script
+#chill-testcase  test-cases/chill/test_shift.lua         test-cases/chill/mm.c  --check-run-script
+#chill-testcase  test-cases/chill/test_shift_to.lua      test-cases/chill/mm.c  --check-run-script
+#chill-testcase  test-cases/chill/test_skew.lua          test-cases/chill/mm.c  --check-run-script
+#chill-testcase  test-cases/chill/test_tile.lua          test-cases/chill/mm.c  --check-run-script
+#chill-testcase  test-cases/chill/test_unroll_extra.lua  test-cases/chill/mm.c  --check-run-script
+#chill-testcase  test-cases/chill/test_unroll.lua        test-cases/chill/mm.c  --check-run-script
+
diff --git a/test-chill/test-cases/chill-python.tclist b/test-chill/test-cases/chill-python.tclist
new file mode 100644
index 0000000..ee6c54c
--- /dev/null
+++ b/test-chill/test-cases/chill-python.tclist
@@ -0,0 +1,19 @@
+
+chill-testcase  test-cases/chill/test_distribute.py    test-cases/chill/mm.c  --check-run-script
+chill-testcase  test-cases/chill/test_fuse.py          test-cases/chill/mm.c  --check-run-script
+chill-testcase  test-cases/chill/test_known.py         test-cases/chill/mm.c  --check-run-script
+chill-testcase  test-cases/chill/test_original.py      test-cases/chill/mm.c  --check-run-script
+chill-testcase  test-cases/chill/test_peel.py          test-cases/chill/mm.c  --check-run-script
+chill-testcase  test-cases/chill/test_permute.py       test-cases/chill/mm.c  --check-run-script
+chill-testcase  test-cases/chill/test_print_code.py    test-cases/chill/mm.c  --check-run-script
+chill-testcase  test-cases/chill/test_print_dep.py     test-cases/chill/mm.c  --check-run-script
+chill-testcase  test-cases/chill/test_print_space.py   test-cases/chill/mm.c  --check-run-script
+chill-testcase  test-cases/chill/test_reverse.py       test-cases/chill/mm.c  --check-run-script
+chill-testcase  test-cases/chill/test_scale.py         test-cases/chill/mm.c  --check-run-script
+chill-testcase  test-cases/chill/test_shift.py         test-cases/chill/mm.c  --check-run-script
+chill-testcase  test-cases/chill/test_shift_to.py      test-cases/chill/mm.c  --check-run-script
+chill-testcase  test-cases/chill/test_skew.py          test-cases/chill/mm.c  --check-run-script
+chill-testcase  test-cases/chill/test_tile.py          test-cases/chill/mm.c  --check-run-script
+chill-testcase  test-cases/chill/test_unroll_extra.py  test-cases/chill/mm.c  --check-run-script
+chill-testcase  test-cases/chill/test_unroll.py        test-cases/chill/mm.c  --check-run-script
+
diff --git a/test-chill/test-cases/chill-script.tclist b/test-chill/test-cases/chill-script.tclist
new file mode 100644
index 0000000..117ee0d
--- /dev/null
+++ b/test-chill/test-cases/chill-script.tclist
@@ -0,0 +1,36 @@
+
+chill-testcase  test-cases/chill/test_distribute.script    test-cases/chill/mm.c  --check-run-script
+chill-testcase  test-cases/chill/test_fuse.script          test-cases/chill/mm.c  --check-run-script
+chill-testcase  test-cases/chill/test_known.script         test-cases/chill/mm.c  --check-run-script
+chill-testcase  test-cases/chill/test_original.script      test-cases/chill/mm.c  --check-run-script
+chill-testcase  test-cases/chill/test_peel.script          test-cases/chill/mm.c  --check-run-script
+chill-testcase  test-cases/chill/test_permute.script       test-cases/chill/mm.c  --check-run-script
+chill-testcase  test-cases/chill/test_print_code.script    test-cases/chill/mm.c  --check-run-script
+chill-testcase  test-cases/chill/test_print_dep.script     test-cases/chill/mm.c  --check-run-script
+chill-testcase  test-cases/chill/test_print_space.script   test-cases/chill/mm.c  --check-run-script
+chill-testcase  test-cases/chill/test_reverse.script       test-cases/chill/mm.c  --check-run-script
+chill-testcase  test-cases/chill/test_scale.script         test-cases/chill/mm.c  --check-run-script
+chill-testcase  test-cases/chill/test_shift.script         test-cases/chill/mm.c  --check-run-script
+chill-testcase  test-cases/chill/test_shift_to.script      test-cases/chill/mm.c  --check-run-script
+chill-testcase  test-cases/chill/test_skew.script          test-cases/chill/mm.c  --check-run-script
+chill-testcase  test-cases/chill/test_tile.script          test-cases/chill/mm.c  --check-run-script
+chill-testcase  test-cases/chill/test_unroll_extra.script  test-cases/chill/mm.c  --check-run-script
+chill-testcase  test-cases/chill/test_unroll.script        test-cases/chill/mm.c  --check-run-script
+
+chill-testcase  test-cases/examples/chill/gemm.script             test-cases/examples/chill/gemm.c
+chill-testcase  test-cases/examples/chill/gemv.script             test-cases/examples/chill/gemv.c
+chill-testcase  test-cases/examples/chill/jacobi1.script          test-cases/examples/chill/jacobi1.c
+chill-testcase  test-cases/examples/chill/jacobi2.script          test-cases/examples/chill/jacobi2.c
+chill-testcase  test-cases/examples/chill/qr.script               test-cases/examples/chill/qr.c
+#chill-testcase  test-cases/examples/chill/scalar_test.script      test-cases/examples/chill/scalar_test.c      --check-run-script --fail-run-script
+chill-testcase  test-cases/examples/chill/swim.script             test-cases/examples/chill/swim.c
+chill-testcase  test-cases/examples/chill/test_align.script       test-cases/examples/chill/test_align.c
+#chill-testcase  test-cases/examples/chill/test_fusion.script      test-cases/examples/chill/test_fusion.c      --check-run-script --fail-run-script
+#chill-testcase  test-cases/examples/chill/test_lex_order.script   test-cases/examples/chill/test_lex_order.c   --check-run-script --fail-run-script
+chill-testcase  test-cases/examples/chill/test_split.script       test-cases/examples/chill/test_split.c
+#chill-testcase  test-cases/examples/chill/test_split2.script      test-cases/examples/chill/test_split2.c      --check-run-script --fail-run-script
+chill-testcase  test-cases/examples/chill/test_tile.script        test-cases/examples/chill/test_tile.c
+#chill-testcase  test-cases/examples/chill/tile_violation.script   test-cases/examples/chill/tile_violation.c   --check-run-script --fail-run-script
+chill-testcase  test-cases/examples/chill/unroll.script           test-cases/examples/chill/unroll.c
+#chill-testcase  test-cases/examples/chill/unroll_violation.script test-cases/examples/chill/unroll_violation.c --check-run-script --fail-run-script
+
diff --git a/test-chill/mm.c b/test-chill/test-cases/chill/mm.c
index 354d929..354d929 100644
--- a/test-chill/mm.c
+++ b/test-chill/test-cases/chill/mm.c
diff --git a/test-chill/test_distribute.py b/test-chill/test-cases/chill/test_distribute.py
index 760d29f..760d29f 100644
--- a/test-chill/test_distribute.py
+++ b/test-chill/test-cases/chill/test_distribute.py
diff --git a/test-chill/test-cases/chill/test_distribute.script b/test-chill/test-cases/chill/test_distribute.script
new file mode 100644
index 0000000..2476e8d
--- /dev/null
+++ b/test-chill/test-cases/chill/test_distribute.script
@@ -0,0 +1,10 @@
+source: mm.c
+procedure: mm
+format: rose
+loop: 0
+
+known(ambn > 0)
+known(an > 0)
+known(bm > 0)
+distribute([0,1], 1)
+print
diff --git a/test-chill/test-cases/chill/test_distribute.stdout b/test-chill/test-cases/chill/test_distribute.stdout
new file mode 100644
index 0000000..f6aa1a8
--- /dev/null
+++ b/test-chill/test-cases/chill/test_distribute.stdout
@@ -0,0 +1,14 @@
+for(t2 = 0; t2 <= an-1; t2++) {
+  for(t4 = 0; t4 <= bm-1; t4++) {
+    s0(t2,t4,0);
+  }
+}
+for(t2 = 0; t2 <= an-1; t2++) {
+  for(t4 = 0; t4 <= bm-1; t4++) {
+    for(t6 = 0; t6 <= ambn-1; t6++) {
+      s1(t2,t4,t6);
+    }
+  }
+}
+
+
diff --git a/test-chill/test_fuse.py b/test-chill/test-cases/chill/test_fuse.py
index 32c594c..32c594c 100644
--- a/test-chill/test_fuse.py
+++ b/test-chill/test-cases/chill/test_fuse.py
diff --git a/test-chill/test-cases/chill/test_fuse.script b/test-chill/test-cases/chill/test_fuse.script
new file mode 100644
index 0000000..6578ad2
--- /dev/null
+++ b/test-chill/test-cases/chill/test_fuse.script
@@ -0,0 +1,12 @@
+source: mm.c
+procedure: mm
+format: rose
+loop: 0
+
+known(ambn > 0)
+known(an > 0)
+known(bm > 0)
+distribute([0,1], 1)
+print
+fuse([0,1], 1)
+print
diff --git a/test-chill/test-cases/chill/test_fuse.stdout b/test-chill/test-cases/chill/test_fuse.stdout
new file mode 100644
index 0000000..922d511
--- /dev/null
+++ b/test-chill/test-cases/chill/test_fuse.stdout
@@ -0,0 +1,25 @@
+for(t2 = 0; t2 <= an-1; t2++) {
+  for(t4 = 0; t4 <= bm-1; t4++) {
+    s0(t2,t4,0);
+  }
+}
+for(t2 = 0; t2 <= an-1; t2++) {
+  for(t4 = 0; t4 <= bm-1; t4++) {
+    for(t6 = 0; t6 <= ambn-1; t6++) {
+      s1(t2,t4,t6);
+    }
+  }
+}
+
+
+for(t2 = 0; t2 <= an-1; t2++) {
+  for(t4 = 0; t4 <= bm-1; t4++) {
+    s0(t2,t4,0);
+    s1(t2,t4,0);
+    for(t6 = 1; t6 <= ambn-1; t6++) {
+      s1(t2,t4,t6);
+    }
+  }
+}
+
+
diff --git a/test-chill/test_known.py b/test-chill/test-cases/chill/test_known.py
index 662d7d0..662d7d0 100644
--- a/test-chill/test_known.py
+++ b/test-chill/test-cases/chill/test_known.py
diff --git a/test-chill/test-cases/chill/test_known.script b/test-chill/test-cases/chill/test_known.script
new file mode 100644
index 0000000..6772e18
--- /dev/null
+++ b/test-chill/test-cases/chill/test_known.script
@@ -0,0 +1,9 @@
+source: mm.c
+procedure: mm
+format: rose
+loop: 0
+
+known(ambn > 0)
+known(an > 0)
+known(bm > 0)
+print
diff --git a/test-chill/test-cases/chill/test_known.stdout b/test-chill/test-cases/chill/test_known.stdout
new file mode 100644
index 0000000..6975a99
--- /dev/null
+++ b/test-chill/test-cases/chill/test_known.stdout
@@ -0,0 +1,11 @@
+for(t2 = 0; t2 <= an-1; t2++) {
+  for(t4 = 0; t4 <= bm-1; t4++) {
+    s0(t2,t4,0);
+    s1(t2,t4,0);
+    for(t6 = 1; t6 <= ambn-1; t6++) {
+      s1(t2,t4,t6);
+    }
+  }
+}
+
+
diff --git a/test-chill/test_known_2.py b/test-chill/test-cases/chill/test_known_2.py
index 5b16325..5b16325 100644
--- a/test-chill/test_known_2.py
+++ b/test-chill/test-cases/chill/test_known_2.py
diff --git a/test-chill/test_original.py b/test-chill/test-cases/chill/test_original.py
index 2d17799..2d17799 100644
--- a/test-chill/test_original.py
+++ b/test-chill/test-cases/chill/test_original.py
diff --git a/test-chill/test-cases/chill/test_original.script b/test-chill/test-cases/chill/test_original.script
new file mode 100644
index 0000000..8f07121
--- /dev/null
+++ b/test-chill/test-cases/chill/test_original.script
@@ -0,0 +1,12 @@
+source: mm.c
+procedure: mm
+format: rose
+loop: 0
+
+known(ambn > 4)
+known(an > 0)
+known(bm > 0)
+peel(1,3,4)
+print
+original()
+print
diff --git a/test-chill/test-cases/chill/test_original.stdout b/test-chill/test-cases/chill/test_original.stdout
new file mode 100644
index 0000000..5121763
--- /dev/null
+++ b/test-chill/test-cases/chill/test_original.stdout
@@ -0,0 +1,28 @@
+for(t2 = 0; t2 <= an-1; t2++) {
+  for(t4 = 0; t4 <= bm-1; t4++) {
+    s2(t2,t4,0);
+    s3(t2,t4,0);
+    s4(t2,t4,1);
+    s5(t2,t4,2);
+    s6(t2,t4,3);
+    for(t6 = 4; t6 <= ambn-1; t6++) {
+      s1(t2,t4,t6);
+    }
+  }
+}
+
+
+for(t2 = 0; t2 <= an-1; t2++) {
+  for(t4 = 0; t4 <= bm-1; t4++) {
+    s2(t2,t4,0);
+    s3(t2,t4,0);
+    s4(t2,t4,1);
+    s5(t2,t4,2);
+    s6(t2,t4,3);
+    for(t6 = 4; t6 <= ambn-1; t6++) {
+      s1(t2,t4,t6);
+    }
+  }
+}
+
+
diff --git a/test-chill/test_peel.py b/test-chill/test-cases/chill/test_peel.py
index bb6c583..bb6c583 100644
--- a/test-chill/test_peel.py
+++ b/test-chill/test-cases/chill/test_peel.py
diff --git a/test-chill/test-cases/chill/test_peel.script b/test-chill/test-cases/chill/test_peel.script
new file mode 100644
index 0000000..121868e
--- /dev/null
+++ b/test-chill/test-cases/chill/test_peel.script
@@ -0,0 +1,10 @@
+source: mm.c
+procedure: mm
+format: rose
+loop: 0
+
+known(ambn > 4)
+known(an > 0)
+known(bm > 0)
+peel(1,3,4)
+print
diff --git a/test-chill/test-cases/chill/test_peel.stdout b/test-chill/test-cases/chill/test_peel.stdout
new file mode 100644
index 0000000..7096b21
--- /dev/null
+++ b/test-chill/test-cases/chill/test_peel.stdout
@@ -0,0 +1,14 @@
+for(t2 = 0; t2 <= an-1; t2++) {
+  for(t4 = 0; t4 <= bm-1; t4++) {
+    s2(t2,t4,0);
+    s3(t2,t4,0);
+    s4(t2,t4,1);
+    s5(t2,t4,2);
+    s6(t2,t4,3);
+    for(t6 = 4; t6 <= ambn-1; t6++) {
+      s1(t2,t4,t6);
+    }
+  }
+}
+
+
diff --git a/test-chill/test-cases/chill/test_permute.py b/test-chill/test-cases/chill/test_permute.py
new file mode 100644
index 0000000..c201d2f
--- /dev/null
+++ b/test-chill/test-cases/chill/test_permute.py
@@ -0,0 +1,12 @@
+from chill import *
+
+source('mm.c')
+procedure('mm')
+#format: rose
+loop(0)
+
+known('ambn > 0')
+known('an > 0')
+known('bm > 0')
+permute([3,1,2])
+print_code()
diff --git a/test-chill/test-cases/chill/test_permute.script b/test-chill/test-cases/chill/test_permute.script
new file mode 100644
index 0000000..946bff5
--- /dev/null
+++ b/test-chill/test-cases/chill/test_permute.script
@@ -0,0 +1,10 @@
+source: mm.c
+procedure: mm
+format: rose
+loop: 0
+
+known(ambn > 0)
+known(an > 0)
+known(bm > 0)
+permute([3,1,2])
+print
diff --git a/test-chill/test-cases/chill/test_permute.stdout b/test-chill/test-cases/chill/test_permute.stdout
new file mode 100644
index 0000000..0268d6b
--- /dev/null
+++ b/test-chill/test-cases/chill/test_permute.stdout
@@ -0,0 +1,17 @@
+for(t2 = 0; t2 <= ambn-1; t2++) {
+  for(t4 = 0; t4 <= an-1; t4++) {
+    if (t2 <= 0) {
+      for(t6 = 0; t6 <= bm-1; t6++) {
+        s0(t4,t6,t2);
+        s1(t4,t6,t2);
+      }
+    }
+    else {
+      for(t6 = 0; t6 <= bm-1; t6++) {
+        s1(t4,t6,t2);
+      }
+    }
+  }
+}
+
+
diff --git a/test-chill/test_print_code.py b/test-chill/test-cases/chill/test_print_code.py
index 004c46c..004c46c 100644
--- a/test-chill/test_print_code.py
+++ b/test-chill/test-cases/chill/test_print_code.py
diff --git a/test-chill/test-cases/chill/test_print_code.script b/test-chill/test-cases/chill/test_print_code.script
new file mode 100644
index 0000000..20c8364
--- /dev/null
+++ b/test-chill/test-cases/chill/test_print_code.script
@@ -0,0 +1,7 @@
+
+source: mm.c
+procedure: mm
+format: rose
+loop: 0
+
+print
diff --git a/test-chill/test-cases/chill/test_print_code.stdout b/test-chill/test-cases/chill/test_print_code.stdout
new file mode 100644
index 0000000..b4ece20
--- /dev/null
+++ b/test-chill/test-cases/chill/test_print_code.stdout
@@ -0,0 +1,18 @@
+if (bm >= 1) {
+  for(t2 = 0; t2 <= an-1; t2++) {
+    for(t4 = 0; t4 <= bm-1; t4++) {
+      if (ambn >= 1) {
+        s0(t2,t4,0);
+        s1(t2,t4,0);
+      }
+      for(t6 = 1; t6 <= ambn-1; t6++) {
+        s1(t2,t4,t6);
+      }
+      if (ambn <= 0) {
+        s0(t2,t4,0);
+      }
+    }
+  }
+}
+
+
diff --git a/test-chill/test_print_dep.py b/test-chill/test-cases/chill/test_print_dep.py
index a3dee29..a3dee29 100644
--- a/test-chill/test_print_dep.py
+++ b/test-chill/test-cases/chill/test_print_dep.py
diff --git a/test-chill/test-cases/chill/test_print_dep.script b/test-chill/test-cases/chill/test_print_dep.script
new file mode 100644
index 0000000..99dc567
--- /dev/null
+++ b/test-chill/test-cases/chill/test_print_dep.script
@@ -0,0 +1,7 @@
+
+source: mm.c
+procedure: mm
+format: rose
+loop: 0
+
+print dep
diff --git a/test-chill/test-cases/chill/test_print_dep.stdout b/test-chill/test-cases/chill/test_print_dep.stdout
new file mode 100644
index 0000000..ab679a3
--- /dev/null
+++ b/test-chill/test-cases/chill/test_print_dep.stdout
@@ -0,0 +1,4 @@
+dependence graph:
+1->2: C:flow(0, 0, +) C:flow(0, 0, 0) C:output(0, 0, +) C:output(0, 0, 0)
+2->2: C:anti(0, 0, +) C:output(0, 0, +)
+
diff --git a/test-chill/test_print_space.py b/test-chill/test-cases/chill/test_print_space.py
index 2f8f678..2f8f678 100644
--- a/test-chill/test_print_space.py
+++ b/test-chill/test-cases/chill/test_print_space.py
diff --git a/test-chill/test-cases/chill/test_print_space.script b/test-chill/test-cases/chill/test_print_space.script
new file mode 100644
index 0000000..d8c81df
--- /dev/null
+++ b/test-chill/test-cases/chill/test_print_space.script
@@ -0,0 +1,7 @@
+
+source: mm.c
+procedure: mm
+format: rose
+loop: 0
+
+print space
diff --git a/test-chill/test-cases/chill/test_print_space.stdout b/test-chill/test-cases/chill/test_print_space.stdout
new file mode 100644
index 0000000..f97c8c1
--- /dev/null
+++ b/test-chill/test-cases/chill/test_print_space.stdout
@@ -0,0 +1,3 @@
+s0: { Sym=[bm,an] [t1,t2,t3,t4,t5,t6,t7] : t1 = 0 && t3 = 0 && t5 = 0 && t7 = 0 && t6 = 0 && 0 <= t2 < an && 0 <= t4 < bm }
+s1: { Sym=[ambn,bm,an] [t1,t2,t3,t4,t5,t6,t7] : t1 = 0 && t3 = 0 && t5 = 0 && t7 = 0 && 0 <= t2 < an && 0 <= t6 < ambn && 0 <= t4 < bm }
+
diff --git a/test-chill/test_reverse.py b/test-chill/test-cases/chill/test_reverse.py
index a97c611..a97c611 100644
--- a/test-chill/test_reverse.py
+++ b/test-chill/test-cases/chill/test_reverse.py
diff --git a/test-chill/test-cases/chill/test_reverse.script b/test-chill/test-cases/chill/test_reverse.script
new file mode 100644
index 0000000..fc04d5c
--- /dev/null
+++ b/test-chill/test-cases/chill/test_reverse.script
@@ -0,0 +1,12 @@
+source: mm.c
+procedure: mm
+format: rose
+loop: 0
+
+known(ambn > 0)
+known(an > 0)
+known(bm > 0)
+distribute([0,1],1)
+reverse([1],1)
+reverse([1],2)
+print
diff --git a/test-chill/test-cases/chill/test_reverse.stdout b/test-chill/test-cases/chill/test_reverse.stdout
new file mode 100644
index 0000000..182b822
--- /dev/null
+++ b/test-chill/test-cases/chill/test_reverse.stdout
@@ -0,0 +1,14 @@
+for(t2 = 0; t2 <= an-1; t2++) {
+  for(t4 = 0; t4 <= bm-1; t4++) {
+    s0(t2,t4,0);
+  }
+}
+for(t2 = -an+1; t2 <= 0; t2++) {
+  for(t4 = -bm+1; t4 <= 0; t4++) {
+    for(t6 = 0; t6 <= ambn-1; t6++) {
+      s1(-t2,-t4,t6);
+    }
+  }
+}
+
+
diff --git a/test-chill/test_scale.py b/test-chill/test-cases/chill/test_scale.py
index ee8455d..ee8455d 100644
--- a/test-chill/test_scale.py
+++ b/test-chill/test-cases/chill/test_scale.py
diff --git a/test-chill/test-cases/chill/test_scale.script b/test-chill/test-cases/chill/test_scale.script
new file mode 100644
index 0000000..20611ec
--- /dev/null
+++ b/test-chill/test-cases/chill/test_scale.script
@@ -0,0 +1,13 @@
+
+source: mm.c
+procedure: mm
+format: rose
+loop: 0
+
+known(ambn > 0)
+known(an > 0)
+known(bm > 0)
+distribute([0,1],1)
+scale([1],1,4)
+scale([1],2,4)
+print
diff --git a/test-chill/test-cases/chill/test_scale.stdout b/test-chill/test-cases/chill/test_scale.stdout
new file mode 100644
index 0000000..049451c
--- /dev/null
+++ b/test-chill/test-cases/chill/test_scale.stdout
@@ -0,0 +1,14 @@
+for(t2 = 0; t2 <= an-1; t2++) {
+  for(t4 = 0; t4 <= bm-1; t4++) {
+    s0(t2,t4,0);
+  }
+}
+for(t2 = 0; t2 <= 4*an-4; t2 += 4) {
+  for(t4 = 0; t4 <= 4*bm-4; t4 += 4) {
+    for(t6 = 0; t6 <= ambn-1; t6++) {
+      s1(t2/4,t4/4,t6);
+    }
+  }
+}
+
+
diff --git a/test-chill/test_shift.py b/test-chill/test-cases/chill/test_shift.py
index b3fc6ab..b3fc6ab 100644
--- a/test-chill/test_shift.py
+++ b/test-chill/test-cases/chill/test_shift.py
diff --git a/test-chill/test-cases/chill/test_shift.script b/test-chill/test-cases/chill/test_shift.script
new file mode 100644
index 0000000..d3d67aa
--- /dev/null
+++ b/test-chill/test-cases/chill/test_shift.script
@@ -0,0 +1,11 @@
+
+source: mm.c
+procedure: mm
+format: rose
+loop: 0
+
+known(ambn > 0)
+known(an > 0)
+known(bm > 0)
+shift([1],1,4)
+print
diff --git a/test-chill/test-cases/chill/test_shift.stdout b/test-chill/test-cases/chill/test_shift.stdout
new file mode 100644
index 0000000..2b96895
--- /dev/null
+++ b/test-chill/test-cases/chill/test_shift.stdout
@@ -0,0 +1,24 @@
+for(t2 = 0; t2 <= an+3; t2++) {
+  for(t4 = 0; t4 <= bm-1; t4++) {
+    if (an >= t2+1) {
+      s0(t2,t4,0);
+      if (t2 >= 4) {
+        s1(t2-4,t4,0);
+      }
+      if (t2 >= 4) {
+        for(t6 = 1; t6 <= ambn-1; t6++) {
+          s1(t2-4,t4,t6);
+        }
+      }
+    }
+    else {
+      if (t2 >= 4) {
+        for(t6 = 0; t6 <= ambn-1; t6++) {
+          s1(t2-4,t4,t6);
+        }
+      }
+    }
+  }
+}
+
+
diff --git a/test-chill/test_shift_to.py b/test-chill/test-cases/chill/test_shift_to.py
index f3537c5..f3537c5 100644
--- a/test-chill/test_shift_to.py
+++ b/test-chill/test-cases/chill/test_shift_to.py
diff --git a/test-chill/test-cases/chill/test_shift_to.script b/test-chill/test-cases/chill/test_shift_to.script
new file mode 100644
index 0000000..64a6443
--- /dev/null
+++ b/test-chill/test-cases/chill/test_shift_to.script
@@ -0,0 +1,11 @@
+
+source: mm.c
+procedure: mm
+format: rose
+loop: 0
+
+known(ambn > 0)
+known(an > 0)
+known(bm > 0)
+shift_to(1,1,4)
+print
diff --git a/test-chill/test-cases/chill/test_shift_to.stdout b/test-chill/test-cases/chill/test_shift_to.stdout
new file mode 100644
index 0000000..820d83f
--- /dev/null
+++ b/test-chill/test-cases/chill/test_shift_to.stdout
@@ -0,0 +1,11 @@
+for(t2 = 4; t2 <= an+3; t2++) {
+  for(t4 = 0; t4 <= bm-1; t4++) {
+    s0(t2-4,t4,0);
+    s1(t2-4,t4,0);
+    for(t6 = 1; t6 <= ambn-1; t6++) {
+      s1(t2-4,t4,t6);
+    }
+  }
+}
+
+
diff --git a/test-chill/test_skew.py b/test-chill/test-cases/chill/test_skew.py
index c7271f4..c7271f4 100644
--- a/test-chill/test_skew.py
+++ b/test-chill/test-cases/chill/test_skew.py
diff --git a/test-chill/test-cases/chill/test_skew.script b/test-chill/test-cases/chill/test_skew.script
new file mode 100644
index 0000000..e9cf75a
--- /dev/null
+++ b/test-chill/test-cases/chill/test_skew.script
@@ -0,0 +1,11 @@
+
+source: mm.c
+procedure: mm
+format: rose
+loop: 0
+
+known(ambn > 0)
+known(an > 0)
+known(bm > 0)
+skew([1],1,[4])
+print
diff --git a/test-chill/test-cases/chill/test_skew.stdout b/test-chill/test-cases/chill/test_skew.stdout
new file mode 100644
index 0000000..a1b36f8
--- /dev/null
+++ b/test-chill/test-cases/chill/test_skew.stdout
@@ -0,0 +1,22 @@
+for(t2 = 0; t2 <= 4*an-4; t2++) {
+  for(t4 = 0; t4 <= bm-1; t4++) {
+    if (an >= t2+1) {
+      s0(t2,t4,0);
+      if (intMod(t2,4) == 0) {
+        s1(t2/4,t4,0);
+      }
+    }
+    else {
+      if (intMod(t2,4) == 0) {
+        s1(t2/4,t4,0);
+      }
+    }
+    if (intMod(t2,4) == 0) {
+      for(t6 = 1; t6 <= ambn-1; t6++) {
+        s1(t2/4,t4,t6);
+      }
+    }
+  }
+}
+
+
diff --git a/test-chill/test_tile.py b/test-chill/test-cases/chill/test_tile.py
index fbe0368..fbe0368 100644
--- a/test-chill/test_tile.py
+++ b/test-chill/test-cases/chill/test_tile.py
diff --git a/test-chill/test-cases/chill/test_tile.script b/test-chill/test-cases/chill/test_tile.script
new file mode 100644
index 0000000..de27998
--- /dev/null
+++ b/test-chill/test-cases/chill/test_tile.script
@@ -0,0 +1,7 @@
+source: mm.c
+procedure: mm
+format: rose
+loop: 0
+
+tile(0, 2, 4)
+print
diff --git a/test-chill/test-cases/chill/test_tile.stdout b/test-chill/test-cases/chill/test_tile.stdout
new file mode 100644
index 0000000..0a2d89a
--- /dev/null
+++ b/test-chill/test-cases/chill/test_tile.stdout
@@ -0,0 +1,20 @@
+if (an >= 1) {
+  for(t2 = 0; t2 <= bm-1; t2 += 4) {
+    for(t4 = 0; t4 <= an-1; t4++) {
+      for(t6 = t2; t6 <= min(bm-1,t2+3); t6++) {
+        if (ambn >= 1) {
+          s0(t4,t6,0);
+          s1(t4,t6,0);
+        }
+        for(t8 = 1; t8 <= ambn-1; t8++) {
+          s1(t4,t6,t8);
+        }
+        if (ambn <= 0) {
+          s0(t4,t6,0);
+        }
+      }
+    }
+  }
+}
+
+
diff --git a/test-chill/test_unroll.py b/test-chill/test-cases/chill/test_unroll.py
index 39dd0db..39dd0db 100644
--- a/test-chill/test_unroll.py
+++ b/test-chill/test-cases/chill/test_unroll.py
diff --git a/test-chill/test-cases/chill/test_unroll.script b/test-chill/test-cases/chill/test_unroll.script
new file mode 100644
index 0000000..bd19bd1
--- /dev/null
+++ b/test-chill/test-cases/chill/test_unroll.script
@@ -0,0 +1,11 @@
+source: mm.c
+procedure: mm
+format: rose
+loop: 0
+
+known(ambn > 0)
+known(an > 0)
+known(bm > 0)
+distribute([0,1], 1)
+unroll(1, 3, 4)
+print
diff --git a/test-chill/test-cases/chill/test_unroll.stdout b/test-chill/test-cases/chill/test_unroll.stdout
new file mode 100644
index 0000000..71616bf
--- /dev/null
+++ b/test-chill/test-cases/chill/test_unroll.stdout
@@ -0,0 +1,19 @@
+for(t2 = 0; t2 <= an-1; t2++) {
+  for(t4 = 0; t4 <= bm-1; t4++) {
+    s0(t2,t4,0);
+  }
+}
+for(t2 = 0; t2 <= an-1; t2++) {
+  for(t4 = 0; t4 <= bm-1; t4++) {
+    s2(t2,t4);
+    for(t6 = 0; t6 <= -over1+ambn-1; t6 += 4) {
+      s1(t2,t4,t6);
+      s4(t2,t4,t6);
+    }
+    for(t6 = max(0,ambn-over1); t6 <= ambn-1; t6++) {
+      s3(t2,t4,t6);
+    }
+  }
+}
+
+
diff --git a/test-chill/test_unroll_extra.py b/test-chill/test-cases/chill/test_unroll_extra.py
index 929313c..929313c 100644
--- a/test-chill/test_unroll_extra.py
+++ b/test-chill/test-cases/chill/test_unroll_extra.py
diff --git a/test-chill/test-cases/chill/test_unroll_extra.script b/test-chill/test-cases/chill/test_unroll_extra.script
new file mode 100644
index 0000000..fae244e
--- /dev/null
+++ b/test-chill/test-cases/chill/test_unroll_extra.script
@@ -0,0 +1,11 @@
+
+source: mm.c
+procedure: mm
+format: rose
+loop: 0
+
+known(ambn > 0)
+known(an > 0)
+known(bm > 0)
+unroll_extra(1, 2, 4)
+print
diff --git a/test-chill/test-cases/chill/test_unroll_extra.stdout b/test-chill/test-cases/chill/test_unroll_extra.stdout
new file mode 100644
index 0000000..eca65f2
--- /dev/null
+++ b/test-chill/test-cases/chill/test_unroll_extra.stdout
@@ -0,0 +1,28 @@
+for(t2 = 0; t2 <= an-1; t2++) {
+  s2(t2);
+  for(t4 = 0; t4 <= -over1+bm-1; t4 += 4) {
+    s0(t2,t4,0);
+    s1(t2,t4,0);
+    s5(t2,t4,0);
+    s6(t2,t4,0);
+    s7(t2,t4,0);
+    s8(t2,t4,0);
+    s9(t2,t4,0);
+    s10(t2,t4,0);
+    for(t6 = 1; t6 <= ambn-1; t6++) {
+      s1(t2,t4,t6);
+      s6(t2,t4,t6);
+      s8(t2,t4,t6);
+      s10(t2,t4,t6);
+    }
+  }
+  for(t4 = max(bm-over1,0); t4 <= bm-1; t4++) {
+    s3(t2,t4,0);
+    s4(t2,t4,0);
+    for(t6 = 1; t6 <= ambn-1; t6++) {
+      s4(t2,t4,t6);
+    }
+  }
+}
+
+
diff --git a/test-chill/test-cases/cuda-chill-lua.tclist b/test-chill/test-cases/cuda-chill-lua.tclist
new file mode 100644
index 0000000..d2e91dc
--- /dev/null
+++ b/test-chill/test-cases/cuda-chill-lua.tclist
@@ -0,0 +1,13 @@
+
+chill-testcase -u  test-cases/examples/cuda-chill/cp.lua         test-cases/examples/cuda-chill/cp.c         --no-compile-gensrc
+chill-testcase -u  test-cases/examples/cuda-chill/mm.lua         test-cases/examples/cuda-chill/mm.c         --no-compile-gensrc
+chill-testcase -u  test-cases/examples/cuda-chill/mpeg4.lua      test-cases/examples/cuda-chill/mpeg4.c      --no-compile-gensrc
+chill-testcase -u  test-cases/examples/cuda-chill/mriq.lua       test-cases/examples/cuda-chill/mriq.c       --no-compile-gensrc
+chill-testcase -u  test-cases/examples/cuda-chill/mriq-fh.lua    test-cases/examples/cuda-chill/mriq-fh.c    --no-compile-gensrc
+chill-testcase -u  test-cases/examples/cuda-chill/mv.lua         test-cases/examples/cuda-chill/mv.c         --no-compile-gensrc
+chill-testcase -u  test-cases/examples/cuda-chill/mv-shadow.lua  test-cases/examples/cuda-chill/mv-shadow.c  --no-compile-gensrc
+chill-testcase -u  test-cases/examples/cuda-chill/mv_try.lua     test-cases/examples/cuda-chill/mv_try.c     --no-compile-gensrc
+chill-testcase -u  test-cases/examples/cuda-chill/nbody.lua      test-cases/examples/cuda-chill/nbody.c      --no-compile-gensrc
+chill-testcase -u  test-cases/examples/cuda-chill/tmv.lua        test-cases/examples/cuda-chill/tmv.c        --no-compile-gensrc
+chill-testcase -u  test-cases/examples/cuda-chill/tmv-shadow.lua test-cases/examples/cuda-chill/tmv-shadow.c --no-compile-gensrc
+
diff --git a/test-chill/test-cases/cuda-chill-python.tclist b/test-chill/test-cases/cuda-chill-python.tclist
new file mode 100644
index 0000000..8b13789
--- /dev/null
+++ b/test-chill/test-cases/cuda-chill-python.tclist
@@ -0,0 +1 @@
+
diff --git a/test-chill/test-cases/examples/chill/gemm.c b/test-chill/test-cases/examples/chill/gemm.c
new file mode 100644
index 0000000..2c90ea5
--- /dev/null
+++ b/test-chill/test-cases/examples/chill/gemm.c
@@ -0,0 +1,25 @@
+
+#ifndef N
+#define N 512
+#endif
+
+/*
+<test name=gemm define="{'N':512}">
+procedure int gemm(
+    in  float[N][N] a = matrix([,], lambda i,j: random(2,-2)),
+    in  float[N][N] b = matrix([,], lambda i,j: random(2,-2)),
+    out float[N][N] c = matrix([,], lambda i,j: 0))
+</test>
+*/
+int gemm(float a[N][N], float b[N][N], float c[N][N]) {
+	int i, j, k;
+	int n = N;
+	for (j = 0; j < n; j++)
+		for (k = 0; k < n; k++)
+			for (i = 0; i < n; i++) {
+				c[i][j] = c[i][j] + a[i][k] * b[k][j];
+			}
+
+	return 0;
+}
+
diff --git a/test-chill/test-cases/examples/chill/gemm.script b/test-chill/test-cases/examples/chill/gemm.script
new file mode 100644
index 0000000..393f236
--- /dev/null
+++ b/test-chill/test-cases/examples/chill/gemm.script
@@ -0,0 +1,31 @@
+#matrix multiply large array size for intel machine
+source: gemm.c
+procedure: gemm
+format: rose
+loop: 0
+
+TI = 128
+TJ = 8
+TK = 512
+UI = 2
+UJ = 2
+
+permute([3,1,2])
+tile(0,2,TJ)
+#print space
+tile(0,2,TI)
+#print space
+tile(0,5,TK)
+#print space
+
+datacopy(0,3,a,false,1)
+#print space
+
+datacopy(0,4,b)
+print
+unroll(0,4,UI)#print space
+print 
+unroll(0,5,UJ)
+#print space
+print
+
diff --git a/test-chill/test-cases/examples/chill/gemv.c b/test-chill/test-cases/examples/chill/gemv.c
new file mode 100644
index 0000000..39b083c
--- /dev/null
+++ b/test-chill/test-cases/examples/chill/gemv.c
@@ -0,0 +1,21 @@
+#ifndef N
+#define N 512
+#endif
+
+/*
+<test name=gemv define="{'N':512}">
+procedure int gemv(
+    out float[N]    a = matrix([],  lambda i:   random(2,-2)),
+    in  float[N]    b = matrix([],  lambda i:   random(2,-2)),
+    in  float[N][N] c = matrix([,], lambda i,j: random(2,-2)))
+</test>
+*/
+int gemv(float a[N], float b[N], float c[N][N]) {
+    int i, j;
+
+    for (i = 1; i < N; i++)
+        for (j = 1; j < N; j++)
+            a[i] = a[i] + c[i][j] * b[j];
+
+    return 0;
+}
diff --git a/test-chill/test-cases/examples/chill/gemv.script b/test-chill/test-cases/examples/chill/gemv.script
new file mode 100644
index 0000000..73b3b58
--- /dev/null
+++ b/test-chill/test-cases/examples/chill/gemv.script
@@ -0,0 +1,9 @@
+source: gemv.c # matrix-vector multiply
+procedure: gemv
+format : rose
+loop: 0
+
+
+
+original()
+print
diff --git a/test-chill/test-cases/examples/chill/jacobi1.c b/test-chill/test-cases/examples/chill/jacobi1.c
new file mode 100644
index 0000000..e7ff8f8
--- /dev/null
+++ b/test-chill/test-cases/examples/chill/jacobi1.c
@@ -0,0 +1,19 @@
+
+#ifndef N
+#define N 512
+#endif
+
+/*
+<test name=jacobi define="{'N':512}">
+procedure int jacobi(
+    in out float[N][N] a = matrix [i,j] random(2,-2))
+</test>
+*/
+int jacobi(float a[N][N]) {
+    int t, i;
+	for (t = 2; t <= 100; t++)
+		for (i = 2; i <= N - 1; i++)
+			a[t][i] = a[t - 1][i - 1] + a[t - 1][i] + a[t - 1][i + 1];
+
+	return 0;
+}
diff --git a/test-chill/test-cases/examples/chill/jacobi1.script b/test-chill/test-cases/examples/chill/jacobi1.script
new file mode 100644
index 0000000..604f763
--- /dev/null
+++ b/test-chill/test-cases/examples/chill/jacobi1.script
@@ -0,0 +1,18 @@
+#
+# tiling perfect jacobi loop nest with time step, use
+# unimodular transformation first (only applicable to the
+# perfect loop nest) to make tiling legal.
+#
+
+source: jacobi1.c
+procedure: jacobi
+format : rose
+loop: 0
+
+print dep
+
+nonsingular([[1,0],[1,1]])  # unimodular matrix, determinant is one
+tile(0,2,64)
+
+print dep
+print
diff --git a/test-chill/test-cases/examples/chill/jacobi2.c b/test-chill/test-cases/examples/chill/jacobi2.c
new file mode 100644
index 0000000..b8d8d7b
--- /dev/null
+++ b/test-chill/test-cases/examples/chill/jacobi2.c
@@ -0,0 +1,15 @@
+#define N 512
+
+int main() {
+	double a[N];
+	double b[N];
+	int t, i;
+	for (t = 1; t <= 100; t++) {
+		for (i = 2; i <= N - 1; i++)
+			b[i] = (double) 0.25 * (a[i - 1] + a[i + 1]) + (double) 0.5 * a[i];
+
+		for (i = 2; i <= N - 1; i++)
+			a[i] = b[i];
+	}
+	return 0;
+}
diff --git a/test-chill/test-cases/examples/chill/jacobi2.script b/test-chill/test-cases/examples/chill/jacobi2.script
new file mode 100644
index 0000000..afe14c6
--- /dev/null
+++ b/test-chill/test-cases/examples/chill/jacobi2.script
@@ -0,0 +1,21 @@
+#
+# tiling imperfect jacobi loop nest, more details in the paper
+# "Automatic Tiling of Iterative Stencil Loops" by Zhiyuan Li and
+# Yonghong Song, TOPLAS, 2004.
+#
+
+source: jacobi2.c
+procedure: main
+format: rose
+loop: 0
+
+print dep
+
+original()
+shift([1], 2, 1)
+fuse([0,1], 2)  # optional
+skew([0,1], 2, [2,1])
+tile(0, 2, 32, 1)
+
+print dep
+print
diff --git a/test-chill/test-cases/examples/chill/qr.c b/test-chill/test-cases/examples/chill/qr.c
new file mode 100644
index 0000000..8d18b72
--- /dev/null
+++ b/test-chill/test-cases/examples/chill/qr.c
@@ -0,0 +1,44 @@
+#include <math.h>
+
+int main() {
+
+	int M, N;
+	float** A;
+	float *s;
+	float *Rdiag;
+	float *nrm;
+	int i, j, k;
+        float t;
+	for (k = 0; k < N; k++) {
+		nrm[k] = 0;
+
+		for (i = k; i < M; i++)
+			nrm[k] = sqrt(nrm[k] * nrm[k] + A[i][k] * A[i][k]);
+                //t = A[k][k];
+
+		//if (t < 0)
+		//	nrm[k] = -nrm[k];
+		for (i = k; i < M; i++)
+			A[i][k] = A[i][k] / nrm[k];
+
+		A[k][k] = A[k][k] + 1;
+
+		for (j = k + 1; j < N; j++) {
+			s[j] = 0; //S6
+
+			for (i = k; i < M; i++)
+				s[j] = s[j] + A[i][k] * A[i][j]; //S7
+
+			s[j] = -s[j] / A[k][k]; //S8
+
+			for (i = k; i < M; i++)
+				A[i][j] = A[i][j] + s[j] * A[i][k]; //S9
+
+		}
+
+		Rdiag[k] = -nrm[k];
+
+	}
+
+	return 0;
+}
diff --git a/test-chill/test-cases/examples/chill/qr.script b/test-chill/test-cases/examples/chill/qr.script
new file mode 100644
index 0000000..6b4cd46
--- /dev/null
+++ b/test-chill/test-cases/examples/chill/qr.script
@@ -0,0 +1,13 @@
+#
+# Loop nest from qr.c. This script only calls original() and prints
+# the resulting code; no tiling or other transformation is applied
+# here.
+#
+
+source: qr.c
+procedure: main
+format: rose
+loop: 0
+original()
+print 
+
diff --git a/test-chill/test-cases/examples/chill/scalar_test.c b/test-chill/test-cases/examples/chill/scalar_test.c
new file mode 100644
index 0000000..733c882
--- /dev/null
+++ b/test-chill/test-cases/examples/chill/scalar_test.c
@@ -0,0 +1,16 @@
+int a[10][10];
+int main() {
+
+	int temp;
+	int i, j;
+
+	for (i = 0; i < 10; i++) {
+		for (j = 0; j < 10; j++) {
+			a[i + 1][j - 1] = a[i][j];
+		}
+
+	}
+
+	return 0;
+
+}
diff --git a/test-chill/test-cases/examples/chill/scalar_test.script b/test-chill/test-cases/examples/chill/scalar_test.script
new file mode 100644
index 0000000..f5b0aa8
--- /dev/null
+++ b/test-chill/test-cases/examples/chill/scalar_test.script
@@ -0,0 +1,10 @@
+#Simple Scalar dependence check
+source: scalar_test.c
+procedure: main
+format : rose
+loop: 0
+
+original()
+permute([2,1])
+print dep
+print space
diff --git a/test-chill/test-cases/examples/chill/swim.c b/test-chill/test-cases/examples/chill/swim.c
new file mode 100644
index 0000000..a21ef24
--- /dev/null
+++ b/test-chill/test-cases/examples/chill/swim.c
@@ -0,0 +1,159 @@
+#define M 100
+#define N 100
+#define  N3 10 
+
+int main() {
+
+	int DX;
+	int DY;
+	int FSDX;
+	int FSDY;
+	int TDT;
+	int TDTS8;
+	int TDTSDX;
+	int TDTSDY;
+	int t, i, j;
+	double CU[M + 1][N + 1];
+	double CV[M + 1][N + 1];
+	double Z[M + 1][N + 1];
+	double H[M + 1][N + 1];
+	double P[M + 1][N + 1];
+	double U[M + 1][N + 1];
+	double V[M + 1][N + 1];
+	double UNEW[M + 1][N + 1];
+	double UOLD[M + 1][N + 1];
+	double PNEW[M + 1][N + 1];
+	double POLD[M + 1][N + 1];
+	double VNEW[M + 1][N + 1];
+	double VOLD[M + 1][N + 1];
+	double ALPHA;
+
+	for (t = 0; t < N3; t++) {
+
+		FSDX = 4 / DX;
+		FSDY = 4 / DY;
+
+		for (i = 0; i < M; i++) {
+			for (j = 0; j < N; j++) {
+				CU[i + 1][j] = (double) 0.5 * (P[i + 1][j] + P[i][j])
+						* U[i + 1][j];
+				CV[i][j + 1] = (double) 0.5 * (P[i][j + 1] + P[i][j])
+						* V[i][j + 1];
+				Z[i + 1][j + 1] =
+						(FSDX * (V[i + 1][j + 1] - V[i][j + 1])
+								- FSDY * (U[i + 1][j + 1] - U[i + 1][j]))
+								/ (P[i][j] + P[i + 1][j] + P[i + 1][j + 1]
+										+ P[i][j + 1]);
+				H[i][j] = P[i][j]
+						+ (double) 0.25
+								* (U[i + 1][j] * U[i + 1][j] + U[i][j] * U[i][j]
+										+ V[i][j + 1] * V[i][j + 1]
+										+ V[i][j] * V[i][j]);
+			}
+		}
+
+		for (j = 0; j < N; j++) {
+			// CU[0][j] = CU[M+1][j];
+			CU[0][j] = CU[M][j];
+			CV[M][j + 1] = CV[0][j + 1];
+			Z[0][j + 1] = Z[M][j + 1];
+			H[M][j] = H[0][j];
+		}
+
+		for (i = 0; i < M; i++) {
+			CU[i + 1][N] = CU[i + 1][0];
+			CV[i][0] = CV[i][N];
+			Z[i + 1][0] = Z[i + 1][N];
+			H[i][N] = H[i][0];
+		}
+
+		CU[0][N] = CU[M][0];
+		CV[M][0] = CV[0][N];
+		Z[0][0] = Z[M][N];
+		H[M][N] = H[0][0];
+
+		TDTS8 = TDT / 8;
+		TDTSDX = TDT / DX;
+		TDTSDY = TDT / DY;
+
+		for (i = 0; i < M; i++) {
+			for (j = 0; j < N; j++) {
+				UNEW[i + 1][j] = UOLD[i + 1][j]
+						+ TDTS8 * (Z[i + 1][j + 1] + Z[i + 1][j])
+								* (CV[i + 1][j + 1] + CV[i][j + 1] + CV[i][j]
+										+ CV[i + 1][j])
+						- TDTSDX * (H[i + 1][j] - H[i][j]);
+				VNEW[i][j + 1] = VOLD[i][j + 1]
+						- TDTS8 * (Z[i + 1][j + 1] + Z[i][j + 1])
+								* (CU[i + 1][j + 1] + CU[i][j + 1] + CU[i][j]
+										+ CU[i + 1][j])
+						- TDTSDY * (H[i][j + 1] - H[i][j]);
+				PNEW[i][j] = POLD[i][j] - TDTSDX * (CU[i + 1][j] - CU[i][j])
+						- TDTSDY * (CV[i][j + 1] - CV[i][j]);
+			}
+		}
+		for (j = 0; j < N; j++) {
+			UNEW[0][j] = UNEW[M][j];
+			VNEW[M][j + 1] = VNEW[0][j + 1];
+			PNEW[M][j] = PNEW[0][j];
+		}
+
+		for (i = 0; i < M; i++) {
+			UNEW[i + 1][N] = UNEW[i + 1][0];
+			VNEW[i][0] = VNEW[i][N];
+			PNEW[i][N] = PNEW[i][0];
+		}
+
+		UNEW[0][N] = UNEW[M][0];
+		VNEW[M][0] = VNEW[0][N];
+		PNEW[M][N] = PNEW[0][0];
+		// time = time + DT;
+
+		for (i = 0; i < M; i++) {
+			for (j = 0; j < N; j++) {
+				UOLD[i][j] = U[i][j]
+						+ ALPHA
+								* (UNEW[i][j] - (double) 2 * U[i][j]
+										+ UOLD[i][j]);
+				VOLD[i][j] = V[i][j]
+						+ ALPHA
+								* (VNEW[i][j] - (double) 2 * V[i][j]
+										+ VOLD[i][j]);
+				POLD[i][j] = P[i][j]
+						+ ALPHA
+								* (PNEW[i][j] - (double) 2 * P[i][j]
+										+ POLD[i][j]);
+				U[i][j] = UNEW[i][j];
+				V[i][j] = VNEW[i][j];
+				P[i][j] = PNEW[i][j];
+			}
+		}
+
+		for (j = 0; j < N; j++) {
+			UOLD[M][j] = UOLD[0][j];
+			VOLD[M][j] = VOLD[0][j];
+			POLD[M][j] = POLD[0][j];
+			U[M][j] = U[0][j];
+			V[M][j] = V[0][j];
+			P[M][j] = P[0][j];
+		}
+
+		for (i = 0; i < M; i++) {
+			UOLD[i][N] = UOLD[i][0];
+			VOLD[i][N] = VOLD[i][0];
+			POLD[i][N] = POLD[i][0];
+			U[i][N] = U[i][0];
+			V[i][N] = V[i][0];
+			P[i][N] = P[i][0];
+		}
+
+		UOLD[M][N] = UOLD[0][0];
+		VOLD[M][N] = VOLD[0][0];
+		POLD[M][N] = POLD[0][0];
+		U[M][N] = U[0][0];
+		V[M][N] = V[0][0];
+		P[M][N] = P[0][0];
+
+	}
+	return 0;
+}
diff --git a/test-chill/test-cases/examples/chill/swim.script b/test-chill/test-cases/examples/chill/swim.script
new file mode 100644
index 0000000..79de9d9
--- /dev/null
+++ b/test-chill/test-cases/examples/chill/swim.script
@@ -0,0 +1,13 @@
+#
+# tiling imperfect jacobi loop nest, more details in the paper
+# "Automatic Tiling of Iterative Stencil Loops" by Zhiyuan Li and
+# Yonghong Song, TOPLAS, 2004.
+#
+
+source: swim.c
+procedure: main
+format: rose
+loop: 0
+original()
+#print space
+print
diff --git a/test-chill/test-cases/examples/chill/test_align.c b/test-chill/test-cases/examples/chill/test_align.c
new file mode 100644
index 0000000..d1365ca
--- /dev/null
+++ b/test-chill/test-cases/examples/chill/test_align.c
@@ -0,0 +1,20 @@
+int main() {
+
+	int m, n;
+	int a[10], b[10];
+	int i, j;
+	for (i = 0; i < n; i++) {
+		for (j = 0; j < n; j++) {
+			a[i] = 1;
+		}
+
+		for (j = 0; j < n; j++) {
+			b[i] -= 1;
+		}
+
+	}
+
+	return 0;
+
+}
+
diff --git a/test-chill/test-cases/examples/chill/test_align.script b/test-chill/test-cases/examples/chill/test_align.script
new file mode 100644
index 0000000..c990e22
--- /dev/null
+++ b/test-chill/test-cases/examples/chill/test_align.script
@@ -0,0 +1,12 @@
+# print loop 0 of main() in test_align.c without applying any transformation
+source: test_align.c
+procedure: main
+format: rose
+loop: 0
+
+original()
+
+
+
+print
+
diff --git a/test-chill/test-cases/examples/chill/test_fusion.c b/test-chill/test-cases/examples/chill/test_fusion.c
new file mode 100644
index 0000000..bd2c4f2
--- /dev/null
+++ b/test-chill/test-cases/examples/chill/test_fusion.c
@@ -0,0 +1,13 @@
+int main() {
+
+	int a[10][10];
+	int i, j;
+	for (i = 0; i < 10; i++) {
+		for (j = 0; j < 10; j++)
+			a[i][j] = a[i][j] + 5;
+		for (j = 0; j < 10; j++)
+			a[i][j + 1] = a[i][j + 1] + 5;
+
+	}
+
+}
diff --git a/test-chill/test-cases/examples/chill/test_fusion.script b/test-chill/test-cases/examples/chill/test_fusion.script
new file mode 100644
index 0000000..41f6cc0
--- /dev/null
+++ b/test-chill/test-cases/examples/chill/test_fusion.script
@@ -0,0 +1,7 @@
+source: test_fusion.c
+procedure: main
+loop: 0
+original()
+fuse([0,1],2)
+print
+
diff --git a/test-chill/test-cases/examples/chill/test_lex_order.c b/test-chill/test-cases/examples/chill/test_lex_order.c
new file mode 100644
index 0000000..1a3b26d
--- /dev/null
+++ b/test-chill/test-cases/examples/chill/test_lex_order.c
@@ -0,0 +1,31 @@
+int main() {
+
+	int m, n;
+	int a[10];
+        int b[10]; 
+        int c[10];
+	int i, j;
+	for (i = 0; i < n; i++) {
+		for (j = 0; j < n; j++) {
+			b[j] = a[j];
+		}
+
+           
+                
+                for (j = 0; j < n; j++) {
+			a[j+1] = 6;
+		}
+
+                for (j = 0; j < n; j++) {
+			c[j] = a[j];
+		}
+
+
+       
+
+	}
+
+	return 0;
+
+}
+
diff --git a/test-chill/test-cases/examples/chill/test_lex_order.script b/test-chill/test-cases/examples/chill/test_lex_order.script
new file mode 100644
index 0000000..2629e50
--- /dev/null
+++ b/test-chill/test-cases/examples/chill/test_lex_order.script
@@ -0,0 +1,12 @@
+# print loop 0 of main() in test_lex_order.c without applying any transformation
+source: test_lex_order.c
+procedure: main
+format: rose
+loop: 0
+
+original()
+
+
+
+print
+
diff --git a/test-chill/test-cases/examples/chill/test_split.c b/test-chill/test-cases/examples/chill/test_split.c
new file mode 100644
index 0000000..6ca62cc
--- /dev/null
+++ b/test-chill/test-cases/examples/chill/test_split.c
@@ -0,0 +1,14 @@
+int main() {
+
+	int a[10][10][10][10];
+	int i, j, k, l;
+
+	for (i = 0; i < 10; i++)
+		for (j = 0; j < 10; j++)
+			for (k = 0; k < 10; k++)
+				for (l = 0; l < 10; l++)
+					a[i][j][k + 1][l] = a[i][j][k][l];
+	//    a[i+1][j-1] = a[i][j];
+
+	return 0;
+}
diff --git a/test-chill/test-cases/examples/chill/test_split.script b/test-chill/test-cases/examples/chill/test_split.script
new file mode 100644
index 0000000..e1ebba9
--- /dev/null
+++ b/test-chill/test-cases/examples/chill/test_split.script
@@ -0,0 +1,9 @@
+source: test_split.c
+procedure: main
+format: rose
+loop: 0
+original()
+N=10
+split(0,1, L3-L2-L4 <= 5)  
+print
+
diff --git a/test-chill/test-cases/examples/chill/test_split2.c b/test-chill/test-cases/examples/chill/test_split2.c
new file mode 100644
index 0000000..1ab8e43
--- /dev/null
+++ b/test-chill/test-cases/examples/chill/test_split2.c
@@ -0,0 +1,14 @@
+int main() {
+
+	int a[10][10][10][10];
+	int i, j, k, l;
+
+	for (i = 0; i < 10; i++)
+		for (j = 0; j < 10; j++)
+			for (k = 0; k < 10; k++)
+				for (l = 0; l < 10; l++)
+					a[i][j][k + 1][l - 1] = a[i][j][k][l];
+	//    a[i+1][j-1] = a[i][j];
+
+	return 0;
+}
diff --git a/test-chill/test-cases/examples/chill/test_split2.script b/test-chill/test-cases/examples/chill/test_split2.script
new file mode 100644
index 0000000..bcaa2a0
--- /dev/null
+++ b/test-chill/test-cases/examples/chill/test_split2.script
@@ -0,0 +1,9 @@
+source: test_split2.c
+procedure: main
+format: rose
+loop: 0
+original()
+N=10
+split(0,1, L4 <= 5)  
+print
+
diff --git a/test-chill/test-cases/examples/chill/test_tile.c b/test-chill/test-cases/examples/chill/test_tile.c
new file mode 100644
index 0000000..aeaaefc
--- /dev/null
+++ b/test-chill/test-cases/examples/chill/test_tile.c
@@ -0,0 +1,16 @@
+void func(int n) {
+
+	int i;
+	int a[10];
+
+	for (i = 0; i < n; i++)
+		a[i] = 2;
+
+}
+
+int main() {
+
+	func(10);
+
+	return 0;
+}
diff --git a/test-chill/test-cases/examples/chill/test_tile.script b/test-chill/test-cases/examples/chill/test_tile.script
new file mode 100644
index 0000000..d437145
--- /dev/null
+++ b/test-chill/test-cases/examples/chill/test_tile.script
@@ -0,0 +1,14 @@
+# apply tile(0,1,4) to loop 0 of func() in test_tile.c and print the result
+source: test_tile.c
+procedure: func
+format : rose
+loop: 0
+
+original()
+#permute([3,2,1])
+tile(0,1,4)
+
+
+
+print
+
diff --git a/test-chill/test-cases/examples/chill/tile_violation.c b/test-chill/test-cases/examples/chill/tile_violation.c
new file mode 100644
index 0000000..d719e52
--- /dev/null
+++ b/test-chill/test-cases/examples/chill/tile_violation.c
@@ -0,0 +1,12 @@
+int main() {
+
+	int i, j, k;
+	int a[10][10][10];
+
+	for (i = 0; i < 10; i++)
+		for (j = 0; j < 10; j++)
+			for (k = 0; k < 10; k++)
+				a[i][j + 1][k - 1] = a[i][j][k];
+
+	return 0;
+}
diff --git a/test-chill/test-cases/examples/chill/tile_violation.script b/test-chill/test-cases/examples/chill/tile_violation.script
new file mode 100644
index 0000000..57d1423
--- /dev/null
+++ b/test-chill/test-cases/examples/chill/tile_violation.script
@@ -0,0 +1,14 @@
+# apply tile(0,3,2,1) to loop 0 of main() in tile_violation.c and print the result
+source: tile_violation.c
+procedure: main
+format :rose
+loop: 0
+
+original()
+#permute([3,2,1])
+tile(0,3,2,1)
+
+
+
+print
+
diff --git a/test-chill/test-cases/examples/chill/unroll.c b/test-chill/test-cases/examples/chill/unroll.c
new file mode 100644
index 0000000..68f4633
--- /dev/null
+++ b/test-chill/test-cases/examples/chill/unroll.c
@@ -0,0 +1,31 @@
+#define N 14
+void foo(int n, float* x, float* y, float* z, float* f3, float* f1, float* w) {
+	int dt;	/* NOTE(review): never assigned before it is read in the last loop */
+	/* the four loop nests below are the loops 0..3 selected by unroll.script */
+	int i, j;
+
+	for (i = 1; i <= 14; i++)
+		x[i] = 1.0;
+
+	for (i = 1; i <= 14; i += 3)
+		y[i] = 1.0;
+
+	for (i = N + 1; i <= N + 20; i += 3)
+		z[i] = 1.0;
+
+	for (i = 0; i <= N; i++) {
+		for (j = i; j <= i + N; j++)
+			f3[i] = f3[i] + f1[j] * w[j - i];
+		f3[i] = f3[i] * dt;
+	}
+	/* foo returns void, so the return statement must not carry a value */
+	return;
+}
+
+int main() {
+	float x[N], y[N], z[N], f3[N], f1[N], w[N];
+
+	foo(N, x, y, z, f3, f1, w);
+	return 0;
+}
+
diff --git a/test-chill/test-cases/examples/chill/unroll.script b/test-chill/test-cases/examples/chill/unroll.script
new file mode 100644
index 0000000..e64acb6
--- /dev/null
+++ b/test-chill/test-cases/examples/chill/unroll.script
@@ -0,0 +1,35 @@
+#
+# Test unroll-and-jam. The last loop adapted from the simple
+# convolution example from p463 of "Optimizing Compilers for
+# Modern Architectures", by Randy Allen and Ken Kennedy.
+#
+
+source: unroll.c
+procedure: foo
+format: rose
+# unroll (factor 3) a loop with known iteration count
+loop: 0
+original()
+unroll(0,1,3)
+print
+print space
+
+
+# a strided loop
+loop: 1
+original()
+unroll(0,1,2)
+print
+print space
+
+# lower and upper bounds are not constant
+loop: 2
+original()
+unroll(0,1,20)
+print
+
+# parallelogram iteration space
+loop: 3
+original()
+unroll(0,1,2)
+print
diff --git a/test-chill/test-cases/examples/chill/unroll_violation.c b/test-chill/test-cases/examples/chill/unroll_violation.c
new file mode 100644
index 0000000..d719e52
--- /dev/null
+++ b/test-chill/test-cases/examples/chill/unroll_violation.c
@@ -0,0 +1,12 @@
+int main() {
+
+	int i, j, k;
+	int a[10][10][10];
+
+	for (i = 0; i < 10; i++)
+		for (j = 0; j < 10; j++)
+			for (k = 0; k < 10; k++)
+				a[i][j + 1][k - 1] = a[i][j][k];
+
+	return 0;
+}
diff --git a/test-chill/test-cases/examples/chill/unroll_violation.script b/test-chill/test-cases/examples/chill/unroll_violation.script
new file mode 100644
index 0000000..019473d
--- /dev/null
+++ b/test-chill/test-cases/examples/chill/unroll_violation.script
@@ -0,0 +1,14 @@
+# apply unroll(0,2,2) to loop 0 of main() in unroll_violation.c and print the result
+source: unroll_violation.c
+procedure: main
+format: rose
+loop: 0
+
+original()
+#permute([3,2,1])
+unroll(0,2,2)
+
+
+
+print
+
diff --git a/test-chill/test-cases/examples/cuda-chill/cp.c b/test-chill/test-cases/examples/cuda-chill/cp.c
new file mode 100644
index 0000000..837d7a6
--- /dev/null
+++ b/test-chill/test-cases/examples/cuda-chill/cp.c
@@ -0,0 +1,29 @@
+#define N 1
+
+#define VOLSIZEY 512
+#define VOLSIZEX 512
+#define VOLSIZEZ 1
+#define ATOMCOUNT 4000
+#define GRIDSPACING 0.1
+#define zDim 0
+
+extern float sqrtf(float);
+
+void cenergy_cpu(float atoms[ATOMCOUNT*4],float *energy,float z)
+{
+int i,j,n;float dx,dy,dz; 
+   
+    for (j=0; j<VOLSIZEY; j++) {
+        for (i=0; i<VOLSIZEX; i++) {
+            	  for (n=0;n<ATOMCOUNT;n+=4) {
+				dx = (GRIDSPACING * i) - atoms[n];
+				dy = (GRIDSPACING * j) - atoms[n+1];
+				dz = z - atoms[n+2];
+        		        energy[(j*VOLSIZEX + i)+VOLSIZEX*VOLSIZEY*zDim] += atoms[n+3]/sqrtf( (dx*dx) + (dy*dy)+ (dz*dz) ) ;
+            }
+              
+
+        }
+    }
+}
+
diff --git a/test-chill/test-cases/examples/cuda-chill/cp.lua b/test-chill/test-cases/examples/cuda-chill/cp.lua
new file mode 100644
index 0000000..1ef2264
--- /dev/null
+++ b/test-chill/test-cases/examples/cuda-chill/cp.lua
@@ -0,0 +1,46 @@
+--CUDA-CHiLL script for procedure cenergy_cpu in cp.c
+
+--This function form initializes "CUDAIZE v2" versus "CUDAIZE v1" if you
+--call init() and use global variables to specify procedure and loop
+
+--Second parameter is the procedure name and third is loop #
+init("cp.c", "cenergy_cpu", 0) 
+
+dofile("cudaize.lua") --defines custom tile_by_index, copy_to_registers,
+                     --copy_to_shared methods
+V=512
+N=4000
+N=1
+
+Tj=32
+Ti=16
+Tii=16
+Tjj=16
+
+--normalize_index("j")
+--normalize_index("i")
+print_code()
+normalize_index("n")
+-- TILE COMMANDS ZEROOOOOOOOOOO:3
+--permute(0,{"i","j","n"})
+--tile_by_index({"i","j"},{Ti,Tj},{l1_control="ii",l2_control="jj"},{"ii","jj","i","j","n"})--CU=-1
+tile_by_index({"j","i"},{Tj,Ti},{l1_control="jj",l2_control="ii"},{"jj","ii","j","i","n"})--CU=-1
+--tile_by_index({"n"},{Tn},{l1_control="nn"},{"jj","ii","nn","j","i","n"})--CU=-1
+
+--tile_by_index({"j","i"},{Tjjj,Tiii},{l1_control="jjj",l2_control="iii"},{"jj","ii","nn","jjj","j","iii","i","n"})--CU=3
+--tile_by_index({"i","j"},{Tii,Tjj},{l1_control="iii",l2_control="jjj"},{"ii","jj","i","iii","j","jjj","n"})--CU=3
+--tile_by_index({"j"}, {Tn}, {l1_control="j",l1_tile="jjj"}, {"ii", "jj", "nn","jjj","j","i","n"})
+--tile_by_index({"i"}, {Tii}, {l1_control="iii",l1_tile="i"}, {"ii", "jj", "iii","i","j","n"})
+print_code()
+cudaize("kernel_GPU",{atoms=N*4,energy=V*V*1},{block={"jj","ii"}, thread={"j","i"}})--CU=3
+--cudaize("kernel_GPU",{atoms=N*4,energy=V*V*1},{block={"ii","jj"}, thread={"i","j"}})--CU=3
+print_code()
+copy_to_shared("tx","atoms",-16)
+copy_to_registers("tx","energy")
+--copy_to_texture("atoms")
+--unroll_to_depth(1)
+--unroll(0,9,0)
+--unroll(0,5,0)
+
+--unroll(0,8,256)
+print_code()
diff --git a/test-chill/test-cases/examples/cuda-chill/cudaize.lua b/test-chill/test-cases/examples/cuda-chill/cudaize.lua
new file mode 100644
index 0000000..7359cca
--- /dev/null
+++ b/test-chill/test-cases/examples/cuda-chill/cudaize.lua
@@ -0,0 +1,1004 @@
+
+-- THIS IS CUDAIZE.LUA
+
+--- Tell whether `key` occurs among the keys of `table` (values are ignored).
+-- The parameter named `table` shadows the global `table` library in here.
+-- @param table  table whose keys are inspected
+-- @param key    key to look for
+-- @return true if the key is present, false otherwise
+function table.contains_key(table, key)
+   return rawget(table, key) ~= nil
+end
+
+--- Check that every name in `indices` is a current loop index of `stmt`.
+-- @param stmt statement number for cur_indices(); @param indices array of index names
+-- @return true when every name is found by value, false otherwise
+function valid_indices(stmt, indices)
+   cur = cur_indices(stmt) -- deliberately a global write, as callers may read `cur`
+   local known = {}
+   for _, name in ipairs(cur) do known[name] = true end
+   for _, name in ipairs(indices) do
+      if not known[name] then return false end
+   end
+   return true
+end
+
+--- Find the first level after `level` whose index name is non-empty (not a dummy).
+-- @param cur_idxs  array of index names, one per loop level
+-- @param level     the search starts at level+1
+-- @return the matching level, or -1 as a sentinel when none is left
+function next_clean_level(cur_idxs,level)
+   local last = #cur_idxs
+   local candidate = level + 1
+   while candidate <= last do
+      if #cur_idxs[candidate] >= 1 then
+         return candidate
+      end
+      candidate = candidate + 1
+   end
+   return -1
+end
+
+--- Build the loop order that tile_by_index hands to permute() for a given cur_level.
+-- final_order: array of index names; ctrl_idx_names / tile_idx_names: arrays of names.
+-- tile_idx_map: table keyed by index name, value = the name that may be substituted.
+-- cur_level: only positions >= cur_level+2 of the two name arrays are examined.
+-- Returns a new array: final_order without skipped control names, with substitutions.
+function build_order(final_order, tile_idx_names, ctrl_idx_names, tile_idx_map, cur_level)
+   order = {}
+   
+   for i,k in ipairs(final_order) do
+      skip = false
+      cur = final_order[i]
+      --NOTE: order, skip, cur, approved_sub and sub_string are all global writes
+      --control loops below our current level should not be in the current order
+      for j=cur_level+2,# ctrl_idx_names do
+         --a control name found at position >= cur_level+2 marks this entry as skipped
+         if ctrl_idx_names[j] == final_order[i] then
+            skip = true
+            --(no early exit: the remaining control names are still compared)
+            --a skipped entry is left out of the returned order
+         end
+      end
+      --possibly substitute tile indices if necessary
+      if table.contains_key(tile_idx_map,final_order[i]) then
+         approved_sub = false
+         sub_string = tile_idx_map[final_order[i]]
+         for j=cur_level+2,# tile_idx_names do
+            if tile_idx_names[j] == sub_string then
+               approved_sub = true
+            end
+         end
+         if approved_sub then
+            cur = sub_string
+         end
+      end
+      if not skip then
+         table.insert(order,cur)
+      end
+   end
+   return order
+end
+
+--- Join the entries of an array into one comma-separated string (debug aid).
+-- Side effect: the result is also left in the global `l`.
+-- A one-element array returns that element itself, unconverted.
+-- @param str_list  array of strings
+-- @return the joined text, "" for an empty array
+function list_to_string(str_list)
+   l = ""
+   for position, item in ipairs(str_list) do
+      if position == 1 then l = item else l = l .. ", " .. item end
+   end
+   return l
+end
+
+
+--- Return the 1-based level at which index `idx` sits in statement `stmt`.
+-- Side effect: refreshes the global `cur` from cur_indices(stmt).
+-- @param stmt  statement number
+-- @param idx   index name to locate
+-- Raises an error when `idx` is not among the current indices.
+function find_cur_level(stmt,idx)
+   cur = cur_indices(stmt)
+   for level, name in ipairs(cur) do
+      if name == idx then return level end
+   end
+   error("Unable to find "..idx.." in current list of indices")
+end
+
+
+--- Return the level of `idx` in `stmt` (-1 if absent); refreshes the global `cur`.
+function chk_cur_level(stmt,idx)
+   cur = cur_indices(stmt)
+   local level = 1
+   while cur[level] ~= nil do
+      if cur[level] == idx then return level end
+      level = level + 1
+   end
+   return -1
+end
+
+
+--- Compute the level offset between a control loop and its tile loop.
+-- Both names are searched in `cur_order`; the last match of each one wins.
+-- Side effect: the positions found are left in the globals `idx1` and `idx2`.
+-- @param cur_order  array of index names in their current order
+-- @param tile       name of the tile loop index
+-- @param control    name of the control loop index
+-- @return control position minus tile position, plus 1 when control comes first
+-- Raises an error when either name is missing from cur_order.
+function find_offset(cur_order, tile, control)
+   idx1 = -1
+   idx2 = -1
+   for position, name in ipairs(cur_order) do
+      if name == tile then idx1 = position end
+      if name == control then idx2 = position end
+   end
+   if idx1 < 0 then
+      error("Unable to find tile " .. tile .. " in current list of indices")
+   end
+   if idx2 < 0 then
+      error("Unable to find control " .. control .. " in current list of indices")
+   end
+   local delta = idx2 - idx1
+   if idx2 < idx1 then delta = delta + 1 end
+   return delta
+end
+
+--- Tile the loops named in `tile_indices` of statement 0, one name at a time.
+-- @param tile_indices  array of index names to tile
+-- @param sizes         array of tile sizes; sizes[i] belongs to tile_indices[i]
+-- @param index_names   table keyed "l<i>_control" / "l<i>_tile" naming the generated loops
+-- @param final_order   array of index names handed to build_order() before each permute()
+-- @param tile_method   optional; the global `counted` is used when omitted
+function tile_by_index(tile_indices, sizes, index_names, final_order, tile_method)
+   stmt = 0 --assume stmt 0
+   cur = cur_indices(stmt)
+   if not valid_indices(stmt,tile_indices) then
+      error('One of the indices in the first parameter were not '..
+            'found in the current set of indices.')
+   end
+   if not tile_method then tile_method = counted end
+   tile_idx_names = {}
+   for i,s in ipairs(tile_indices) do tile_idx_names[i]=s end --shallow copy
+   
+   --Every key of index_names must be l<N>_control or l<N>_tile with
+   --1 <= N <= #tile_indices; any other key raises the error further down.
+   
+   ctrl_idx_names = {}
+   tile_idx_map = {}
+   for k,v in pairs(index_names) do
+      valid = false
+      if(string.sub(k,1,1) == "l") then
+         if string.sub(k,-8) == "_control" then
+            i = tonumber(string.sub(k,2,-9))
+            if i and i >= 1 and i <= (# tile_indices) then
+               ctrl_idx_names[i] = v
+               --v becomes the control-loop name for tiled index number i
+               --(i is the number between the leading "l" and "_control")
+               valid = true
+            end
+         elseif string.sub(k,-5) == "_tile" then
+            i = tonumber(string.sub(k,2,-6))
+            if i and i >= 1 and i <= (# tile_indices) then
+               --v replaces the name of tiled index number i ...
+               tile_idx_names[i] = v
+               tile_idx_map[v] = tile_indices[i]
+               --... and tile_idx_map records new name -> original name
+               valid = true
+            end
+         end
+      end
+      if not valid then error(string.format("%s is not a proper key for specifying "..
+                                            "tile or control loop indices\n", k)) end
+   end
+   
+   --filter out control indices (and do name substitution of unprocessed tile indices) for a given level
+   cur_order = build_order(final_order, tile_indices, ctrl_idx_names, tile_idx_map, -1)
+   permute(stmt, cur_order)
+   
+   for i,cur_idx in ipairs(tile_indices) do
+      --cur_level argument is i-1 here (it was -1 for the initial permute above)
+      cur_order = build_order(final_order, tile_indices, ctrl_idx_names, tile_idx_map, i-1)
+      --Find a offset between tile loop and control loop
+      -- 0   = control loop one level above tile loop
+      -- -1  = control loop two levels above tile loop
+      -- > 0 = tile loop above control loop
+      -- In the last case, we do two extra tile commands to get the control
+      -- above the tile and then rely on the final permute to handle the
+      -- rest
+      level = find_cur_level(stmt,cur_idx)
+      offset = find_offset(cur_order, tile_idx_names[i], ctrl_idx_names[i])
+      --both names must occur in cur_order, otherwise find_offset raises an error
+      
+      if (offset <= 0) then
+         --offset <= 0: level+offset is passed as the fourth tile() argument
+         tile(stmt, level, sizes[i], level+offset, tile_idx_names[i], ctrl_idx_names[i], tile_method)
+      else
+         --offset > 0: level itself is passed as the fourth tile() argument
+         tile(stmt, level, sizes[i], level, tile_idx_names[i], ctrl_idx_names[i], tile_method);--regular level
+         --flip tile and control loop
+         --first of the two extra tile commands
+         tile(stmt, level+1, level+1);
+         --second of the two extra tile commands
+         tile(stmt, level+1, level);
+         --the permute() after this branch handles the rest of the ordering
+	 --print_code()
+         
+      end
+      
+      --Do permutation based on cur_order
+      --print "permute based on build order calling build_order()"
+      --print "cur_order = build_order(final_order, tile_indices, ctrl_idx_names, tile_idx_map, i-1)"
+      cur_order = build_order(final_order, tile_indices, ctrl_idx_names, tile_idx_map, i-1)
+      --print "permute(stmt, cur_order);"
+      permute(stmt, cur_order);
+      --print "\nafter permute(), code is:"
+      --print_code()
+   end
+   --print "ENDING TILE BY INDEX"
+   --print_code()
+end
+
+--- Call tile(stmt, l, l) on the level where `index` currently sits in statement 0.
+function normalize_index(index)
+   stmt = 0 --assume stmt 0 (find_cur_level below refreshes the global cur)
+   l = find_cur_level(stmt, index) --global write; errors if index is not found
+   tile(stmt, l, l)
+   --NOTE(review): presumably tile() with two equal levels normalizes the loop - confirm
+end
+
+--- Tell whether `idx` is one of the current index names of statement `stmt`.
+-- Only positions 1..#cur are scanned, so a nil `idx` never matches the empty slot 0.
+-- Side effect: refreshes the global `cur` from cur_indices(stmt).
+function is_in_indices(stmt, idx)
+   cur = cur_indices(stmt)
+   for i = 1, #cur do
+      if cur[i] == idx then return true end
+   end
+   return false
+end
+
+
+--- Rearrange the loops of statement 0, then call datacopy_privatized() for `array_name`.
+-- @param start_loop index name of the loop to copy at; @param array_name name of the array
+function copy_to_registers(start_loop, array_name)
+   io.flush()
+
+   stmt = 0 --assume stmt 0
+   
+   -- [Malik] first we make sure that tx and ty are consecutive loops in the 2D thread setup, otherwise all levels for subsequent operations are messed up. Start logic.
+   cur = cur_indices(stmt)
+   table_Size = #cur -- '#' replaces table.getn(), which Lua 5.2 and later no longer provide
+   
+   --print(string.format("Cur indices %s,",list_to_string(cur)))
+   --print(string.format("The table size is %d", table_Size))
+   --table.foreach(cur, print)
+   --print_code()
+   
+   level_tx = -1
+   level_ty = -1
+   if is_in_indices(stmt,"tx") then level_tx = find_cur_level(stmt,"tx") end
+   if is_in_indices(stmt,"ty") then level_ty = find_cur_level(stmt,"ty") end
+   --print(string.format("level_tx %d  level_ty %d", level_tx, level_ty))
+   
+   ty_lookup_idx = "" 
+   org_level_ty = level_ty
+   
+   --if(cur[level_tx+1]~=nil and cur[level_tx+1]~="") then ty_lookup = ty_lookup+1 end
+   if(cur[level_ty+1]~=nil and cur[level_ty+1]~="") then 
+      --print(string.format("IF  cur[%d] = %s", level_ty+1, cur[level_ty+1]))
+      ty_lookup_idx = cur[level_ty+1] 
+   else
+      --if cur[level_ty]  ~= nil then print(string.format("ELSE ty_lookup_idx = cur[%d] = %s", level_ty, cur[level_ty])) --   TODO 
+      --else print "ELSE (dangerous)" end
+      ty_lookup_idx = cur[level_ty]  -- may assign nil !?
+   end
+   --if ty_lookup_idx ~= nil then print(string.format("ty_lookup_idx '%s'", ty_lookup_idx))  --  TODO 
+   --else print "ty_lookup_idx is NIL"
+   --end
+   
+   if level_ty > 0 then
+      --print(string.format("\ntile3(%d,%d,%d)",stmt,level_ty,level_tx+1))
+      tile(stmt,level_ty,level_tx+1) 
+   end
+   --print_code()
+   
+   --print("\ntylookup is %d",ty_lookup)
+   --exit(0)
+   --
+   cur = cur_indices(stmt)
+   table_Size = #cur
+   --print(string.format("Cur indices %s,",list_to_string(cur)))
+   --print("The table size is "..table.getn(cur))
+   --table.foreach(cur, print)
+   
+   if is_in_indices(stmt,"tx") then   level_tx = find_cur_level(stmt,"tx") end
+   if ty_lookup_idx then
+      if is_in_indices(stmt,ty_lookup_idx) then level_ty = find_cur_level(stmt,ty_lookup_idx) end
+   end
+   
+   ty_lookup = 1
+   idx_flag = -1
+   -- find the level of the next valid index after ty+1
+   --print(string.format("\nlevel_ty %d", level_ty))
+   if level_ty > 0 then
+      --print(string.format("table_Size %d", table_Size))
+      for num= level_ty+ty_lookup,table_Size do
+         --print(string.format("num=%d   cur[num] = '%s'",num, cur[num]))
+         if(cur[num] ~= "") then
+            idx_flag = find_cur_level(stmt,cur[num])
+            --print (string.format("idx_flag = %d", idx_flag))
+            break
+         end
+      end
+   end
+   
+   --print(string.format("\n(first) I am checking all indexes after ty+1 %s",idx_flag))
+   --print_code()
+   --print ""
+   
+   how_many_levels = 1
+   startat = idx_flag + 1
+   if startat == 0 then startat = 1 end  -- avoid attempt to examine an illegal array offset
+   --print(string.format("idx_flag = %d   I will check levels starting with %d", idx_flag, idx_flag+1))
+   
+   for ch_lev = startat,table_Size,1 do    -- was for ch_lev = idx_flag+1,table_Size,1 do
+      --print(string.format("ch_lev %d", ch_lev))
+      if(cur[ch_lev] ~= nil and cur[ch_lev] ~= "") then
+         --print(string.format("cur[%d] = '%s'", ch_lev, cur[ch_lev])) 
+         how_many_levels = how_many_levels+1
+      end
+   end
+   --print("\nHow Many Levels",how_many_levels)
+   
+   -- change this all to reflect the real logic which is to normalize all loops inside the thread loops. 
+   if(how_many_levels <2) then
+      while( idx_flag >= 0) do
+         for num = level_ty+ty_lookup,(table_Size) do
+            --print(string.format("at top of loop, num is %d", num))
+            --print(string.format("num %d", num))
+            --print(string.format("cur[num] = '%s'", cur[num]))
+            if(cur[num] ~= "") then
+               idx=cur[num]
+               --print(string.format("idx '%s'", idx))
+               
+               curlev = find_cur_level(stmt,idx)
+               --print(string.format("curlev %d", curlev))
+               
+               --print_code()
+               --print(string.format("\n[COPYTOREG]tile(%d,%d,%d)",stmt,find_cur_level(stmt,idx),level_tx))
+               tile(stmt,find_cur_level(stmt,idx),find_cur_level(stmt,idx))
+               curlev = find_cur_level(stmt,idx)
+               --print(string.format("curlev %d", curlev))
+               tile(stmt,find_cur_level(stmt,idx),level_tx)
+               --print(string.format("hehe '%s'",cur[num]))
+               
+               cur = cur_indices(stmt)
+               --print("Cur indices INSIDE"..list_to_string(cur))
+               table_Size = #cur
+               --print(string.format("Table Size is: %d",table_Size))
+               level_tx = find_cur_level(stmt,"tx")
+               --print(string.format("\n level TX is: %d",level_tx))
+               level_ty = find_cur_level(stmt,ty_lookup_idx)
+               --print(string.format("\n level TY is: %d",level_ty))
+               idx_flag = -1
+               --print "idx_flag = -1"
+               
+               -- find the level of the next valid index after ty+1
+               
+               -- the following was num, which conflicts with loop we're already in, and otherwise wasn't used (?)
+               for num= level_ty+ty_lookup,table_Size do
+                  --print(string.format("num mucking num = %d", num))
+                  if(cur[num] ~= nil and cur[num] ~= "") then
+                     idx_flag = find_cur_level(stmt,cur[num])
+                     --print("\n(second) I am checking all indexes after ty+1 %s",cur[num])
+                     break
+                  end
+               end
+               --print(string.format("num mucked to %d     idx_flag = %d", num, idx_flag))
+               
+            end
+            --print(string.format("at bottom of loop, num is %d", num))
+         end
+      end
+   end
+   --print "done with levels"
+   
+   
+   
+   
+   --print "ARE WE SYNCED HERE?"
+   --print_code()
+   --print("\ntile(%d,%d,%d)",stmt,level_k,level_k)
+   --tile(stmt,level_k,level_k)
+   
+   -- [Malik] end logic
+   --print_code()
+   start_level = find_cur_level(stmt, start_loop)
+   --We should hold constant any block or tile loop
+   block_idxs = block_indices()
+   thread_idxs = thread_indices()
+   --print("\nblock indices are")
+   --table.foreach(block_idxs, print)
+   --print("\nthread indices are")
+   --table.foreach(thread_idxs, print)
+   --print(string.format("\nStart Level: %d",start_level))
+   
+   hold_constant = {}
+   --print("\n Now in Blocks")
+   for i,idx in ipairs(block_idxs) do
+      --print(string.format("\n Idx:%s : Level: %d",idx,find_cur_level(stmt,idx)))
+      if find_cur_level(stmt,idx) >= start_level then
+         table.insert(hold_constant, idx)
+         --print(string.format("\nJust inserted block %s in hold_constant",idx))
+      end
+   end
+   
+   
+   --print("\n Now in Threads")
+   for i,idx in ipairs(thread_idxs) do
+      --print(string.format("\n Idx:%s : Level: %d",idx,find_cur_level(stmt,idx)))
+      if find_cur_level(stmt,idx) >= start_level then
+         table.insert(hold_constant, idx)
+         --print(string.format("\nJust inserted thread %s in hold_constant",idx))
+      end
+   end
+   
+   --print "\nhold constant table is: "
+   --table.foreach(hold_constant, print)
+   
+   --print("\nbefore datacopy pvt")
+   old_num_stmts = num_statements()
+   --print_code()
+   --print(string.format("\n[DataCopy]datacopy_privatized(%d, %s, %s, vector having privatized levels)",stmt, start_loop, array_name)) 
+   --table.foreach(hold_constant, print)
+   datacopy_privatized(stmt, start_loop, array_name, hold_constant)
+   
+   --print(hold_constant)
+   new_num_stmts = num_statements()
+   --print("\nthe num of statements:%d\n",new_num_stmt)
+   --print_code()
+   --exit(0)
+   -- [Malik] normalize the copy loops created.
+   cur = cur_indices(old_num_stmts)
+   --print("Cur indices "..list_to_string(cur))
+   for cidx,i in ipairs(cur) do
+      if i ~= "tx" and i~="ty" and i~="bx" and i~="by" then
+         --tile(old_num_stmts,find_cur_level(old_num_stmts,i),find_cur_level(old_num_stmts,i))
+         --print("\nTILE OF REG: tile(%d,%d,%d)",old_num_stmts,find_cur_level(old_num_stmts,i),find_cur_level(old_num_stmts,i))
+      end
+   end
+   --print_code()
+   --print("\nthe num of statements OLD+1 :",(old_num_stmts+1))  
+
+
+--[[ 
+   is this commented out? why yes, yes it is   block comment 
+   if( (old_num_stmts+1) <= new_num_stmts) then
+      cur = cur_indices(old_num_stmts+1)
+      --print("Cur indices+1 "..list_to_string(cur))
+      for cidx,i in ipairs(cur) do
+         if i ~= "tx" and i~="ty" and i~="bx" and i~="by" then
+            tile(old_num_stmts+1,find_cur_level(old_num_stmts+1,i),find_cur_level(old_num_stmts+1,i))
+	    --print("\nTILE OF REG: tile(%d,%d,%d)",old_num_stmts+1,find_cur_level(old_num_stmts+1,i),find_cur_level(old_num_stmts+1,i))
+         end
+      end
+   end
+--]]
+
+
+   --Unroll to the last thread level
+   --for stmt=old_num_stmts,new_num_stmts-1 do
+   -- level = find_cur_level(stmt,thread_idxs[#thread_idxs])--get last thread level
+   --if level < #cur_indices(stmt) then
+   -- unroll(stmt,level+1,0)
+   --print(string.format("\n[Unroll]unroll(%d, %d, 0)",stmt, level+1)) 
+   ----print_code()
+   --end
+   --end
+   io.flush()
+   --print("****** ending copy to registers\n\n")
+   --io.flush()
+end
+
+-- copy_to_shared(start_loop, array_name, alignment)
+--
+-- Issues a datacopy() of `array_name` at the current level of loop
+-- `start_loop` in statement 0, calls add_sync(), and then reworks the copy
+-- loops ("tmp1"/"tmp2") of every statement numbered old_num_stmts ..
+-- new_num_stmts-1 (the statements that appeared after those calls) with
+-- tile()/rename_index() so that they use the index names "tx" and "ty".
+--
+--   start_loop  index name; its level is looked up with find_cur_level()
+--   array_name  array handed to datacopy()
+--   alignment   passed to datacopy() unchanged
+--
+-- No value is returned.  Nothing here is declared `local`, so every variable
+-- assigned below is a global; the only exception is the `stmt` control
+-- variable of the numeric for loop, which hides the global `stmt` inside it.
+-- `counted` is read from the enclosing environment (not defined in this function).
+function copy_to_shared(start_loop, array_name, alignment)
+   --print(string.format("\nstarting copy to shared(%s, %s, %d )",start_loop,array_name,alignment))
+   stmt = 0 --assume stmt 0
+   cur = cur_indices(stmt)
+   --print("Cur indices "..list_to_string(cur))
+   
+   start_level = find_cur_level(stmt, start_loop)
+   --print(string.format("start_level %d", start_level))
+   
+   -- Remember how many statements exist before the copy so the new ones can be found afterwards
+   old_num_stmts = num_statements()
+   --print(string.format("old_num_statements %d", old_num_stmts))
+   
+   --Now, we give it indices for up to two dimensions for copy loop
+   copy_loop_idxs = {"tmp1","tmp2"}
+   --print(string.format("\n[DataCopy]datacopy(%d, %d, %s, {\"tmp1\",\"tmp2\"},false,0,1,%d,true)",stmt, start_level, array_name, alignment)) 
+   datacopy(stmt, start_level, array_name, copy_loop_idxs, false, 0, 1, alignment,true)
+   
+   add_sync(stmt,start_loop)
+   new_num_stmts = num_statements()
+   
+   --This is fairly CUBLAS2 specific, not sure how well it generalizes,
+   --but for a 2D copy, what we want to do is "normalize" the first loop
+   --"tmp1" then get its hard upper bound. We then want to tile it to
+   --make the control loop of that tile "ty". We then tile "tmp2" with a
+   --size of 1 and make it "tx".
+   --print(string.format("fairly CUBLAS2 specific, OLD %d  NEW %d",  old_num_stmts, new_num_stmts ))
+   
+   for stmt=old_num_stmts,new_num_stmts-1 do
+      --print(string.format("for stmt = %d", stmt))
+      -- pcall is used as an existence test: success means this statement has a "tmp2" loop
+      was_no_error, level = pcall(find_cur_level, stmt, "tmp2")
+      
+      if was_no_error then 
+         --print_code() 
+         --print("\nCopy to shared: [If was no error]\n")
+         -- NOTE(review): the result of this call is discarded; `level` already holds the value returned through pcall above
+         find_cur_level(stmt,"tmp2")
+         tile(stmt, level, level)
+         
+         lower,upper = hard_loop_bounds(stmt, level)
+         -- +1 for the same reason as in the single-level branch below: the bound is inclusive, thread dims are exclusive
+         upper = upper + 1
+         --print(string.format("lower %d  upper %d", lower, upper))
+         
+         tx,ty = thread_dims()
+         --print("2-loop cleanup: lower, upper: "..lower..", "..upper..", tx: "..tx)
+         
+         level = find_cur_level(stmt,"tmp1")
+         --print(string.format("level %d", level))
+         
+         if tx == upper and ty == 1 then
+            --print(string.format("tx = %d    upper = %d     ty = %d", tx, upper, ty))
+            --print "Don't need"
+            
+            --Don't need an extra tile level, just move this loop up
+            second_level = find_cur_level(stmt,"tmp2")
+            --print(string.format("\n[Tile0]tile(%d, %d, 1, %d,%s,%s,counted)",stmt, second_level, level, "tx", "tx")) 
+            tile(stmt, second_level, 1, level, "tx", "tx", counted)
+         else
+            --print "DO need?"
+            --print_code()
+            -- NOTE(review): new_ctrl is only referenced inside the commented-out block that follows
+            if(ty == 1) then new_ctrl = "tmp3" else new_ctrl = "ty" end
+
+
+--[[ Commenting out a block of Gabe's code in this control flow
+               -- level = find_cur_level(stmt,"tmp1")
+               tile(stmt, level, level)
+
+               lower,upper = hard_loop_bounds(stmt, level)
+               upper = upper + 1
+               --print_code()
+               --print("2-loop cleanup: lower, upper: "..lower..", "..upper..", tx: "..tx..", level: "..level)
+               if(math.ceil(upper/ty) > 1)then
+                  tile(stmt, level, math.ceil(upper/ty), level, "tmp", new_ctrl, counted)
+                  --print(string.format("\n[Tile1]tile(%d, %d, %f[%d,%d], %d,%s,%s,counted)",stmt, level,  math.ceil(upper/ty),upper,ty, level, "tmp", new_ctrl)) 
+               else
+                  tile(stmt, level, math.ceil(upper/ty), level, "ty", new_ctrl, counted)
+		  --print(string.format("\n[Tile1]tile(%d, %d, %f[%d,%d], %d,%s,%s,counted)",stmt, level,  math.ceil(upper/ty),upper,ty, level, "tx", new_ctrl))
+               end
+               
+               --print_code()    
+               -- [Malik] If here we have the loop upper bound > tx, then we should tile once more after the next tile, to carve out the correct tx. 
+               lower1,upper1 = hard_loop_bounds(stmt,level)
+               level1 = level
+               stmt1 = stmt
+               -- [Malik] Do the tile after the second level tile with if condition. Just to keep the original order, the tile is being pushed to the end. 
+               
+               --print("[Malik]-loop cleanup: lower1, upper1: "..lower1..", "..upper1..", tx: "..tx..", level:"..level1)
+
+               --print_code()
+               --level = find_cur_level(stmt,"tmp")
+               --tile(stmt,level,level)
+               --print_code() 
+               
+               --[Malik] if you are moving the loop above the level1, you need to update level1 with new position which would be level1+2 or second_level
+               if(level <= level1) then level1 = level1+2 end
+ 	       --print(string.format("\n[Tile2]tile(%d, %d, 1, %d,%s,%s,counted)",stmt, second_level, level, "tx", "tx")) 
+               --print("\n----------------------------------")
+               --print_code()
+               --print("\n**********************************")
+               --print("[Malik]-loop cleanup: lower1, upper1: "..lower1..", "..upper1..", tx: "..tx..", level:"..level1)
+               -- [Malik] If the upper bound > tx, we do another tile to carve out the correct tx from a bigger loop. Else just normalize the bounds. 
+               if( upper1 > ty) then
+                  third_level = find_cur_level(stmt1,"tmp")
+                  --print("\n\n\n\t\t\t\tthirdlevel:"..third_level)
+                  tile(stmt1, third_level, ty, third_level, "ty", "tmp", counted)
+                  --print(string.format("\n[Tile3]tile(%d, %d, %d,%d,%s,%s,counted)",stmt1, third_level, ty,third_level, "ty", "tmp"))
+                  tile(stmt1,third_level+1,third_level+1)
+                  --print(string.format("\n[Tile3]tile(%d, %d, %d)",stmt1, third_level+1, third_level+1))
+                  tile(stmt1,third_level+1,third_level)
+                  --print(string.format("\n[Tile3]tile(%d, %d, %d)",stmt1, third_level+1, third_level))
+               else
+                  tile(stmt1,level1,level1)
+                  --print(string.format("\n[Tile3ELSE]tile(%d, %d, %d)",stmt1,level1,level1))
+               end
+               
+               --print("\nStarting tmp2\n");--print_code();
+               second_level = find_cur_level(stmt,"tmp2")
+               lower,upper = hard_loop_bounds(stmt,second_level)
+               level = second_level
+               --print("[Malik]-loop cleanup@tmp2: lower, upper: "..lower..", "..upper..", tx: "..tx..", level:"..level)
+               
+               if(math.ceil(upper/tx) > 1)then
+                  tile(stmt, second_level,math.ceil(upper/tx), level, "tmp", "tx", counted)
+                  --print(string.format("\n[Tile2]tile(%d, %d, %d,%d,%s,%s,counted)",stmt, second_level,math.ceil(upper/tx),second_level, "tmp", "tx"))
+               else
+                  tile(stmt, second_level,math.ceil(upper/tx), level, "tx", "tx", counted)
+                  --print(string.format("\n[Tile2]tile(%d, %d, %d,%d,%s,%s,counted)",stmt, second_level,math.ceil(upper/tx),second_level, "tx", "tx"))
+               end
+               --print_code()
+               lower2,upper2 = hard_loop_bounds(stmt,level)
+               level2 = level
+               stmt2 = stmt
+               --print("[Malik]-loop cleanup@tmp2: lower2, upper2: "..lower2..", "..upper2..", tx: "..tx..", level:"..level2)
+               -- now for the second level.
+               if( upper2 > tx) then
+                  forth_level = find_cur_level(stmt2,"tmp")
+                  --print("\n\n\n\t\t\t\tforthlevel:"..forth_level)
+                  --print_code()
+                  tile(stmt2, forth_level, 1, forth_level, "tx", "tmp", counted)
+                  --print(string.format("\n[Tile3B]tile(%d, %d, %d,%d,%s,%s,counted)",stmt2, forth_level, tx,forth_level, "ty", "tmp"))
+                  --print_code()
+                  --tile(stmt2,forth_level+1,forth_level+1)
+                  --print(string.format("\n[Tile3B]tile(%d, %d, %d)",stmt2, forth_level+1, forth_level+1))
+                  --tile(stmt2,forth_level+1,forth_level)
+                  --print(string.format("\n[Tile3B]tile(%d, %d, %d)",stmt2, forth_level+1, forth_level))
+               else
+                  new_level = find_cur_level(stmt2,"ty")
+                  tile(stmt2,level2,1,new_level,"tx","tx",counted)
+                  --print(string.format("\n[Tile3BELSE]tile(%d, %d, %d)",stmt2,level2,level2))
+                  tmp_level = find_cur_level(stmt2,"tmp")
+                  tile(stmt2,tmp_level,tmp_level)
+               end
+               
+               --print_code()
+               --print("\n----------------------------------")
+--]]
+               
+               --print_code() 
+               --print("\nStarting tmp2\n");--print_code();
+               first_level = find_cur_level(stmt,"tmp1")
+               second_level = find_cur_level(stmt,"tmp2")
+               lower,upper = hard_loop_bounds(stmt,second_level)
+               
+               --print("[Malik]-loop cleanup@tmp2: lower, upper: "..lower..", "..upper..", tx: "..tx..",first level:"..first_level..",second_level:"..second_level)
+               
+               -- Move the fastest changing dimension loop to the outermost,identified by "tmp2" and to be identified as tx.
+               --print(string.format("\n[fastest]tile(%d, %d, %d,%d,%s,%s,counted)",stmt, second_level,1,first_level, "tx", "tx"))
+               tile(stmt,second_level,1,first_level,"tx","tx",counted)
+               --print_code()
+               
+               -- Levels are looked up again because the tile() call above may have moved the loops
+               first_level = find_cur_level(stmt,"tmp1")
+               lower_1,upper_1 = hard_loop_bounds(stmt,first_level)
+               tx_level = find_cur_level(stmt,"tx")
+               lower_tx,upper_tx = hard_loop_bounds(stmt,tx_level)
+               --print(string.format("UL_1 %d %d     UL_tx %d %d", lower_1, upper_1, lower_tx, upper_tx))
+               
+               -- The "tx" loop is wider than the thread dimension tx: split it into "tx" and "tmp_tx"
+               if(math.ceil(upper_tx/tx) > 1)then
+                  --print "ceil I say"
+                  --print(string.format("\n[Tile1]tile(%d, %d, %d,%d,%s,%s,counted)",stmt, tx_level,tx,tx_level, "tx", "tmp1"))
+                  tile(stmt,tx_level,tx,tx_level,"tx","tmp_tx",counted)
+                  --print_code()
+                  
+                  peat = find_cur_level(stmt,"tx")
+                  --print(string.format("\n[Tile1]tile(%d, %d, %d)",stmt, peat, peat))
+                  tile(stmt, peat, peat )  --find_cur_level(stmt,"tx"),find_cur_level(stmt,"tx"))
+                  --print_code()
+                  
+                  if (find_cur_level(stmt,"tx")>find_cur_level(stmt,"tmp_tx")) then
+                     --print(string.format("\nagain [Tile1]tile(%d, %d, %d)",stmt,find_cur_level(stmt,"tx"),find_cur_level(stmt,"tmp_tx")))
+                     tile(stmt,find_cur_level(stmt,"tx"),find_cur_level(stmt,"tmp_tx"))
+                     --print_code()
+                  end
+                  --else
+                  --tile(stmt, tx_level,1, tx_level, "tx", "tx", counted)
+                  --print(string.format("\n[Tile2]tile(%d, %d, %d,%d,%s,%s,counted)",stmt, tx_level,1,tx_level, "tx", "tx"))
+               end
+               --print_code()
+               --]]  -- this apparently is NOT the end of a block comment
+               
+               --print("\nStarting tmp1\n")
+               -- Handle the other slower changing dimension, the original outermost loop, now identified by "tmp1", to be identified as "ty".
+               tile(stmt,find_cur_level(stmt,"tmp1"),find_cur_level(stmt,"tmp1"))     
+               --print_code()  
+               
+               ty_level = find_cur_level(stmt,"tmp1")
+               lower_ty,upper_ty = hard_loop_bounds(stmt,ty_level)
+               
+               tx_level = find_cur_level(stmt,"tx")
+               lower_tx,upper_tx = hard_loop_bounds(stmt,tx_level)
+               --print("[Malik]-loop cleanup@tmp1: lowerty, upperty: "..lower_ty..", "..upper_ty..", ty: "..ty..",ty level:"..ty_level..",tx_level:"..tx_level..", stmt: "..stmt)
+               
+               --print "before ceil"
+               if(math.ceil(upper_ty/ty) > 1)then
+                  --print "CEIL IF"
+                  --print("\n Inside upper_ty/ty > 1\n");
+                  
+                  --print(string.format("\n[Tile2]tile(%d, %d, %d,%d,%s,%s,counted)",stmt, ty_level,ty,ty_level, "ty", "tmp_ty"))
+                  tile(stmt,ty_level,ty,ty_level,"ty","tmp_ty",counted)
+                  --print_code()
+                  
+                  --print(string.format("\n[Tile2-1]tile(%d, %d, %d)",stmt,find_cur_level(stmt  ,"ty"),find_cur_level(stmt,"ty")))
+                  tile(stmt,find_cur_level(stmt,"ty"),find_cur_level(stmt,"ty"))
+                  --print_code()
+                  
+                  -----------------------------------------------------------------------
+                  ----------------------------------------------------------------------
+                  cur_idxs = cur_indices(stmt)
+                  --print("\n cur indexes are "..list_to_string(cur_idxs))
+                  
+                  -- Putting ty before any tmp_tx   
+                  -- NOTE(review): the loop bound comes from `cur_idxs` but the lookup reads the global `cur`,
+                  -- which this function last assigned from cur_indices(0) at its top; it also starts at
+                  -- index 0 -- confirm both are intended (helpers may reassign `cur`; not visible here)
+                  idx_flag = -1
+                  for num= 0,table.getn(cur_idxs) do
+                     if(cur[num] == "tmp_tx") then
+                        idx_flag = find_cur_level(stmt,cur[num])
+                        break
+                     end
+                  end
+                  --print(string.format("\n (1) so i have found out the value of idx flag as %d",idx_flag) )
+                  
+                  -- NOTE(review): although idx_flag was derived from "tmp_tx", the comparison below is against "tmp_ty"
+                  if(idx_flag >=0 ) then  
+                     if (find_cur_level(stmt,"ty")>find_cur_level(stmt,"tmp_ty")) then
+                        --print(string.format("\n[Tile2-2]tile(%d, %d, %d)",stmt,find_cur_level(stmt,"ty"),find_cur_level(stmt,"tmp_ty")))
+                        tile(stmt,find_cur_level(stmt,"ty"),find_cur_level(stmt,"tmp_ty"))
+                        --print_code()
+                     end
+                  end
+                  
+                  -- Now Putting ty before any tmp_ty
+                  idx_flag = -1
+                  for num= 0,table.getn(cur_idxs) do
+                     if(cur[num] == "tmp_ty") then
+                        idx_flag = find_cur_level(stmt,cur[num])
+                        break
+                     end
+                  end
+		  --print(string.format("\n IF  so i have found out the value of idx flag as %d",idx_flag) )
+                  if(idx_flag >=0 ) then  
+                     --print "one more test"
+                     if ((find_cur_level(stmt,"ty")>find_cur_level(stmt,"tmp_ty"))) then
+                        --print(string.format("\n[Tile2-2]tile(%d, %d, %d)",stmt,find_cur_level(stmt,"ty"),find_cur_level(stmt,"tmp_ty")))
+                        tile(stmt,find_cur_level(stmt,"ty"),find_cur_level(stmt,"tmp_ty"))
+                        --print_code()
+                     end
+                  end
+               else
+                  --print "CEIL ELSE"
+                  --cur_idxs = cur_indices(stmt)
+                  --print("\n Inside upper_ty/ty <= 1\n");
+                  
+                  --print(string.format("\n[Tile3]tile(%d, %d, %d,%d,%s,%s,counted)",stmt, ty_level,1,ty_level, "ty", "ty"))
+                  tile(stmt, ty_level,1, ty_level, "ty", "ty", counted)
+                  --print_code()
+                  
+                  --print(string.format("\n[Tile3-1]tile(%d, %d, %d)",stmt,find_cur_level(stmt,"ty"),find_cur_level(stmt,"tx")+1))
+                  tile(stmt,find_cur_level(stmt,"ty"),find_cur_level(stmt,"tx")+1)
+                  --print_code()
+                  
+                  -- NOTE(review): `cur_idxs` is not assigned on this branch (the assignment above is commented out),
+                  -- so the test below sees whatever value the global held before -- confirm this is intended
+                  idx_flag = -1
+                  if(cur_idxs) then
+                     --print "CAN NEVER GET HERE?  cur_idxs"
+                     for num= 0,table.getn(cur_idxs) do
+                        if(cur[num] == "tmp_ty") then
+                           idx_flag = find_cur_level(stmt,cur[num])
+                           break
+                        end
+                     end
+                  end
+                  --print(string.format("\n ELSE so i have found out the value of idx flag as %d",idx_flag) )
+                  if(idx_flag >=0 ) then  
+                     if (find_cur_level(stmt,"ty")>find_cur_level(stmt,"tmp_ty")) then
+                        --print(string.format("tile( stmt %d, level ty %d, level ty %d",stmt,find_cur_level(stmt,"ty"),find_cur_level(stmt,"tmp_ty"))) 
+                        tile(stmt,find_cur_level(stmt,"ty"),find_cur_level(stmt,"tmp_ty"))
+                        --print(string.format("\n[Tile3-2]tile(%d, %d, %d)",stmt,find_cur_level(stmt,"ty"),find_cur_level(stmt,"tmp_ty")))
+                     end
+                  end
+               end
+               
+               --print_code()
+         end
+         
+         
+         --print "\n\n *** at bottom of if in copy to shared, "
+         --print_code()
+         --print "end of if"
+         
+      else
+         --copy to shared only created one level, not two, so we use a different approach (MV & TMV)
+         --print("\nCopy to shared: [If was error]\n")
+         level = find_cur_level(stmt,"tmp1")
+         tile(stmt, level, level)
+         
+         --print(string.format("\n[Tile]tile(%d, %d, %d)",stmt, level, level)) 
+         tx,ty = thread_dims()
+         lower,upper = hard_loop_bounds(stmt, level)
+         upper = upper+1 --upper bound given as <=, compare to dimensions tx which is <
+         --print("upper "..upper.." tx "..tx)
+         if upper == tx then
+            -- The copy loop already has exactly tx iterations: renaming it is enough
+            rename_index(stmt, "tmp1", "tx")
+         else
+            --print("upper is not tx")
+            --TODO: Don't know, maybe do some tiling etc
+            --print_code()
+            --print("upper "..upper.." tx "..tx.." stmt: "..stmt.." level: "..level)
+            tile(stmt, level,tx,level, "tx", "tmp_tx", counted)
+            --print_code()
+            
+            --print("stmt:"..stmt.." level+1: "..level+1)
+            --print("TILE 7")
+            tile(stmt, level+1,1,level+1,"tx", "tx",counted)
+            --print("TILE 3")
+            tile(stmt,level+1,level)
+            --print_code()
+            
+            if(ty > 1) then
+               --print_code()
+               --print("GOING IN")
+               lower,upper = hard_loop_bounds(stmt, level+1)
+               --print(string.format("ty %d  lower %d  upper %d", ty, lower, upper))
+               --upper=125
+               --print("NOW FOR Y: upper "..upper.." ty "..ty.." stmt: "..stmt.." level: "..(level+1).." bound:"..math.ceil(upper/ty))
+               tile(stmt, level+1,math.ceil(upper/ty),level+1, "tmp_ty", "ty", counted)
+               --tile(stmt, level+2,math.ceil(upper/ty),level+2, "tmp_ty", "ty", counted)
+            end
+            --print_code()
+            --rename_index(stmt, "tmp1", "tx")
+            --print("Warning: Need to implement some logic here to tile the single level shared copy loop to match thread dimensions")
+         end
+      end
+      --Always add sync
+      add_sync(stmt,start_loop)
+      
+   end
+   --print("ending copy to shared\n")
+   --print_code()
+end
+
+-- unroll_to_depth(max_depth)
+--
+-- For every statement that contains the last thread index, collects up to
+-- `max_depth` levels (chosen by next_clean_level(), starting from the level
+-- of that index) and calls unroll(stmt, level, 0) on them in reverse order of
+-- collection.  The whole pass is repeated until num_statements() no longer
+-- changes between the start and the end of a pass.
+--
+-- Before that, the globals `common_loops` / `comm_loops_cnt` are filled with
+-- index names that occur before "tx" in one statement and also in a later
+-- statement.  Nothing further down in this function reads them (only
+-- commented-out debug code does) -- NOTE(review): confirm whether any other
+-- code depends on these globals.
+--
+--   max_depth  maximum number of levels collected per statement and pass
+--
+-- No value is returned.  Nothing here is declared `local`, so all variables
+-- assigned below are globals; only the numeric for-loop variables are local.
+function unroll_to_depth(max_depth)
+   --print(string.format("\n\nunroll_to_depth(%d)", max_depth ))
+   --print "SYNC UP"
+   
+   cur = cur_indices(0)
+   thread_idxs = thread_indices()
+   -- The last thread index is the "guard": levels to unroll are searched starting from its level
+   guard_idx = thread_idxs[#thread_idxs]
+   
+   --print(string.format("cur    indices %s",list_to_string(cur)))
+   --print(string.format("thread indices %s",list_to_string(thread_idxs)))
+   --print(string.format("#thread_idxs = %d", #thread_idxs))
+   --print(string.format("guard_idx = %s", guard_idx))
+   
+   ---- HERE FIND OUT THE LOOPS WHICH ARE COMMON BETWEEN STATEMENTS   
+   common_loops = {}
+   comm_loops_cnt = 0
+   num_stmts = num_statements()
+   --print(string.format("num statements %d", num_stmts))
+   
+   for stmt=0,num_stmts-1 do
+      cur_idxs = cur_indices(stmt)
+      
+      --print(string.format("\nSTMT %d Current Indices: %s",stmt,list_to_string(cur_idxs)))
+      
+      -- Only statements that have a "tx" index are considered; the indices before "tx" are scanned
+      if(chk_cur_level(stmt,"tx")>0) then
+         for ii=1,find_cur_level(stmt,"tx")-1 do    -- started at 0
+            --print(string.format("ii = %d", ii)) -- index starts at 1, what does index 0 do?
+            --if cur_idxs[ii] == nil then print "cur_idxs[i]] is NIL" 
+            --else print(string.format("cur_idxs[%d] = '%s'", ii, cur_idxs[ii])) -- index starts at 1, what does index 0 do?
+            --end
+            
+            -- Skip block/thread indices as well as nil and empty names
+            if(cur_idxs[ii] ~= "bx" and cur_idxs[ii] ~= "by" and cur_idxs[ii] ~= nil and cur_idxs[ii] ~= "tx" and cur_idxs[ii] ~= "ty" and cur_idxs[ii] ~= "") then 
+               
+               --print(string.format("id %s is not in the list", cur_idxs[ii] ))
+               
+               for stmt1=stmt+1,num_stmts-1 do
+                  --print(string.format("\nii %d stmt1 is %d", ii, stmt1))          
+                  cur_idxs1 = cur_indices(stmt1)
+                  --print("\nstmt1 cur_idxs1 is "..list_to_string(cur_idxs1))   
+                  
+                  --print(string.format("cur level(%d, %s) = %d", stmt, "tx",  find_cur_level(stmt,"tx")))    
+                  
+                  -- NOTE(review): endrange is only used by the commented-out print below
+                  endrange = find_cur_level(stmt,"tx")-1
+                  --print(string.format("for iii=1, %d do", endrange))
+                  
+                  -- NOTE(review): the range over stmt1's indices is bounded by the "tx" level of `stmt`, not of `stmt1`
+                  for iii=1,find_cur_level(stmt,"tx")-1 do  -- started at 0
+                     --print(string.format("stmt %d   ii %d   iii %d ", stmt, ii, iii))
+                     --if(cur_idxs1[iii] ~= nil) then 
+                     --   print(string.format("stmt %d   ii %d   iii %d  cur_idxs1[%d] = '%s'", stmt, ii, iii, iii, cur_idxs1[iii]))  
+                     --else 
+                     --   print(string.format("stmt %d   ii %d   iii %d  cur_idxs1[%d] = NIL", stmt, ii, iii, iii))  
+                     --end
+                     
+                     if(cur_idxs1[iii] ~= "bx" and cur_idxs1[iii] ~= "by" and cur_idxs1[iii] ~= nil and cur_idxs1[iii] ~= "tx" and cur_idxs1[iii] ~= "ty" and cur_idxs1[iii] ~= "") then  
+                        if(cur_idxs[ii] == cur_idxs1[iii]) then
+                           --print("\nfound idx:"..cur_idxs[ii])
+			   --if(comm_loops_cnt == 0) then print "\n\n*** WARNING *** assigning to array index ZERO in Lua" end
+                           -- The first match is stored at index 0 (comm_loops_cnt starts at 0)
+                           common_loops[comm_loops_cnt] = cur_idxs[ii]
+                           --print(string.format("cl[%d] = '%s'", comm_loops_cnt,   common_loops[comm_loops_cnt]))
+                           comm_loops_cnt = comm_loops_cnt + 1
+                        end
+                     end  
+                  end
+               end  
+            end
+         end
+      end
+   end
+   ----
+   --if(comm_loops_cnt>0) then 
+   --   print("\n COMM LOOPS :TOTAL "..comm_loops_cnt..", and are "..list_to_string(common_loops).." this loop :"..common_loops[0])
+   --else
+   --   print "UNROLL can't unroll any loops?"
+   --end
+   
+   
+   
+   
+   -- Repeat the unroll pass until a pass leaves the statement count unchanged
+   repeat
+      old_num_stmts = num_statements()
+      --print(string.format("old_num_statements %d", old_num_stmts))
+      
+      for stmt=0,old_num_stmts-1 do
+         cur_idxs = cur_indices(stmt)
+         --print(string.format("stmt %d    cur_idxs = %s", stmt, list_to_string(cur_idxs)))
+         if(#cur_idxs > 0) then 
+            -- -1 marks "this statement does not contain the guard index"
+            gaurd_level = -1
+            if(chk_cur_level(stmt,guard_idx)>0) then
+               gaurd_level = find_cur_level(stmt,guard_idx)
+            end
+            --print(string.format("guard_level(sp) = %d", gaurd_level))
+            
+            if(gaurd_level>-1) then
+               level = next_clean_level(cur_idxs,gaurd_level)
+               --print(string.format("next clean level %d", level))
+               
+               --need to handle max_depth
+               num_unrolled = 0
+               level_unroll_comm = level
+               -- level_arr is filled from index 0 upwards
+               level_arr = {}
+               while level >= 0 do
+                  --print(string.format("while: level = %d", level))
+                  
+                  if num_unrolled == max_depth then break end
+                  --print("Unrolling "..stmt.." at level "..(level).." index ".. cur_idxs[gaurd_level+1])
+                  
+                  level_arr[num_unrolled] = level
+                  num_unrolled = num_unrolled + 1
+                  
+                  -- NOTE(review): this assigns `guard_level`, a different name from the `gaurd_level`
+                  -- used above; the value is not read anywhere in this function
+                  guard_level = find_cur_level(stmt,guard_idx)
+                  level = next_clean_level(cur_idxs,level+1)
+               end
+               --dies print("How many levels for unroll commands"..table.getn(level_arr).." which is "..level_arr[0].." and "..level_arr[#level_arr])
+               --if(table.getn(level_arr) ~= nil) then
+               
+               --print "OK, NOW WE UNROLL"
+               
+               if(level_unroll_comm >= 0)then
+                  -- Walk level_arr backwards down to its index 0.
+                  -- NOTE(review): presumably relies on table.getn() not counting the entry at index 0 -- confirm
+                  for i = table.getn(level_arr),0,-1 do
+                     --print(string.format("\ni=%d", i))
+                     --print(string.format("[Unroll]unroll(%d, %d, 0)",stmt, level_arr[i]))     
+                     
+                     unroll(stmt,level_arr[i],0)
+                     --print("finished unroll]]\n")
+                     --print_code()
+                  end
+               end
+------
+            end    
+--[[
+
+THERE WAS A BIG BLOCK OF COMMENTED OUT CODE HERE 
+
+
+--]]
+------
+         end
+      end
+      new_num_stmts = num_statements()
+
+   until old_num_stmts == new_num_stmts
+
+end
+
+
diff --git a/test-chill/test-cases/examples/cuda-chill/cudaize.py b/test-chill/test-cases/examples/cuda-chill/cudaize.py
new file mode 100755
index 0000000..ffef009
--- /dev/null
+++ b/test-chill/test-cases/examples/cuda-chill/cudaize.py
@@ -0,0 +1,1047 @@
+#! /usr/bin/python
+
+# THIS IS CUDAIZE.PY
+
+import chill
+import sys
+import math 
+
+strided = 0  # tiling-method code; presumably the "strided" counterpart of `counted` -- TODO confirm against chill
+counted = 1  # tiling-method code; tile_by_index() falls back to 1 ("counted") when tile_method is None
+
+def print_code():
+    """Call chill.print_code(), then print an empty line and flush stdout."""
+    chill.print_code()
+    print ""
+    # flush Python's stdout buffer right away; presumably to keep this in order with output written by the C side
+    sys.stdout.flush()
+
+    
+def table_contains_key( table, key ):  # use a dict for the 'table'?
+    return table.has_key(key) # (key in table)?
+
+def print_array( arr ):  # a useful function to mimic lua output 
+    for a in arr[:-1]:
+        print "%s," % a,
+    print "%s" % arr[-1]
+    sys.stdout.flush()
+
+def valid_indices( statement, indices ):
+    #print "valid_indices() python calling C cur_indices"
+    #print statement
+    cur = chill.cur_indices(statement) # calls C
+    #print "python valid_indices(), cur = ",
+    #print cur
+    #print "indices = ",
+    #print indices
+
+    for index in indices:
+        if not index in cur:
+            return False
+    return True
+
+def next_clean_level( indices_at_each_level, level):
+    #print "next_clean_level( ..., %d )" % level 
+    #print "indices_at_each_level ",
+    print_array( indices_at_each_level )
+
+    numlevels = len(indices_at_each_level)
+    #print "loop to %d" % numlevels
+    for i in range(level+1, numlevels+1):
+        pythoni = i-1 # LUA index starts at 1
+        #print "Checking level %d = '%s'" % (i, indices_at_each_level[pythoni])
+        sys.stdout.flush()
+        if len(indices_at_each_level[pythoni]) > 0: # LUA INDEX STARTS AT 1
+            #print "returning %d" % i
+            return i  # MATCH lua return value, LUA index starts at one
+    return -1  # no non-dummy indices
+
+
+
+
+def build_order(  final_order, tile_index_names, control_index_names, tile_index_map, current_level):
+    order = []   
+    #print "\nbuild_order()"
+    #print "build_order(): final_order = (",
+    count = 0
+    for f in final_order:
+        #if count+1 == len(final_order):
+        #    print "%s )" % f
+        #else:
+        #    print "%s," % f ,
+        count += 1
+
+        keys = control_index_names.keys()
+        keys.sort()
+        #if (2 == len(keys)):
+        #    print "build_order(): ctrl_idx_names = (%s, %s)" % (control_index_names[0], control_index_names[1])
+        #else:
+        #    print "build_order(): ctrl_idx_names = (%s" % control_index_names[0],
+        #    for k in keys[1:]:
+        #        print ", %s" % control_index_names[k],
+        #    print ")"
+
+    #print control_index_names
+    #print "cur_level %d" % current_level
+    
+    #print "tile index map: ",
+    #print tile_index_map
+
+
+    for i in range(len(final_order)):
+        k = final_order[i]  # not used?
+        skip = False
+        cur = final_order[i]  
+        # control loops below our current level should not be in the current order
+
+        # skip = cur in control_index_names[current_level+2:] 
+        #print "\n%d control_index_names, " % len(control_index_names)
+        #print control_index_names
+
+        for j in range(current_level+1, len(control_index_names)):
+            #print "comparing cur %s with cin[%d] %s" % ( cur, j, control_index_names[j])
+            if control_index_names[j] == cur:
+                skip = True 
+                #print "SKIP %s  " % cur
+
+        # possibly substitute tile indices if necessary
+        if tile_index_map.has_key(cur):
+            approved_sub = False
+            sub_string = tile_index_map[cur]
+            #print "sub_string = ",
+            #print sub_string
+
+            # approved_sub = sub_string in tile_index_names[current_level+2:]
+            for j in range(current_level+1, len(tile_index_names)):
+                if tile_index_names[j] == sub_string:
+                    approved_sub = True
+            if approved_sub:
+                cur = sub_string
+
+        if not skip:
+            order.append( cur)  
+    #print "build_order() returning order (",
+    #print order
+    #for o in order:
+    #    print "%s," % o,
+    #print ")"
+    return order
+
+def find_cur_level( stmt, idx ):
+    #print "find_cur_level(stmt %d, idx %s)  Cur indices" % ( stmt, idx ),
+    
+    cur = chill.cur_indices(stmt)
+    #for c in cur[:-1]:
+    #    print "%s," % c,
+    #print "%s" % cur[ -1 ] 
+
+    index = 1 # lua starts indices at 1 !!  
+    for c in cur:
+        if c == idx:
+            #print "found it at index %d" % index
+            #sys.stdout.flush()
+            #print "in find_cur_level, returning ",
+            #print index
+            return index
+        index += 1
+    #print "find_cur_level(), Unable to find index %s in" % idx,
+    #print cur
+    #print "in find_cur_level, returning -1"
+    return -1  # special meaning "it's not there"
+
+def chk_cur_level( stmt, idx ):
+    # search cur_indices for a ind at stmt
+    cur = chill.cur_indices(stmt)
+    if idx in cur:
+       return 1 + cur.index(idx)  # lua index starts at 1 !
+    return -1
+
+def find_offset( cur_order, tile, control):
+    #print "Looking for tile '%s' and control '%s' in (" % (tile, control),
+    #print cur_order
+    #for o in cur_order:
+    #    print "%s," % o,
+    #print ")"
+
+    idx1 = -1
+    idx2 = -1
+    if tile in cur_order: 
+        idx1 = 1 + cur_order.index(tile) # lua indexes from 1!
+    else:
+        print "find_offset(), unable to find tile %s in current list of indices" % tile
+        sys.exit(-1)
+
+    if control in cur_order:
+        idx2 = 1 + cur_order.index(control) # lua indexes from 1!
+    else:
+        print "find_offset(), unable to find control %s in current list of indices" % control
+        sys.exit(-1)
+
+    #print "found at level %d and %d" % ( idx2, idx1 )
+    # this appears horrible
+    if idx2 < idx1:
+        return idx2-idx1+1 # bad ordering
+    else:
+        return idx2-idx1
+
+
+
+def tile_by_index( tile_indices, sizes, index_names, final_order, tile_method):
+    #print "STARTING TILE BY INDEX"
+    #print "tile_by_index() tile_method ",
+    #print tile_method
+    #print "index_names: ",
+    #print index_names
+
+    stmt = 0 # assume statement 0
+    if not valid_indices( stmt, tile_indices):
+        print "python tile_by_index() one or more of ",
+        print tile_indices,
+        print " is not valid"
+        sys.exit(-1)
+
+    if tile_method == None:
+        #print "CREATING tile_method = 1"
+        tile_method = 1 # "counted"
+
+    tile_index_names = []
+    for ti in tile_indices:
+        tile_index_names.append( ti )  # make a copy? 
+    #print "tile_index_names:",
+    #print tile_index_names
+
+    control_index_names = {} # a dictionary?
+    tile_index_map =  {}
+    
+    #print "index_names: "
+    #print index_names
+
+    for pair in index_names:
+        valid = False
+        control = pair[0]
+        name    = pair[1]
+        #print "control %s   name  %s" % ( control, name )
+        
+        if control[0] == "l" and control[1].isdigit():
+            if control.endswith("_control"):
+                index = int(control[1: -8])
+                control_index_names[index-1] = name
+                valid = True
+
+            elif control.endswith("_tile"):
+                index = int(control[1: -5])
+                #print "index %d" % index
+                tile_index_names[index-1] = name # ?? 
+                tile_index_map[name] = tile_indices[index-1]
+                valid = True
+        if not valid:
+            print "%s is not a proper key for specifying tile or control loop indices\n" % control
+
+    #print "control_index_names = ",
+    #print control_index_names
+
+    #print "tile_index_names = ",
+    #print tile_index_names
+
+    #print "before call to build_order(), tile_index_map = ",
+    #print tile_index_map
+
+
+    # filter out control indices (and do name substitution of unprocessed tile indices) for a given level
+    cur_order = build_order(final_order, tile_indices, control_index_names, tile_index_map, -1)
+
+    #print "returned from build_order python\n\n"
+
+    # print("permute("..stmt..", {"..list_to_string(cur_order).."})")
+    #print "permute(%d, {" % stmt,
+    #print "cur_order = ",
+    #print cur_order,
+    #print "})"
+
+    cur_order.insert(0, stmt)
+    #print cur_order
+    chill.permute( tuple( cur_order)) 
+    #print "in cudaize.py, returned from C code chill.permute()\n"
+
+    for i in range(len(tile_indices)):
+        cur_idx = tile_indices[i]
+        #print "i %d  cur_idx %s calling build order ********" % (i, cur_idx)
+        cur_order = build_order( final_order, tile_indices, control_index_names, tile_index_map, i)
+        #print "cur_idx %s return from build order" % cur_idx
+        
+        # Find an offset between tile loop and control loop
+        #  0   = control loop one level above tile loop
+        #  -1  = control loop two levels above tile loop
+        #  > 0 = tile loop above control loop
+        #  In the last case, we do two extra tile commands to get the control
+        #  above the tile and then rely on the final permute to handle the
+        #  rest
+        level = find_cur_level(stmt,cur_idx)
+        #print "level %d\n" % level     
+
+        offset = find_offset(cur_order, tile_index_names[i], control_index_names[i])
+        #print "offset %d" % offset
+
+        if offset <= 0:
+            #print "[offset<=0]1tile(%d, %d, %d, %d, %s, %s, %d)" % (stmt, level, sizes[i], level+offset, tile_index_names[i], control_index_names[i], tile_method  )
+            chill.tile7( stmt, level, sizes[i], level+offset, tile_index_names[i], control_index_names[i], tile_method  )
+            #print "in cudaize.py, returned from C code chill.tile7\n"
+
+        else:
+            #print "2tile(%d, %d, %d, %d, %s, %s, %d)" % (stmt, level, sizes[i], level+offset-1, tile_index_names[i], control_index_names[i], tile_method  )
+            chill.tile7( stmt, level, sizes[i], level+offset-1, tile_index_names[i], control_index_names[i], tile_method  ) # regular level
+
+            # flip and tile control loop
+            #print "3tile(%d, %d, %d)" % ( stmt, level+1, level+1)
+            chill.tile3( stmt, level+1, level+1)
+
+            #print "4tile(%d, %d, %d)" % ( stmt, level+1, level)
+            chill.tile3( stmt, level+1, level)
+
+            #print_code()
+
+        # Do permutation based on cur_order
+        #print("permute based on build order calling build_order()")
+        cur_order = build_order(final_order, tile_indices, control_index_names, tile_index_map, i)
+
+        #print("permute based on build order return from build_order()")
+
+        #  print("permute("..stmt..", {"..list_to_string(cur_order).."})")
+        topermute = cur_order
+        topermute.insert(0, stmt)
+        chill.permute( tuple(topermute) ) 
+        #print "\nafter permute(), code is:"
+        #print_code()
+
+def normalize_index( index ):
+    #print "in cudaize.py, normalize_index( %s )" % index
+    stmt = 0  # assume stmt 0
+    l = find_cur_level( stmt, index )
+    chill.tile3( stmt, l, l )
+
+def is_in_indices( stmt, idx):
+    cur = chill.cur_indices(stmt)
+    return idx in cur
+
+def copy_to_registers( start_loop, array_name ):
+    """Rearrange the loops of statement 0, then call chill.datacopy_privatized().
+
+    Steps, in order:
+      1. If a "ty" index exists, call chill.tile3(stmt, level_ty, level_tx+1).
+      2. Look for named (non-empty) indices starting at the ty position.  When the
+         count below leaves how_many_levels < 2, those loops are re-tiled with
+         chill.tile3() until no further named index is found.
+      3. Collect the block and thread indices whose level is >= the level of
+         `start_loop` and pass them, together with `start_loop` and `array_name`,
+         to chill.datacopy_privatized() as one tuple.
+
+    start_loop -- index name; looked up with find_cur_level() and forwarded to chill.
+    array_name -- forwarded unchanged to chill.datacopy_privatized().
+
+    Returns None.  The body is a port of Lua code (1-based tables), which is why
+    many subscripts below are shifted by one.
+    """
+    #print "\n\n****** starting copy to registers"
+    #sys.stdout.flush()
+
+    stmt = 0    # assume stmt 0
+    cur = chill.cur_indices(stmt) # calls C    
+    table_Size = len(cur)
+
+    #print "Cur indices",
+    #print_array(cur)
+    #print "\nThe table size is %d" % table_Size
+    #count=1
+    #for c in cur:
+    #    print "%d\t%s" % (count,c)
+    #    count += 1
+
+    #print_code()
+
+    # would be much cleaner if not translating this code from lua!
+    # -1 means "index not present"; the levels are only looked up when the name exists
+    level_tx = -1
+    level_ty = -1   
+    if is_in_indices(stmt,"tx"):
+        level_tx = find_cur_level(stmt,"tx")
+    if is_in_indices(stmt,"ty"):
+        level_ty = find_cur_level(stmt,"ty")
+    #print "level_tx %d  level_ty %d" % ( level_tx, level_ty )
+    #sys.stdout.flush()
+
+    ty_lookup_idx = "" 
+    org_level_ty = level_ty  # NOTE(review): not read by any live code below
+
+    # UGLY logic. Lua index starts at 1, so all tests etc here are off by 1 from the lua code
+    # level_ty initializes to -1 , which is not a valid index, and so there is added code to 
+    # make it not try to acccess offset -1.   -1 IS a valid python array index
+    # to top it off, the else below can assign a NIL to ty_lookup_idx! 
+    # NOTE(review): with level_ty == -1 the else branch reads cur[-2], which Python
+    # resolves to the second-to-last entry (IndexError if cur has fewer than two) -- confirm intent
+    if level_ty != -1 and cur[level_ty] != "":
+        #print "IF  cur[%d] = %s" % ( level_ty, cur[level_ty] )
+        ty_lookup_idx = cur[level_ty] 
+    else:
+        #print "ELSE ty_lookup_idx = cur[%d] = %s" % ( level_ty, cur[level_ty-1]) 
+        ty_lookup_idx = cur[level_ty-1] 
+    #print "ty_lookup_idx '%s'" % ty_lookup_idx
+
+    if level_ty > -1:
+        #print "\ntile3(%d,%d,%d)" % (stmt,level_ty,level_tx+1)
+        chill.tile3(stmt,level_ty,level_tx+1) 
+    #print_code()   
+
+    # the tile3 call may have changed the loop nest, so the index list is fetched again
+    cur = chill.cur_indices(stmt) # calls C 
+    table_Size = len(cur)
+    #print "Cur indices ",
+    #for c in cur:
+    #    print "%s," % c,
+    #print "\nThe table size is %d" % len(cur)
+    #count=1
+    #for c in cur:
+    #    print "%d\t%s" % (count,c)
+    #    count += 1
+    #sys.stdout.flush()
+
+    if is_in_indices(stmt,"tx"):
+        level_tx = find_cur_level(stmt,"tx")
+    if ty_lookup_idx != "":                      # perhaps incorrect test 
+        if is_in_indices(stmt,ty_lookup_idx):
+           level_ty = find_cur_level(stmt,ty_lookup_idx)
+           
+    ty_lookup = 1
+    idx_flag = -1
+    # find the level of the next valid index after ty+1
+    #print "\nlevel_ty %d" % level_ty
+    if level_ty > -1:
+       #print "table_Size %d" % table_Size
+       for num in range(-1 + level_ty+ty_lookup,table_Size):   # ??  off by one?
+           #print "num=%d   cur[num] = '%s'" % (num+1, cur[num]) # num+1 is lua index ????
+           sys.stdout.flush()
+           if cur[num] != "":
+               idx_flag = find_cur_level(stmt,cur[num])
+               #print "idx_flag = %d" % idx_flag
+               break
+               
+    #print "\n(first) I am checking all indexes after ty+1 %s" % idx_flag
+    #print_code()   
+    #print "" 
+
+    # starts at 1, so a value below 2 after the loop means no named index was counted
+    how_many_levels = 1
+    
+    #print "idx_flag = %d   I will check levels starting with %d" % (idx_flag, idx_flag+1)
+    # lua arrays start at index 1. the next loop in lua starts at offset 0, since idx_flag can be -1
+    # thus the check for "not equal nil" in lua (bad idea)
+    # python arrays start at 0, so will check for things that lua doesn't (?)
+    startat = idx_flag + 1
+    if idx_flag == -1:
+        startat = 1  # pretend we're lua for now.   TODO: fix the logic
+
+    # ch_lev is a 1-based level, hence the ch_lev-1 subscript
+    for ch_lev in range(startat,table_Size+1):       # logic may be wrong (off by one)
+        #print "ch_lev %d" % ch_lev
+        if ch_lev <= table_Size and cur[ch_lev-1] != "":
+           #print "cur[%d] = '%s'" % ( ch_lev, cur[ch_lev-1] )
+           how_many_levels += 1
+
+    #print "\nHow Many Levels %d" % how_many_levels
+    sys.stdout.flush()
+    sys.stdout.flush()
+
+    if how_many_levels< 2:
+        # repeats until the rescan at the bottom of the inner loop finds no named index
+        # (idx_flag stays -1)
+        while( idx_flag >= 0):
+            for num in range(level_ty+ty_lookup,table_Size+1):
+                #print "at top of loop, num is %d" % num
+                #print "cur[num] = '%s'" % cur[num-1]
+                if cur[num-1] != "":
+                    idx = cur[num-1]
+                    #print "idx '%s'" % idx
+                    sys.stdout.flush()
+                    curlev = find_cur_level(stmt,idx)
+                    #print "curlev %d" % curlev
+
+                    #print "\n[COPYTOREG]tile(%d,%d,%d)"%(stmt,curlev,level_tx)
+
+                    # tile the loop onto its own level first, look its level up again,
+                    # then tile it to level_tx
+                    chill.tile3(stmt, curlev, curlev)
+                    curlev = find_cur_level(stmt,idx)
+                    #print "curlev %d" % curlev
+                    chill.tile3(stmt,curlev,level_tx)
+                    #print "hehe '%s'" % cur[num-1]
+                    
+                    cur = chill.cur_indices(stmt)
+                    #print "Cur indices INSIDE",
+                    #for c in cur:
+                    #    print "%s," % c,
+                    table_Size = len(cur)
+                    #print "\nTable Size is: %d" % len(cur)
+
+                    level_tx = find_cur_level(stmt,"tx")
+                    #print "\n level TX is: %d" % level_tx
+                    level_ty = find_cur_level(stmt,ty_lookup_idx)
+                    #print "\n level TY is: %d" %level_ty
+                    idx_flag = -1
+                    #print "idx_flag = -1"
+
+
+                    #- find the level of the next valid index after ty+1
+                    #- the following was num, which conflicts with loop we're already in, and otherwise wasn't used (?)
+                    for num2 in range( -1 + level_ty+ty_lookup ,table_Size): # lua starts index at one
+                        #print "num mucking num = %d" % num2
+                        if(cur[num2] != ""):
+                            #print "cur[%d] = '%s'" % ( num2, cur[num2] )
+                            idx_flag = find_cur_level(stmt,cur[num2])
+                            #print("\n(second) I am checking all indexes after ty+1 %s",cur[num2])
+                            break
+
+                    #print "num mucked to %d     idx_flag = %d" % (num, idx_flag)
+
+                #print "at bottom of loop, num is %d" % num
+          
+    #print "done with levels"
+
+    # this was a block comment ???
+
+#    for num in range(level_ty+1, table_Size+1):
+#        print "num %d" % num
+#        if cur[num-1] != "":
+#            idx_flag = find_cur_level(stmt,cur[num-1])  ## ugly 
+#    print "idx_flag = %d" % idx_flag
+
+    # change this all to reflect the real logic which is to normalize all loops inside the thread loops. 
+#    print "change this all ...\n"
+#    print "level_ty+1 %d  table_Size-1 %d     idx_flag %d" %( level_ty+1, table_Size-1, idx_flag)
+#    sys.stdout.flush()
+#    sys.stdout.flush()
+
+#    while level_ty+1 < (table_Size-1) and idx_flag >= 0:
+#        print "*** level_ty %d" %  level_ty
+#        for num in range(level_ty+2,table_Size+1):  # lua for includes second value
+#            print "num %d   cur[num] %s" % (num, cur[num])
+#            if cur[num] != "":
+#                idx = cur[num]
+#                print "idx='%s'" % idx
+#                #print_code()
+                
+                
+            
+
+    #print "ARE WE SYNCED HERE?"
+    #print_code()
+
+    #  [Malik] end logic
+    start_level = find_cur_level(stmt, start_loop) # start_loop was passed parameter!
+
+    # We should hold constant any block or tile loop
+    block_idxs  = chill.block_indices()
+    thread_idxs = chill.thread_indices()
+    #print"\nblock indices are"
+    #for index, val in enumerate(block_idxs):
+    #    print "%d\t%s" % ( int(index)+1 , val )
+    #print"\nthread indices are"
+    #for index, val in enumerate(thread_idxs):
+    #    print "%d\t%s" % ( int(index)+1 , val )
+    #print "\nStart Level: %d" % start_level
+
+    # block indices first, then thread indices, each kept only when its level is
+    # at or beyond start_level
+    hold_constant = []
+    #print("\n Now in Blocks")
+    for idx in block_idxs:
+        blocklevel = find_cur_level(stmt,idx)
+        if blocklevel >= start_level:
+           hold_constant.append(idx)
+           #print "\nJust inserted block %s in hold_constant" %idx
+
+    #print("\n Now in Threads")
+    for idx in thread_idxs:
+        blocklevel = find_cur_level(stmt,idx)
+        if blocklevel >= start_level:
+            hold_constant.append(idx)
+            #print "\nJust inserted thread %s in hold_constant" %idx
+    #print "\nhold constant table is: "
+    #for index, val in enumerate(hold_constant):
+    #    print "%d\t%s" % ( int(index)+1 , val )
+    
+    #print("\nbefore datacopy pvt")
+    # NOTE(review): old_num_stmts is not read by live code; the commented-out loop
+    # at the end refers to old_num_statements instead
+    old_num_stmts = chill.num_statements()
+    #sys.stdout.flush()
+
+    #print "\n[DataCopy]datacopy_privatized(%d, %s, %s, " % (stmt, start_loop, array_name),
+    #print hold_constant,
+    #print ")"
+    # argument tuple layout: stmt, start_loop, array_name, count of held indices,
+    # then the held index names themselves
+    passtoC = [stmt, start_loop, array_name ] # a list
+    passtoC.append( len(hold_constant ) )
+    for h in hold_constant:
+        passtoC.append( h )
+    chill.datacopy_privatized( tuple( passtoC ))
+    sys.stdout.flush()
+    sys.stdout.flush()
+    
+    new_num_statements = chill.num_statements()
+    #print "new num statements %d" % new_num_statements    
+
+    # Unroll to the last thread level
+#    for stmt in range(old_num_statements, new_num_statements):
+#        print "unrolling statement %d" % stmt
+#        level = find_cur_level(stmt,thread_idxs[-1]) #get last thread level
+#        print "level is %d" % level
+#        idxs = chill.cur_indices(stmt)
+#        if level < len(idxs):
+#            chill.unroll(stmt,level+1,0)
+
+
+
+def copy_to_shared( start_loop, array_name, alignment ):
+    """Call chill.datacopy_9arg() for `array_name` and re-tile the copy loops it creates.
+
+    For statement 0 the copy is requested at the level of `start_loop`, with
+    "tmp1" and "tmp2" given as the copy-loop index names, followed by
+    chill.add_sync().  Every statement added by the copy is then handled:
+      * if it has a "tmp2" loop, "tmp2" is tiled into "tx" and "tmp1" into "ty",
+        with extra tile steps when a loop's extent exceeds the corresponding
+        entry of chill.thread_dims();
+      * otherwise only "tmp1" exists; it is renamed to "tx" or tiled into
+        "tx"/"tmp_tx", plus one more tile step when ty > 1.
+    chill.add_sync() is called once more for each of those statements.
+
+    start_loop -- index name; used for the level lookup and for add_sync().
+    array_name -- forwarded to chill.datacopy_9arg().
+    alignment  -- forwarded to chill.datacopy_9arg().
+
+    Returns None.
+    NOTE(review): `counted` is used below but not assigned in this function;
+    presumably a module-level constant -- confirm it is defined.
+    """
+    #print "\nstarting copy to shared( %s, %s, %d)" % (start_loop, array_name, alignment ) 
+    #print "copy_to_shared( %s, %s, %d) in cudaize.py" % ( start_loop, array_name, alignment )
+    stmt = 0 # assume statement 0
+
+    # NOTE(review): cur is only referenced by the commented-out debug print below
+    cur = chill.cur_indices(stmt)
+    #print "Cur indices ",
+    #print_array( cur )
+
+    start_level = find_cur_level( stmt, start_loop )
+    #print "start_level %d" % start_level
+
+    old_num_statements = chill.num_statements()
+    #print "old_num_statements %d" % old_num_statements
+    
+
+    # Now, we give it indices for up to two dimensions for copy loop
+    copy_loop_idxs = ["tmp1","tmp2"]
+    #chill.datacopy_9arg(stmt, start_level, array_name, copy_loop_idxs, False, 0, 1, alignment,True)
+    # argument tuple layout: stmt, start_level, array_name, number of copy-loop
+    # names, the names, then 0, 0, 1, alignment, 1 (see the commented call above)
+    passtoC = [stmt, start_level, array_name]   # a list
+    passtoC.append( len(copy_loop_idxs))
+    for i in copy_loop_idxs:
+        passtoC.append(i)
+    passtoC.append( 0 ) # False
+    passtoC.append( 0 )
+    passtoC.append( 1 )
+    passtoC.append( alignment )
+    passtoC.append( 1 )   # True
+    #print "\n[DataCopy]datacopy( ",
+    #print passtoC,
+    #print ")"
+
+    #if array_name == "b":
+    #    chill.cheat(1)
+    #if array_name == "c":
+    #    chill.cheat(2)
+    
+    chill.datacopy_9arg( tuple( passtoC ))
+
+    #print "back from datacopy_9arg\n\n\n"
+    #sys.stdout.flush()
+
+
+    #print "calling add_sync( %d, %s )" % ( stmt, start_loop )
+    chill.add_sync( stmt, start_loop )
+    #print "back from add_sync()\n\n"
+
+    new_num_statements = chill.num_statements()
+    
+    #  This is fairly CUBLAS2 specific, not sure how well it generalizes,
+    #  but for a 2D copy, what we want to do is "normalize" the first loop
+    #  "tmp1" then get its hard upper bound. We then want to tile it to
+    #  make the control loop of that tile "ty". We then tile "tmp2" with a
+    #  size of 1 and make it "tx".
+
+    #print "fairly CUBLAS2 specific, OLD %d  NEW %d" % ( old_num_statements, new_num_statements)
+    sys.stdout.flush()
+    sys.stdout.flush()
+
+    # only the statements created by the datacopy call above are visited;
+    # stmt is rebound here and no longer means statement 0
+    for stmt in range(old_num_statements, new_num_statements):
+        #print "for stmt = %d" % stmt
+        level = find_cur_level( stmt, "tmp2")
+        #print "FOUND CUR LEVEL?  level '",
+        #print level,
+        #print "'"
+
+        #print "in loop, stmt %d   level %d" % ( stmt, level )
+        if level != -1:
+            #print "\nCopy to shared: [If was no error]\n"
+            # NOTE(review): the result of this call is discarded; level already holds it
+            find_cur_level(stmt,"tmp2")
+            chill.tile3( stmt, level, level )
+            
+            #print "hard_loop_bounds( %d, %d )" % (stmt, level)
+            bounds = chill.hard_loop_bounds(stmt, level)
+            lower = bounds[0]
+            upper = 1+ bounds[1]
+            #print "lower %d  upper %d" % ( lower, upper )
+
+            dims = chill.thread_dims()
+            #print "in cudaize.py copy_to_shared, dims =",
+            #print dims
+            tx = dims[0]
+            ty = dims[1]
+            #print "2-loop cleanup: lower, upper: %d, %d,  tx: %d" % ( lower, upper, tx)
+
+            level = find_cur_level(stmt,"tmp1")
+            #print "level %d" % level
+            if tx == upper and ty == 1:
+                #print "tx = %d    upper = %d     ty = %d"% (tx, upper, ty)
+                #print "Don't need"
+
+                # Don't need an extra tile level, just move this loop up
+                second_level = find_cur_level(stmt,"tmp2")
+                chill.tile7(stmt, second_level, 1, level, "tx", "tx", counted)
+
+            else:
+                #print "DO need?"
+                # NOTE(review): new_ctrl is assigned here but not read by any live code below
+                if ty == 1:
+                    new_ctrl = "tmp3" 
+                else:
+                    new_ctrl = "ty"
+
+                # LOTS of commented out code here in cudaize.lua 
+
+                #print_code()
+                #print "\nStarting tmp2\n"
+                first_level  = find_cur_level(stmt,"tmp1")
+                second_level = find_cur_level(stmt,"tmp2")
+                bounds = chill.hard_loop_bounds(stmt, second_level)
+                lower = bounds[0]
+                upper = 1 + bounds[1]   # BROKEN?
+                        
+                #print "[Malik]-loop cleanup@tmp2: lower, upper: %d, %d, tx: %d,first level:%d,second_level:%d" % ( lower, upper-1, tx, first_level, second_level) 
+
+                # Move the fastest changing dimension loop to the outermost,identified by "tmp2" and to be identified as tx.
+                #print "\n[fastest]tile(%d, %d, %d,%d,%s,%s,counted)"%(stmt, second_level,1,first_level, "tx", "tx")
+                chill.tile7(stmt, second_level,1,first_level,"tx","tx",counted)
+                #print_code()
+
+                first_level = find_cur_level(stmt,"tmp1")
+                bounds = chill.hard_loop_bounds(stmt, first_level)
+                lower_1 =     bounds[0]
+                upper_1 = 1 + bounds[1]
+                tx_level = find_cur_level(stmt,"tx")
+                bounds = chill.hard_loop_bounds(stmt,tx_level)
+                lower_tx =   bounds[0]
+                upper_tx = 1+bounds[1]
+                #print "UL_1 %d %d     UL_tx %d %d" % ( lower_1, upper_1-1, lower_tx, upper_tx-1)
+
+                # true exactly when the tx loop's extent is larger than the thread dimension tx
+                if int(math.ceil( float(upper_tx)/float(tx))) > 1:
+                     #print "ceil I say"
+                     #print "\n[Tile1]tile(%d, %d, %d,%d,%s,%s,counted)" % (stmt, tx_level,tx,tx_level, "tx", "tmp1")
+                     chill.tile7(stmt,tx_level,tx,tx_level,"tx","tmp_tx",counted)
+                     #print_code()
+
+                     repeat = find_cur_level(stmt,"tx")
+                     #print "\n[Tile1]tile(%d, %d, %d)" % (stmt, repeat, repeat)
+                     chill.tile3(stmt, repeat, repeat)  #find_cur_level(stmt,"tx"),find_cur_level(stmt,"tx"))
+                     #print_code()
+
+                     if find_cur_level(stmt,"tx")>find_cur_level(stmt,"tmp_tx"):
+                        #print "\nagain [Tile1]tile(%d, %d, %d)" % (stmt,find_cur_level(stmt,"tx"),find_cur_level(stmt,"tmp_tx"))
+                        chill.tile3(stmt,find_cur_level(stmt,"tx"),find_cur_level(stmt,"tmp_tx"))
+                        #print_code()
+
+                #print_code()
+
+                #print "\nStarting tmp1\n"
+                # Handle the other slower changing dimension, the original outermost loop, now identified by "tmp1", to be identified as "ty".
+                chill.tile3(stmt,find_cur_level(stmt,"tmp1"),find_cur_level(stmt,"tmp1"))      
+                #print_code()
+
+                ty_level = find_cur_level(stmt,"tmp1")
+                bounds = chill.hard_loop_bounds(stmt,ty_level)
+                lower_ty = bounds[0]
+                upper_ty = 1 + bounds[1]
+
+                tx_level = find_cur_level(stmt,"tx")
+                bounds = chill.hard_loop_bounds(stmt,tx_level)
+                lower_tx = bounds[0]
+                upper_tx = 1 + bounds[1]
+
+                #print "[Malik]-loop cleanup@tmp1: lowerty, upperty: %d, %d, ty: %d,ty level:%d,tx_level:%d, stmt: %d" % ( lower_ty, upper_ty-1, ty, ty_level, tx_level, stmt)
+                
+                #print "before ceil"
+                #sys.stdout.flush()
+
+                # true exactly when the tmp1 loop's extent is larger than the thread dimension ty
+                if(math.ceil(float(upper_ty)/float(ty)) > 1):
+                    #print "CEIL IF"
+                    #print "\n Inside upper_ty/ty > 1\n"
+
+                    #print "\n[Tile2]tile(%d, %d, %d,%d,%s,%s,counted)"%(stmt, ty_level,ty,ty_level, "ty", "tmp_ty")
+                    chill.tile7(stmt,ty_level,ty,ty_level,"ty","tmp_ty",counted)
+                    #print_code()
+
+                    #print "\n[Tile2-1]tile(%d, %d, %d)"%(stmt,find_cur_level(stmt  ,"ty"),find_cur_level(stmt,"ty"))
+                    chill.tile3(stmt,find_cur_level(stmt,"ty"),find_cur_level(stmt,"ty"))
+                    #print_code()
+
+                    cur_idxs = chill.cur_indices(stmt)
+                    #print "\n cur indexes are ",
+                    #print_array( cur_idxs)
+                    #sys.stdout.flush()
+
+                    # Putting ty before any tmp_tx
+                    # idx_flag is only used as a "name is present" marker (>= 0)
+                    idx_flag = -1
+                    if "tmp_tx" in cur_idxs:
+                        idx_flag = 1 + cur_idxs.index("tmp_tx")   # lua index starts at 1
+                    #print "\n (1) so i have found out the value of idx flag as %d" % idx_flag
+                    #sys.stdout.flush()      
+                    
+                    if idx_flag >= 0:
+                         if find_cur_level(stmt,"ty") > find_cur_level(stmt,"tmp_ty"):
+                             #print "\n[Tile2-2]tile(%d, %d, %d)"%(stmt,find_cur_level(stmt,"ty"),find_cur_level(stmt,"tmp_ty"))
+                             chill.tile3(stmt,find_cur_level(stmt,"ty"),find_cur_level(stmt,"tmp_ty"))
+                             #print_code()
+                    
+                    
+                    #  Now Putting ty before any tmp_ty
+                    # NOTE(review): cur_idxs was fetched before the tile3 call above and is not refreshed here
+                    sys.stdout.flush()      
+                    idx_flag = -1
+                    if "tmp_ty" in cur_idxs:
+                        idx_flag = 1 + cur_idxs.index("tmp_ty") # lua index starts at 1
+                    #print "\n IF  so i have found out the value of idx flag as %d" % idx_flag
+                    #sys.stdout.flush()      
+                                            
+                    if idx_flag >= 0:
+                        #print "one more test"
+                        sys.stdout.flush()
+                        if find_cur_level(stmt,"ty")>find_cur_level(stmt,"tmp_ty"):
+                            #print "\n[Tile2-2]tile(%d, %d, %d)"%(stmt,find_cur_level(stmt,"ty"),find_cur_level(stmt,"tmp_ty"))
+                            #sys.stdout.flush()
+                            chill.tile3(stmt,find_cur_level(stmt,"ty"),find_cur_level(stmt,"tmp_ty"))
+                            #print_code()
+
+
+
+                else:
+                    #print "CEIL ELSE"
+                    #print "\n[Tile3]tile(%d, %d, %d,%d,%s,%s,counted)" % (stmt, ty_level,1,ty_level, "ty", "ty")
+                    #sys.stdout.flush()
+                    chill.tile7( stmt, ty_level, 1, ty_level, "ty", "ty", counted )
+                    #print_code()
+
+                    #print "\n[Tile3-1]tile(%d, %d, %d)"%(stmt,find_cur_level(stmt,"ty"),find_cur_level(stmt,"tx")+1)
+                    sys.stdout.flush()
+
+                    chill.tile3(stmt,find_cur_level(stmt,"ty"),find_cur_level(stmt,"tx")+1)
+                    #print_code()
+
+
+                    idx_flag = -1
+                    # LUA code checks to see if cur_idxs exists?  it is unused except in the other clause of this is
+                    #if(cur_idxs) then
+                        #print "CAN NEVER GET HERE?  cur_idxs"
+                        #for num= 0,table.getn(cur_idxs) do
+                            #if(cur[num] == "tmp_ty") then
+                            #idx_flag = find_cur_level(stmt,cur[num])
+                            #break
+                        #end
+                    #end
+                    # NOTE(review): unlike the other debug prints, this one is live and
+                    # writes to stdout every time this branch runs
+                    print "\n ELSE so i have found out the value of idx flag as %d" % idx_flag
+                    if idx_flag >= 0:  # can't happen
+                        print "tile( stmt %d, level ty %d, level ty %d" % ( stmt,find_cur_level(stmt,"ty"),find_cur_level(stmt,"tmp_ty"))
+                        #chill.tile3(stmt,find_cur_level(stmt,"ty"),find_cur_level(stmt,"tmp_ty"))
+                    
+                        
+                    
+
+                    
+            #print "\n\n *** at bottom of if in copy to shared, "
+            #print_code()
+            #print "end of if"
+
+        else:
+            #  copy to shared only created one level, not two, so we use a different approach (MV & TMV)
+            #print "\nCopy to shared: [If was error]\n"
+            level = find_cur_level(stmt,"tmp1")
+            chill.tile3(stmt, level, level)
+
+            dims = chill.thread_dims()
+            #print dims
+            tx = dims[0]
+            ty = dims[1]
+
+            bounds = chill.hard_loop_bounds(stmt, level)
+            lower = bounds[0]   
+            upper = bounds[1]
+
+            #print "bounds  lower %d    upper %d" % (lower, upper)
+            upper = upper+1 # upper bound given as <=, compare to dimensions tx which is <
+            if upper == tx:
+                #print "upper == tx"
+                chill.rename_index( stmt, "tmp1", "tx")
+            else:
+                #print "upper is not tx"
+                #print "upper %d tx %d stmt: %d level: %d" % ( upper, tx, stmt, level)
+                chill.tile7( stmt, level, tx, level, "tx", "tmp_tx", counted)
+                #print_code()
+
+                #print "stmt:%d level+1: %d" % ( stmt, level+1) 
+                #print("TILE 7")
+                chill.tile7( stmt, level+1,1,level+1,"tx", "tx",counted)
+                #print("TILE 3")
+                chill.tile3( stmt, level+1, level)
+                #print_code()           
+
+
+                if ty > 1:
+                   #print "GOING IN"
+                   bounds = chill.hard_loop_bounds(stmt, level+1)
+                   lower = bounds[0]   
+                   upper = bounds[1]   
+                   #print "ty %d  lower %d  upper %d" % ( ty, lower, upper )
+                   # NOTE(review): floatdiv is not read afterwards; also, upper is not
+                   # incremented here as it is for the other bounds -- confirm intended
+                   floatdiv = float(upper)/float(ty)
+                   bound =  int(math.ceil(float(upper)/float(ty)))
+                   #print "NOW FOR Y: upper %d ty %d stmt: %d level: %d bound: %d" % ( upper, ty, stmt, level+1,   bound)
+                   chill.tile7(stmt, level+1, bound, level+1, "tmp_ty", "ty", counted)
+
+        # Always add sync
+        chill.add_sync( stmt, start_loop )
+    #print "ending copy to shared\n"
+    #sys.stdout.flush()
+    #print_code()     
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+def unroll_to_depth( max_depth ):
+    print "\n\nunroll_to_depth(%d)" % max_depth
+    print "SYNC UP"
+    sys.stdout.flush()
+
+    cur = chill.cur_indices(0)
+    thread_idxs = chill.thread_indices()
+    guard_idx = thread_idxs[-1]  # last one
+
+    print "cur    indices",
+    print_array(cur)
+    print "thread indices", 
+    print_array(thread_idxs)
+    print "guard_idx = %s" % guard_idx
+
+    #print "thread_idxs = ",
+    #print thread_idxs
+    guard_idx = thread_idxs[-1]
+    #print "guard_idx = %s" % guard_idx
+
+    #  HERE FIND OUT THE LOOPS WHICH ARE COMMON BETWEEN STATEMENTS
+    common_loops = []
+    comm_loops_cnt = 0
+    num_stmts = chill.num_statements()
+    print "num statements %d" % num_stmts
+
+    for stmt in range(num_stmts):
+        sys.stdout.flush()
+        print "\nSTMT %d" % stmt,
+        cur_idxs = chill.cur_indices(stmt)
+        print "Current Indices:",
+        for c in cur_idxs[:-1]:
+            print "%s," % c,
+        print "%s" % cur_idxs[-1]   # last one
+        sys.stdout.flush()
+        #print_code()
+        
+        if chk_cur_level(stmt, "tx") > 0:
+            
+            for ii in range(find_cur_level(stmt,"tx")-1):
+                print "ii = %d\ncur_idxs[%d] = '%s'" % (ii+1, ii+1, cur_idxs[ii]) # print to match lua
+                id = cur_idxs[ii]
+                if id not in ["bx", "by", "", "tx", "ty"]:
+
+                    print "id %s is not in the list" % id
+
+                    for stmt1 in range(stmt+1, num_stmts):
+                        print "\nii %d stmt1 is %d" % (ii+1, stmt1)  # print to match lua 
+                        cur_idxs1 = chill.cur_indices(stmt1)
+                        print "\nstmt1 cur_idxs1 is ",
+                        for ind in cur_idxs1[:-1]:
+                            print "%s," % ind,
+                        print "%s" % cur_idxs1[-1]
+
+                        print "cur level(%d, %s) = %d" % (stmt, "tx", find_cur_level(stmt,"tx") )
+                        sys.stdout.flush()
+
+                        endrange = find_cur_level(stmt,"tx")-1
+                        print "for iii=1, %d do" % endrange
+                        sys.stdout.flush()
+                        for iii in range(endrange):   # off by one?  TODO 
+                            print "stmt %d   ii %d   iii %d\n" % (stmt, ii+1, iii+1),
+                            sys.stdout.flush()
+                            
+                            if iii >= len(cur_idxs1):
+                                print "stmt %d   ii %d   iii %d  cur_idxs1[%d] = NIL" % (stmt, ii+1, iii+1, iii+1, )  # print to match lua 
+                            else:
+                                print "stmt %d   ii %d   iii %d  cur_idxs1[%d] = '%s'" % (stmt, ii+1, iii+1, iii+1, cur_idxs1[iii])  # print to match lua 
+                            sys.stdout.flush()
+
+                            # this will still probably die 
+                            if iii < len(cur_idxs1) and [iii] not in ["bx", "by", "tx", "ty", ""]:
+                                if cur_idxs[ii] == cur_idxs1[iii]:
+                                    print "\nfound idx:%s" % cur_idxs[ii]
+                                    common_loops.append(cur_idxs[ii])
+                                    print "cl[%d] = '%s'" % ( comm_loops_cnt, cur_idxs[ii] )
+                                    comm_loops_cnt = len(common_loops)
+
+    if len(common_loops) > 0:
+        print "\n COMM LOOPS :TOTAL %d, and are " % comm_loops_cnt,
+        print common_loops, 
+        print " this loop : %s" % common_loops[0]
+    else:
+        print "UNROLL can't unroll any loops?"
+
+
+    while True:  # break at bottom of loop   (repeat in lua)
+        old_num_statements = chill.num_statements()
+        print "old_num_statements %d" % old_num_statements
+
+        for stmt in range(old_num_statements):
+            cur_idxs = chill.cur_indices(stmt)
+            print "stmt %d    cur_idxs =" % stmt,
+            index = 0
+            for i in cur_idxs:
+                index +=1
+                if index == len(cur_idxs):
+                    print "%s" %i
+                else:
+                    print "%s," % i,
+
+            if len(cur_idxs) > 0:
+                guard_level = -1
+                if chk_cur_level(stmt, guard_idx) > 0:
+                    guard_level = find_cur_level(stmt,guard_idx)
+                print "guard_level(sp) = %d" % guard_level
+                if guard_level > -1:
+                    level = next_clean_level(cur_idxs,guard_level)
+                    print "next clean level %d" % level
+
+                    
+                    #print "looking at %d" % stmt
+                    #print "comparing %d and %d in" % (guard_level, level),
+                    #index = 0
+                    #for i in cur_idxs:
+                    #index +=1
+                    #if index == len(cur_idxs):
+                    #    print "%s" %i
+                    #else:
+                    #    print "%s," % i,
+
+                    # need to handle max_depth
+                    num_unrolled = 0
+                    level_unroll_comm = level
+                    level_arr = []
+
+                    #print "before while, level = %d" % level 
+                    while level >= 0:
+                        print "while: level = %d" % level 
+                        if num_unrolled == max_depth:
+                            break
+
+                        print "Unrolling %d at level %d index %s" % ( stmt, level, cur_idxs[guard_level])  # ??? 
+                        level_arr.append(level)
+
+                        guard_level = find_cur_level(stmt,guard_idx)
+                        level = next_clean_level(cur_idxs,level+1)
+
+                    print "OK, NOW WE UNROLL"
+                    if level_unroll_comm >= 0:
+                        level_arr.reverse()  
+                        for i,lev in enumerate(level_arr):
+                            print "\ni=%d" % i
+                            print "[Unroll]unroll(%d, %d, 0)" % (stmt, lev)
+                            chill.unroll(stmt, lev, 0)
+
+
+        new_num_statements = chill.num_statements()
+        if old_num_statements == new_num_statements:
+            break  # exit infinite loop
+
+
+#  all other calls to C have a routine in this file   (?)
+def unroll( statement, level, unroll_amount ):
+    chill.unroll( statement, level, unroll_amount )
+
diff --git a/test-chill/test-cases/examples/cuda-chill/mm.c b/test-chill/test-cases/examples/cuda-chill/mm.c
new file mode 100644
index 0000000..0efbeeb
--- /dev/null
+++ b/test-chill/test-cases/examples/cuda-chill/mm.c
@@ -0,0 +1,10 @@
+#define N 1024
+
+void normalMM(float c[N][N], float a[N][N], float b[N][N]) {
+  int i, j, k;
+  /* Triple loop nest: c[j][i] += a[k][i] * b[j][k] for every i, j, k in [0, N). */
+  for (i = 0; i < N; i++)
+    for (j = 0; j < N; j++)
+      for (k = 0; k < N; k++)
+        c[j][i] = c[j][i] + a[k][i] * b[j][k];
+}
diff --git a/test-chill/test-cases/examples/cuda-chill/mm.lua b/test-chill/test-cases/examples/cuda-chill/mm.lua
new file mode 100644
index 0000000..5bde1b0
--- /dev/null
+++ b/test-chill/test-cases/examples/cuda-chill/mm.lua
@@ -0,0 +1,38 @@
+init("mm.c", "normalMM", 0)
+dofile("cudaize.lua")
+N=1024
+Ti=128
+Tj=64
+Tk=16
+Tii=16
+Tjj=16
+
+
+
+
+N=1024
+
+
+
+
+
+
+
+
+
+
+
+
+
+tile_by_index({"i","j"},{Ti,Tj},{l1_control="ii",l2_control="jj"},{"ii","jj","i","j","k"})CU=1
+
+tile_by_index({"k"},{Tk},{l1_control="kk"},{"ii","jj","kk","i","j","k"})CU=3
+
+tile_by_index({"i","j"},{Tii,Tjj},{l1_control="iii",l2_control="jjj"},{"ii","jj","kk","i","iii","j","jjj","k"},1)CU=2
+
+cudaize("mm_GPU",{a=1048576,b=1048576,c=1048576},{block={"ii","jj"}, thread={"i","j"}})CU=2
+copy_to_shared("tx","a",-16)
+copy_to_shared("tx","b",-16)
+copy_to_registers("kk","c")
+--print_code()
+unroll_to_depth(2)
diff --git a/test-chill/test-cases/examples/cuda-chill/mpeg4.c b/test-chill/test-cases/examples/cuda-chill/mpeg4.c
new file mode 100755
index 0000000..7f83bf7
--- /dev/null
+++ b/test-chill/test-cases/examples/cuda-chill/mpeg4.c
@@ -0,0 +1,23 @@
+#define N1 4096
+#define N2 4096
+#define WINDOW_SIZE 16
+
+void mpeg4_cpu(float result[N1][N2], float prev[N2+WINDOW_SIZE][N2+WINDOW_SIZE], float  curr[WINDOW_SIZE*WINDOW_SIZE])
+{	/* NOTE(review): prev's first extent is declared with N2 although it is indexed by i+k (i < N1); equal while N1 == N2 -- confirm intent. */
+	unsigned int i;
+	unsigned int j;
+	unsigned int k;
+	unsigned int l;
+	/* For each (i, j): result[i][j] += prev[i+k][j+l] * curr[k*WINDOW_SIZE+l] over all k, l in [0, WINDOW_SIZE). */
+	for ( i = 0; i < N1; ++i)    
+		for ( j = 0; j < N2; ++j) 
+                       for ( k = 0; k < WINDOW_SIZE; ++k) 
+				for ( l = 0; l < WINDOW_SIZE; ++l) 
+					result[i][j] += prev[i+k][j+l] * curr[k*WINDOW_SIZE+l];
+				
+			
+
+		
+	
+}
+
diff --git a/test-chill/test-cases/examples/cuda-chill/mpeg4.lua b/test-chill/test-cases/examples/cuda-chill/mpeg4.lua
new file mode 100644
index 0000000..f025dc0
--- /dev/null
+++ b/test-chill/test-cases/examples/cuda-chill/mpeg4.lua
@@ -0,0 +1,45 @@
+--CUBLAS 2 MM Multiply
+
+--This function form initializes "CUDAIZE v2" versus "CUDAIZE v1" if you
+--call init() and use global variables to specify procedure and loop
+
+--Second parameter is procedure # and third is loop #
+init("mpeg4.c", "mpeg4_cpu", 0) 
+
+--dofile("cudaize.lua") --defines custom tile_by_index, copy_to_registers,copy_to_shared methods
+dofile("cudaize.lua") --defines custom tile_by_index, copy_to_registers,copy_to_shared methods
+
+N=4096
+M=4096
+W=16
+
+--TI must be <= M
+--TJ must be <=TI
+Ti=32
+Tj=32
+Tii=16
+Tjj=16
+Tk=4
+--permute(0,{"j","i","k","l"})
+tile_by_index({"i","j"},{Ti,Tj},{l1_control="ii",l2_control="jj"},{"ii","jj","i","j","k","l"})
+--tile_by_index({"k","l"},{Tk*2,Tk*2},{l1_control="kk",l2_control="ll"},{"ii","jj","kk","ll","i","j","k","l"})
+--print_code()
+--tile_by_index({"k","l"},{Tk,Tk},{l1_control="kk",l2_control="ll"},{"ii","jj","i","j","kk","k","ll","l"})
+tile_by_index({"i","j"},{Tii,Tjj},{l1_control="iii",l2_control="jjj"},{"ii","jj","iii","i","jjj","j","k","l"})
+--print_code()
+--normalize_index("j")
+--normalize_index("i")
+--print_code()
+cudaize("kernel_GPU",{curr=W*W,prev=(N+W)*(M+W),result=N*M},{block={"ii","jj"}, thread={"i","j"}})
+--print_code()
+copy_to_shared("iii","prev",16)
+
+copy_to_registers("jjj","result")
+
+--print_code()
+--copy_to_constant_no_tile("curr")
+unroll_to_depth(2)
+print_code()
+print_space()
+
+
diff --git a/test-chill/test-cases/examples/cuda-chill/mriq-fh.c b/test-chill/test-cases/examples/cuda-chill/mriq-fh.c
new file mode 100755
index 0000000..1e924b7
--- /dev/null
+++ b/test-chill/test-cases/examples/cuda-chill/mriq-fh.c
@@ -0,0 +1,38 @@
+#define X 32768
+#define K 256
+struct kValues {
+  float Kx;
+  float Ky;
+  float Kz;
+  float PhiMag;
+};
+extern float sin(float);
+extern float cos(float);
+
+void mriFH_cpu(float *rPhi,float *rRho,float *iRho, float *iPhi, float *rD, float *iD, float *kx, float *ky, float *kz, float *dx, float *dy, float *dz, float *rFHref, float *iFHref)
+{
+    /* For every x in [0, X) and k in [0, K): rFHref[x] += rRho[k]*cos(exp) - iRho[k]*sin(exp) and iFHref[x] += iRho[k]*cos(exp) + rRho[k]*sin(exp). rPhi, iPhi, rD and iD are not referenced in the body. */
+    	float rfh;	/* not referenced below */
+	float ifh;	/* not referenced below */
+	float exp;	/* local variable: the argument passed to cos()/sin() below */
+	float cArg;
+	float sArg;
+    	//float rRho[K];
+	//float iRho[K];
+        unsigned int k;
+	unsigned int x;
+ 
+      
+    for (x = 0; x < X; ++x) {
+        for (k = 0; k < K; ++k) {
+            
+	       exp = 2 * 3.14159 * (kx[k]* dx[x] + ky[k]* dy[x] + kz[k]* dz[x]);
+	       cArg = cos(exp);
+	       sArg = sin(exp);
+            rFHref[x] += rRho[k]* cArg - iRho[k]* sArg;
+            iFHref[x] += iRho[k]*cArg + rRho[k]*sArg;
+        }
+         
+    }
+}
+
diff --git a/test-chill/test-cases/examples/cuda-chill/mriq-fh.lua b/test-chill/test-cases/examples/cuda-chill/mriq-fh.lua
new file mode 100755
index 0000000..3277bac
--- /dev/null
+++ b/test-chill/test-cases/examples/cuda-chill/mriq-fh.lua
@@ -0,0 +1,73 @@
+--CUBLAS 2 MM Multiply
+
+--This function form initializes "CUDAIZE v2" versus "CUDAIZE v1" if you
+--call init() and use global variables to specify procedure and loop
+
+--Second parameter is procedure # and third is loop #
+init("mriq-fh.c", "mriFH_cpu", 0) 
+
+dofile("cudaize.lua") --defines custom tile_by_index, copy_to_registers,
+                      --copy_to_shared methods
+N=32768
+M=256
+Tx=256
+
+
+print_code()
+--permute(0,{"j","i"})
+--tile_by_index({"j","i"}, {TI,TJ}, {l1_control="jj", l2_control="ii"}, {"jj","ii", "j", "i"})
+tile_by_index({"x"},{Tx},{l1_control="xx"},{"xx","x","k"})
+--tile_by_index({"x"},{16},{l1_control="xx1"},{"xx","x","xx1","k"})
+--tile_by_index({"j"}, {TI}, {l1_control="jj"}, {"ii","jj", "j", "i"})
+--tile_by_index({"i"}, {TI}, {l1_control="ii"}, {"ii", "i", "j"})
+print_code()
+
+normalize_index("x")
+--normalize_index("i")
+print_code()
+--tile_by_index({"i"}, {TI}, {l1_control="iii",l1_tile="i"}, {"ii","jj", "iii","j","i"})
+--print_code()
+--cudaize("Kernel_GPU", {x=N,y=N,z=N,Qr=N,Qi=N,kVals=M},{block={"jj"}, thread={"j"}})
+cudaize("kernel_GPU",{dx=N,dy=N,dz=N,iRho=M,kx=M,ky=M,kz=M,rFHref=N,iFHref=N,rRho=M},{block={"xx"}, thread={"x"}})
+--copy_to_shared("tx","iRho",-16)
+--copy_to_shared("tx","dz",1)
+--copy_to_shared("tx","rRho",-16)
+--copy_to_registers("tx","rFHref")
+--copy_to_registers("tx","rRho")
+--copy_to_registers("tx","iRho")
+--copy_to_registers("tx","kx")
+--copy_to_registers("tx","dx")
+--copy_to_registers("tx","ky")
+--copy_to_registers("tx","dy")
+--copy_to_registers("tx","kz")
+--copy_to_registers("tx","dz")
+--copy_to_registers("tx","iFHref")
+--copy_to_texture("rRho")
+--copy_to_texture("kx")
+--copy_to_texture("dx")
+--copy_to_texture("ky")
+--copy_to_texture("dy")
+--copy_to_texture("kz")
+--copy_to_texture("dz")
+--copy_to_texture("iRho")
+--print_code()--]]
+--unroll(0,4,0)
+--copy_to_constant_no_tile("kx")
+--copy_to_constant_no_tile("ky")
+--copy_to_constant_no_tile("kz")
+--copy_to_constant_no_tile("rRho")
+--copy_to_constant_no_tile("iRho")
+
+--unroll_to_depth(1)
+print_code()
+--[[
+copy_to_Texture("rRho")
+copy_to_Texture("kx")
+copy_to_Texture("dx")
+copy_to_Texture("ky")
+copy_to_Texture("dy")
+copy_to_Texture("kz")
+copy_to_Texture("dz")
+copy_to_Texture("iRho")
+--unroll_to_depth(2)
+--]]
diff --git a/test-chill/test-cases/examples/cuda-chill/mriq.c b/test-chill/test-cases/examples/cuda-chill/mriq.c
new file mode 100644
index 0000000..ba4b87c
--- /dev/null
+++ b/test-chill/test-cases/examples/cuda-chill/mriq.c
@@ -0,0 +1,33 @@
+#define N 32768
+#define M 3072
+struct kValues {
+  float Kx;
+  float Ky;
+  float Kz;
+  float PhiMag;
+};
+extern float sinf(float);
+extern float cosf(float);
+
+void  /* For every i in [0, M) and j in [0, N): Qr[j] += phi*cosf(expArg), Qi[j] += phi*sinf(expArg), with phi = kVals[i].PhiMag. */
+ComputeQCPU(int numK, int numX,struct kValues kVals[M],float x[N], float y[N], float z[N],float Qr[N], float Qi[N]) {
+  float expArg;
+  float cosArg;
+  float sinArg;
+  float phi;
+  int i;
+  int j;
+  numK = M;  /* incoming argument is overwritten; the loops below use M directly */
+  numX = N;  /* incoming argument is overwritten; the loops below use N directly */
+  for ( i = 0; i < M; i++) {
+    for ( j = 0; j < N; j++) {
+      expArg = 6.2831853071795864769252867665590058f * (kVals[i].Kx * x[j] +kVals[i].Ky * y[j] +kVals[i].Kz * z[j]);
+      cosArg = cosf(expArg);
+      sinArg = sinf(expArg);
+      phi = kVals[i].PhiMag;
+      Qr[j] += phi * cosArg;
+      Qi[j] += phi * sinArg;
+    }
+  }
+}
+  
diff --git a/test-chill/test-cases/examples/cuda-chill/mriq.lua b/test-chill/test-cases/examples/cuda-chill/mriq.lua
new file mode 100644
index 0000000..1170111
--- /dev/null
+++ b/test-chill/test-cases/examples/cuda-chill/mriq.lua
@@ -0,0 +1,55 @@
+--CUBLAS 2 MM Multiply
+
+--This function form initializes "CUDAIZE v2" versus "CUDAIZE v1" if you
+--call init() and use global variables to specify procedure and loop
+
+--Second parameter is procedure # and third is loop #
+init("mriq.c", "ComputeQCPU", 0) 
+
+dofile("cudaize.lua") --defines custom tile_by_index, copy_to_registers,
+                      --copy_to_shared methods
+N=32768
+M=3072
+TI=128
+TJ=128
+
+permute(0,{"j","i"})
+--tile_by_index({"j","i"}, {TI,TJ}, {l1_control="jj", l2_control="ii"}, {"jj","ii", "j", "i"})
+tile_by_index({"i"}, {TJ}, {l1_control="ii",l1_tile="i"}, {"ii", "j","i"})
+tile_by_index({"j"}, {TI}, {l1_control="jj"}, {"ii","jj", "j", "i"})
+--tile_by_index({"i"}, {TI}, {l1_control="ii"}, {"ii", "i", "j"})
+--print_code()
+
+normalize_index("j")
+normalize_index("i")
+--print_code()
+--tile_by_index({"i"}, {TI}, {l1_control="iii",l1_tile="i"}, {"ii","jj", "iii","j","i"})
+--print_code()
+cudaize("Kernel_GPU", {x=N,y=N,z=N,Qr=N,Qi=N,kVals=M},{block={"jj"}, thread={"j"}})
+
+copy_to_shared("tx","kVals",1)
+--copy_to_shared("tx","x",1)
+--copy_to_shared("tx","y",1)
+--copy_to_shared("tx","z",1)
+
+--copy_to_texture("kVals")
+--datacopy(0, 3, "kVals", {"tt","t"},false,0,1,-16,true)
+--print_code()
+--datacopy_privatized(0,"tx","kVals",{"tx"})
+--copy_to_registers("tx","kVals")
+copy_to_registers("ii","x")
+copy_to_registers("ii","y")
+copy_to_registers("ii","z")
+copy_to_registers("ii","Qi")
+copy_to_registers("ii","Qr")
+--[[datacopy_privatized(0,"tx","x",{"tx"})
+datacopy_privatized(0,"tx","y",{"tx"})
+datacopy_privatized(0,"tx","z",{"tx"})
+datacopy_privatized(0,"tx","Qi",{"tx"})
+datacopy_privatized(0,"tx","Qr",{"tx"})
+
+
+]]--
+--unroll(0,5,64)
+print_code()
+--unroll_to_depth(1) --won't unroll past thread/loop mapping, unrolls up to two loop levels
diff --git a/test-chill/test-cases/examples/cuda-chill/mv-shadow.c b/test-chill/test-cases/examples/cuda-chill/mv-shadow.c
new file mode 100644
index 0000000..582b187
--- /dev/null
+++ b/test-chill/test-cases/examples/cuda-chill/mv-shadow.c
@@ -0,0 +1,9 @@
+#define N 1024
+
+void normalMV(float c[N][N], float a[N], float b[N]) {
+  int i, j;
+  /* a[i] += c[j][i] * b[j] for every i, j in [0, N); note c is indexed [j][i]. */
+  for (i = 0; i < N; i++)
+    for (j = 0; j < N; j++)
+      a[i] = a[i] + c[j][i] * b[j];
+}
diff --git a/test-chill/test-cases/examples/cuda-chill/mv-shadow.lua b/test-chill/test-cases/examples/cuda-chill/mv-shadow.lua
new file mode 100644
index 0000000..43e8491
--- /dev/null
+++ b/test-chill/test-cases/examples/cuda-chill/mv-shadow.lua
@@ -0,0 +1,65 @@
+init("mv-shadow.c","normalMV",0)
+dofile("cudaize.lua") --defines custom tile_by_index, copy_to_registers,
+                      --copy_to_shared methods
+
+N=129
+TI=32
+TJ=64
+
+N=1024
+TI=16
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+--Tile the i and j loop, introducing "ii" as the control loop for the "i"
+--tile, "k" for the control loop for the "j" tile, with the final order
+--of {"ii", "k", "i", "j"}
+tile_by_index({"i","j"}, {TI,TJ}, {l1_control="ii", l2_control="k"}, {"ii", "k", "i", "j"})
+--tile_by_index({"i"}, {TI}, {l1_control="iii"}, {"ii", "k", "iii","i", "j"})
+--tile_by_index({"j"}, {TI}, {l2_control="k"}, { "k", "i", "j"})
+--tile_by_index({"i"}, {TI}, {l1_control="ii"}, {"ii", "i", "j"})
+--print_code()
+--Normalize index will do a tile size of one over the loop level specified
+--by the input index. This is useful to get a zero lower bound and hard
+--upper bound on a loop instead of it being relative to previous loop
+--levels.
+--normalize_index("ii")
+normalize_index("i")
+print_code()
+
+--Cudaize now determines the grid dimensions from the loops themselves
+--(the upper bounds of the block and thread loops). It also renames the
+--given block and thread loops' indexes to the appropriate values from
+--the set {"bx","by","tx","ty","tz"}. The second parameter specifies the
+--size of the arrays to be copied in the CUDA scaffolding.
+cudaize("mv_GPU", {a=N, b=N, c=N*N}, {block={"ii"}, thread={"i"}})
+--print_code()
+
+--Does a datacopy, tile, and add_sync to get a shared memory copy
+
+--copy_to_shared("tx", "b", 1)
+--copy_to_shared("tx", "c", -16)
+--print_code()
+--copy_to_texture("b")
+--copy_to_texture("c")
+copy_to_registers("k", "a")
+--print_code()
+
+unroll_to_depth(1) --won't unroll past thread/loop mapping, unrolls up to two loop levels
+--copy_to_texture("b")
+--print_code()
+--unroll(0,5,0)
+--print_code()
diff --git a/test-chill/test-cases/examples/cuda-chill/mv.c b/test-chill/test-cases/examples/cuda-chill/mv.c
new file mode 100644
index 0000000..582b187
--- /dev/null
+++ b/test-chill/test-cases/examples/cuda-chill/mv.c
@@ -0,0 +1,9 @@
+#define N 1024
+
+void normalMV(float c[N][N], float a[N], float b[N]) {
+  int i, j;
+  /* a[i] += c[j][i] * b[j] for every i, j in [0, N); note c is indexed [j][i]. */
+  for (i = 0; i < N; i++)
+    for (j = 0; j < N; j++)
+      a[i] = a[i] + c[j][i] * b[j];
+}
diff --git a/test-chill/test-cases/examples/cuda-chill/mv.lua b/test-chill/test-cases/examples/cuda-chill/mv.lua
new file mode 100644
index 0000000..ca54501
--- /dev/null
+++ b/test-chill/test-cases/examples/cuda-chill/mv.lua
@@ -0,0 +1,65 @@
+init("mv.c","normalMV",0)
+dofile("cudaize.lua") --defines custom tile_by_index, copy_to_registers,
+                      --copy_to_shared methods
+
+N=129
+TI=32
+TJ=64
+
+N=1024
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+--Tile the i and j loop, introducing "ii" as the control loop for the "i"
+--tile, "k" for the control loop for the "j" tile, with the final order
+--of {"ii", "k", "i", "j"}
+tile_by_index({"i","j"}, {TI,TJ}, {l1_control="ii", l2_control="k"}, {"ii", "k", "i", "j"})
+--tile_by_index({"i"}, {TI}, {l1_control="iii"}, {"ii", "k", "iii","i", "j"})
+--tile_by_index({"j"}, {TI}, {l2_control="k"}, { "k", "i", "j"})
+--tile_by_index({"i"}, {TI}, {l1_control="ii"}, {"ii", "i", "j"})
+--print_code()
+--Normalize index will do a tile size of one over the loop level specified
+--by the input index. This is useful to get a zero lower bound and hard
+--upper bound on a loop instead of it being relative to previous loop
+--levels.
+--normalize_index("ii")
+normalize_index("i")
+print_code()
+
+--Cudaize now determines the grid dimensions from the loops themselves
+--(the upper bounds of the block and thread loops). It also renames the
+--given block and thread loops' indexes to the appropriate values from
+--the set {"bx","by","tx","ty","tz"}. The second parameter specifies the
+--size of the arrays to be copied in the CUDA scaffolding.
+cudaize("mv_GPU", {a=N, b=N, c=N*N}, {block={"ii"}, thread={"i"}})
+
+--print_code()
+
+--Does a datacopy, tile, and add_sync to get a shared memory copy
+
+--copy_to_shared("tx", "b", 1)
+--copy_to_shared("tx", "c", -16)
+--print_code()
+--copy_to_texture("b")
+--copy_to_texture("c")
+copy_to_registers("k", "a")
+--print_code()
+
+unroll_to_depth(1) --won't unroll past thread/loop mapping, unrolls up to two loop levels
+--copy_to_texture("b")
+--print_code()
+--unroll(0,5,0)
+--print_code()
diff --git a/test-chill/test-cases/examples/cuda-chill/mv_try.c b/test-chill/test-cases/examples/cuda-chill/mv_try.c
new file mode 100644
index 0000000..7781f3b
--- /dev/null
+++ b/test-chill/test-cases/examples/cuda-chill/mv_try.c
@@ -0,0 +1,9 @@
+#define N 4096
+
+void normalMV(int n, float c[N][N], float a[N], float b[N]) {
+  int i, j;
+  /* a[i] += c[i][j] * b[j] for every i, j in [0, n); loop bounds come from the runtime argument n, array extents from N. */
+  for (i = 0; i < n; i++)
+    for (j = 0; j < n; j++)
+      a[i] = a[i] + c[i][j] * b[j];
+}
diff --git a/test-chill/test-cases/examples/cuda-chill/mv_try.lua b/test-chill/test-cases/examples/cuda-chill/mv_try.lua
new file mode 100644
index 0000000..db4d9ad
--- /dev/null
+++ b/test-chill/test-cases/examples/cuda-chill/mv_try.lua
@@ -0,0 +1,14 @@
+init("mv_try.c","normalMV",0)
+dofile("cudaize.lua") --defines custom tile_by_index, copy_to_registers,
+                      --copy_to_shared methods
+
+TI=96
+
+N=4096
+
+
+tile_by_index({"i"}, {TI}, {l1_control="ii"}, {"ii", "i", "j"})
+cudaize("mv_GPU", {a=N, b=N, c=N*N},
+        {block={"ii"}, thread={"i"}})
+
+print_code()
diff --git a/test-chill/test-cases/examples/cuda-chill/nbody.c b/test-chill/test-cases/examples/cuda-chill/nbody.c
new file mode 100644
index 0000000..57899b6
--- /dev/null
+++ b/test-chill/test-cases/examples/cuda-chill/nbody.c
@@ -0,0 +1,66 @@
+#define NBODIES 16384
+#define SOFTENINGSQUARED 0.01f
+#define DELTATIME 0.001f
+#define DAMPING 1.0f
+
+#define NBLOCKSY 1
+#define NBLOCKSX (NBODIES/NTHREADSX)
+#define NTHREADSY 1 
+#define NTHREADSX 64
+
+#define BLOCKSIZE 128
+
+#define SHARED 1
+#define TIMER 1
+#define VERIFY 1
+
+extern float sqrtf(float);
+
+void nbody_cpu(float* oldpos,float* oldpos1, float *newpos, float *oldvel, float *newvel, float *force)
+{   /* For each i, accumulates force[i*4 .. i*4+2] over all j in [0, NBODIES); only oldpos, oldpos1 and force are used outside the commented-out loop at the end. */
+    float r0,r1,r2;
+    float invDist, invDistCube, mass, invMass;
+    unsigned int i,j;
+    for(i = 0; i < NBODIES; ++i) {
+        //force[i*4  ] = 0;
+        //force[i*4+1] = 0;
+        //force[i*4+2] = 0;
+        //force[i*4+3] = 0;
+        for(j = 0; j < NBODIES; ++j) {
+	    r0 = oldpos[j*4]-oldpos1[i*4];
+	    r1 = oldpos[j*4+1]-oldpos1[i*4+1];
+	    r2 = oldpos[j*4+2]-oldpos1[i*4+2];
+
+	    invDist = 1.0/sqrtf(r0 * r0 + r1 * r1 + r2 * r2 + SOFTENINGSQUARED);
+	    invDistCube =  invDist * invDist * invDist;
+	    mass = oldpos1[i*4+3];
+
+	    force[i*4] = force[i*4] + r0 * mass * invDistCube;
+	    force[i*4+1] = force[i*4+1] + r1 * mass * invDistCube;
+	    force[i*4+2] = force[i*4+2] + r2 * mass * invDistCube;
+
+        }
+    }
+
+/*    for (i = 0; i < NBODIES; ++i) {
+        invMass = oldvel[4*i+3];
+
+        oldvel[4*i] += (force[4*i] * invMass) * DELTATIME * DAMPING;
+        oldvel[4*i+1] += (force[4*i+1] * invMass) * DELTATIME * DAMPING;
+        oldvel[4*i+2] += (force[4*i+2] * invMass) * DELTATIME * DAMPING;
+
+        oldpos[4*i] += oldvel[4*i] * DELTATIME;
+        oldpos[4*i+1] += oldvel[4*i+1] * DELTATIME;
+        oldpos[4*i+2] += oldvel[4*i+2] * DELTATIME;
+
+        newpos[4*i+0] = oldpos[4*i];
+        newpos[4*i+1] = oldpos[4*i+1];
+        newpos[4*i+2] = oldpos[4*i+2];
+        newpos[4*i+3] = oldpos[4*i+3];
+
+        newvel[4*i+0] = oldvel[4*i];
+        newvel[4*i+1] = oldvel[4*i+1];
+        newvel[4*i+2] = oldvel[4*i+2];
+        newvel[4*i+3] = oldvel[4*i+3];
+    }*/
+}
diff --git a/test-chill/test-cases/examples/cuda-chill/nbody.lua b/test-chill/test-cases/examples/cuda-chill/nbody.lua
new file mode 100644
index 0000000..08f88a9
--- /dev/null
+++ b/test-chill/test-cases/examples/cuda-chill/nbody.lua
@@ -0,0 +1,53 @@
+--CUBLAS 2 MM Multiply
+
+--This function form initializes "CUDAIZE v2" versus "CUDAIZE v1" if you
+--call init() and use global variables to specify procedure and loop
+
+--Second parameter is procedure # and third is loop #
+init("nbody.c", "nbody_cpu" , 0) 
+
+dofile("cudaize.lua") --defines custom tile_by_index, copy_to_registers,
+                     --copy_to_shared methods
+NBODIES=16384
+
+
+--Tj=128 CHANGE FOR BEST..... BEST IS 64BLOCKS 128THREADS
+--Ti=256
+Tj=64
+Ti=32
+Tjjj=1
+Tiii=1
+Tn=0.1
+--normalize_index("j")
+--
+--print_code()
+--normalize_index("n")
+-- TILE COMMANDS ZEROOOOOOOOOOO:3
+--tile_by_index({"i","j"},{Ti,Tj},{l1_control="ii",l2_control="jj"},{"ii","jj","i","j"})--CU=-1
+tile_by_index({"i"},{Ti},{l1_control="ii"},{"ii","i","j"})--CU=-1
+--normalize_index("i")
+--tile_by_index({"n"},{Tn},{l1_control="nn"},{"jj","ii","nn","j","i","n"})--CU=-1
+
+--tile_by_index({"j","i"},{Tjjj,Tiii},{l1_control="jjj",l2_control="iii"},{"jj","ii","nn","jjj","j","iii","i","n"})--CU=3
+--tile_by_index({"j"}, {Tn}, {l1_control="j",l1_tile="jjj"}, {"ii", "jj", "nn","jjj","j","i","n"})
+--tile_by_index({"i"}, {Ti/2}, {l1_control="iii"}, {"ii","iii", "jj","i","j"})
+--print_code()
+cudaize("kernel_GPU",{oldpos=4*NBODIES,oldpos1=4*NBODIES,oldvel=4*NBODIES,force=4*NBODIES,newpos=4*NBODIES,newvel=4*NBODIES},{block={"ii"}, thread={"i"}})--CU=3
+print_code()
+--tile(0,6,6)
+--copy_to_shared("tx","oldpos",-16)
+--copy_to_registers("j","oldpos")
+--copy_to_registers("j","oldpos1")
+--copy_to_registers("j","force")
+
+--copy_to_texture("oldpos")
+--tile(1,3,3)
+--tile(2,3,3)
+
+print_code()
+--unroll_to_depth(1)
+--
+--tile(2,3,3)
+--unroll(2,3,0)
+--unroll(0,5,0)
+--print_code()
diff --git a/test-chill/test-cases/examples/cuda-chill/tmv-shadow.c b/test-chill/test-cases/examples/cuda-chill/tmv-shadow.c
new file mode 100644
index 0000000..cb9ea8d
--- /dev/null
+++ b/test-chill/test-cases/examples/cuda-chill/tmv-shadow.c
@@ -0,0 +1,9 @@
+#define N 1024
+
+void normalMV(float c[N][N], float a[N], float b[N]) {
+  int i, j;
+  /* a[i] += c[i][j] * b[j] for every i, j in [0, N). */
+  for (i = 0; i < N; i++)
+    for (j = 0; j < N; j++)
+      a[i] = a[i] + c[i][j] * b[j];
+}
diff --git a/test-chill/test-cases/examples/cuda-chill/tmv-shadow.lua b/test-chill/test-cases/examples/cuda-chill/tmv-shadow.lua
new file mode 100644
index 0000000..196b939
--- /dev/null
+++ b/test-chill/test-cases/examples/cuda-chill/tmv-shadow.lua
@@ -0,0 +1,50 @@
+init("tmv-shadow.c","normalMV",0)
+dofile("cudaize.lua") --defines custom tile_by_index, copy_to_registers,
+                      --copy_to_shared methods
+
+N=1024
+--N= 8209
+--N=129
+TI=64
+N=1024
+TI=32
+--tile, "k" for the control loop for the "j" tile, with the final order
+--of {"ii", "k", "i", "j"}
+tile_by_index({"i","j"}, {TI,TI}, {l1_control="ii", l2_control="k"}, {"ii", "k", "i", "j"})
+--tile_by_index({"i"}, {TI}, {l1_control="ii"}, {"ii",  "i", "j"})
+--print_code()
+--tile_by_index({"i"}, {TI/32}, {l1_control="iii"}, {"ii", "k", "iii","i", "j"})
+
+--print_code()
+--Normalize index will do a tile size of one over the loop level specified
+--by the input index. This is useful to get a zero lower bound and hard
+--upper bound on a loop instead of it being relative to previous loop
+--levels.
+--normalize_index("i")
+--print_code()
+
+--Cudaize now determines the grid dimensions from the loops themselves
+--(the upper bounds of the block and thread loops). It also renames the
+--given block and thread loops' indexes to the appropriate values from
+--the set {"bx","by","tx","ty","tz"}. The second parameter specifies the
+--size of the arrays to be copied in the CUDA scaffolding.
+cudaize("tmv_GPU", {a=N, b=N, c=N*N},{block={"ii"}, thread={"i"}})
+
+--print_code()
+
+--Does a datacopy, tile, and add_sync to get a shared memory copy
+copy_to_shared("tx", "b", 1)
+--copy_to_texture("b")
+--print_code()
+
+copy_to_shared("tx", "c", -16)
+--copy_to_texture("c")
+--print_code()
+
+copy_to_registers("k", "a")
+print_code()
+--unroll(0,5,0)
+--unroll(0,4,0)
+--unroll(2,4,16)
+unroll_to_depth(1)
+--print_code()
diff --git a/test-chill/test-cases/examples/cuda-chill/tmv.c b/test-chill/test-cases/examples/cuda-chill/tmv.c
new file mode 100644
index 0000000..cb9ea8d
--- /dev/null
+++ b/test-chill/test-cases/examples/cuda-chill/tmv.c
@@ -0,0 +1,9 @@
+#define N 1024
+
+void normalMV(float c[N][N], float a[N], float b[N]) {
+  int i, j;
+  /* a[i] += c[i][j] * b[j] for every i, j in [0, N). */
+  for (i = 0; i < N; i++)
+    for (j = 0; j < N; j++)
+      a[i] = a[i] + c[i][j] * b[j];
+}
diff --git a/test-chill/test-cases/examples/cuda-chill/tmv.lua b/test-chill/test-cases/examples/cuda-chill/tmv.lua
new file mode 100644
index 0000000..5071108
--- /dev/null
+++ b/test-chill/test-cases/examples/cuda-chill/tmv.lua
@@ -0,0 +1,50 @@
+init("tmv.c","normalMV",0)
+dofile("cudaize.lua") --defines custom tile_by_index, copy_to_registers,
+                      --copy_to_shared methods
+
+N=1024
+--N= 8209
+--N=129
+TI=64
+N=1024
+TI=32
+--tile, "k" for the control loop for the "j" tile, with the final order
+--of {"ii", "k", "i", "j"}
+tile_by_index({"i","j"}, {TI,TI}, {l1_control="ii", l2_control="k"}, {"ii", "k", "i", "j"})
+--tile_by_index({"i"}, {TI}, {l1_control="ii"}, {"ii",  "i", "j"})
+--print_code()
+--tile_by_index({"i"}, {TI/32}, {l1_control="iii"}, {"ii", "k", "iii","i", "j"})
+
+--print_code()
+--Normalize index will do a tile size of one over the loop level specified
+--by the input index. This is useful to get a zero lower bound and hard
+--upper bound on a loop instead of it being relative to previous loop
+--levels.
+--normalize_index("i")
+--print_code()
+
+--Cudaize now determines the grid dimensions from the loops themselves
+--(the upper bounds of the block and thread loops). It also renames the
+--given block and thread loops' indexes to the appropriate values from
+--the set {"bx","by","tx","ty","tz"}. The second parameter specifies the
+--size of the arrays to be copied in the CUDA scaffolding.
+cudaize("tmv_GPU", {a=N, b=N, c=N*N},{block={"ii"}, thread={"i"}})
+
+--print_code()
+
+--Does a datacopy, tile, and add_sync to get a shared memory copy
+copy_to_shared("tx", "b", 1)
+--copy_to_texture("b")
+--print_code()
+
+copy_to_shared("tx", "c", -16)
+--copy_to_texture("c")
+--print_code()
+
+copy_to_registers("k", "a")
+print_code()
+--unroll(0,5,0)
+--unroll(0,4,0)
+--unroll(2,4,16)
+unroll_to_depth(1)
+--print_code()
diff --git a/test-chill/test-cases/unit/chill-basic-python.tclist b/test-chill/test-cases/unit/chill-basic-python.tclist
new file mode 100644
index 0000000..555fa25
--- /dev/null
+++ b/test-chill/test-cases/unit/chill-basic-python.tclist
@@ -0,0 +1,20 @@
+build-chill-testcase -v dev -i python
+
+chill-testcase       test-cases/chill/test_distribute.py    test-cases/chill/mm.c
+chill-testcase       test-cases/chill/test_fuse.py          test-cases/chill/mm.c
+chill-testcase       test-cases/chill/test_known.py         test-cases/chill/mm.c
+chill-testcase       test-cases/chill/test_original.py      test-cases/chill/mm.c
+chill-testcase       test-cases/chill/test_peel.py          test-cases/chill/mm.c
+chill-testcase       test-cases/chill/test_permute.py       test-cases/chill/mm.c
+chill-testcase       test-cases/chill/test_print_code.py    test-cases/chill/mm.c
+chill-testcase       test-cases/chill/test_print_dep.py     test-cases/chill/mm.c
+chill-testcase       test-cases/chill/test_print_space.py   test-cases/chill/mm.c
+chill-testcase       test-cases/chill/test_reverse.py       test-cases/chill/mm.c
+chill-testcase       test-cases/chill/test_scale.py         test-cases/chill/mm.c
+chill-testcase       test-cases/chill/test_shift.py         test-cases/chill/mm.c
+chill-testcase       test-cases/chill/test_shift_to.py      test-cases/chill/mm.c
+chill-testcase       test-cases/chill/test_skew.py          test-cases/chill/mm.c
+chill-testcase       test-cases/chill/test_tile.py          test-cases/chill/mm.c
+chill-testcase       test-cases/chill/test_unroll_extra.py  test-cases/chill/mm.c
+chill-testcase       test-cases/chill/test_unroll.py        test-cases/chill/mm.c
+
diff --git a/test-chill/test-cases/unit/chill-basic-script.tclist b/test-chill/test-cases/unit/chill-basic-script.tclist
new file mode 100644
index 0000000..8bc34dc
--- /dev/null
+++ b/test-chill/test-cases/unit/chill-basic-script.tclist
@@ -0,0 +1,20 @@
+build-chill-testcase -v dev
+
+chill-testcase       test-cases/chill/test_distribute.script    test-cases/chill/mm.c
+chill-testcase       test-cases/chill/test_fuse.script          test-cases/chill/mm.c
+chill-testcase       test-cases/chill/test_known.script         test-cases/chill/mm.c
+chill-testcase       test-cases/chill/test_original.script      test-cases/chill/mm.c
+chill-testcase       test-cases/chill/test_peel.script          test-cases/chill/mm.c
+chill-testcase       test-cases/chill/test_permute.script       test-cases/chill/mm.c
+chill-testcase       test-cases/chill/test_print_code.script    test-cases/chill/mm.c
+chill-testcase       test-cases/chill/test_print_dep.script     test-cases/chill/mm.c
+chill-testcase       test-cases/chill/test_print_space.script   test-cases/chill/mm.c
+chill-testcase       test-cases/chill/test_reverse.script       test-cases/chill/mm.c
+chill-testcase       test-cases/chill/test_scale.script         test-cases/chill/mm.c
+chill-testcase       test-cases/chill/test_shift.script         test-cases/chill/mm.c
+chill-testcase       test-cases/chill/test_shift_to.script      test-cases/chill/mm.c
+chill-testcase       test-cases/chill/test_skew.script          test-cases/chill/mm.c
+chill-testcase       test-cases/chill/test_tile.script          test-cases/chill/mm.c
+chill-testcase       test-cases/chill/test_unroll_extra.script  test-cases/chill/mm.c
+chill-testcase       test-cases/chill/test_unroll.script        test-cases/chill/mm.c
+
diff --git a/test-chill/test-cases/unit/chill-basic.tclist b/test-chill/test-cases/unit/chill-basic.tclist
new file mode 100644
index 0000000..57cddbd
--- /dev/null
+++ b/test-chill/test-cases/unit/chill-basic.tclist
@@ -0,0 +1,4 @@
+
+-w $STAGING_DIR_WD -O $OMEGA_DEV_SRC -C $CHILL_DEV_SRC -b $STAGING_DIR_BIN batch test-cases/unit/chill-basic-script.tclist
+-w $STAGING_DIR_WD -O $OMEGA_DEV_SRC -C $CHILL_DEV_SRC -b $STAGING_DIR_BIN batch test-cases/unit/chill-basic-python.tclist
+
diff --git a/test-chill/testchill/__init__.py b/test-chill/testchill/__init__.py
new file mode 100644
index 0000000..e69de29
--- /dev/null
+++ b/test-chill/testchill/__init__.py
diff --git a/test-chill/testchill/__main__.py b/test-chill/testchill/__main__.py
new file mode 100644
index 0000000..86415c3
--- /dev/null
+++ b/test-chill/testchill/__main__.py
@@ -0,0 +1,368 @@
+#TODO: setup and cleanup mechanism
+
+import argparse
+import logging
+import os
+import pickle
+import sys
+import textwrap
+
+from . import chill
+from . import gcov
+from . import test
+from . import util
+
+
+
+def make_local(argsns, arg_parser):
+    """
+    Make the local test case list. A convenience function for testing a local copy of chill.
+    @params argsns Command line arguments
+    @params arg_parser The ArgumentParser object
+    """    
+    util.mkdir_p(os.path.join(os.getcwd(), '.staging'), temp=True)
+    argsns.wd = os.path.join(os.getcwd(), '.staging/wd')
+    argsns.bin_dir = os.path.join(os.getcwd(), '.staging/bin')
+    argsns.chill_tc_dir = os.path.join(os.getcwd(), 'test-cases') # formerly from the command line
+    argsns.chill_dir = os.path.abspath(argsns.chill_dir)
+    argsns.chill_build_coverage = argsns.coverage_set is not None #TODO: make arg passed to local.
+    argsns.chill_test_coverage = argsns.coverage_set is not None
+    
+    util.mkdir_p(argsns.wd)
+    util.mkdir_p(argsns.bin_dir)
+    util.shell('cp', [os.path.join(argsns.chill_dir, 'examples/cuda-chill/cudaize.lua'), argsns.wd])
+    util.shell('cp', [os.path.join(argsns.chill_dir, 'examples/cuda-chill/cudaize.py'), argsns.wd])
+    
+    for config in chill.ChillConfig.configs(argsns.chill_dir, argsns.bin_dir):
+        build_testcase = chill.BuildChillTestCase(config, options={'coverage': argsns.chill_build_coverage}, coverage_set=argsns.coverage_set)
+        yield build_testcase
+        batch_file = os.path.join(argsns.chill_tc_dir, config.name + '.tclist')
+        for tc in make_batch_testcaselist(argsns, arg_parser, batch_file):
+            yield tc
+
+#def make_repo(argsns, arg_parser):
+    """
+    Make the repo test case list. A convenience function for testing chill from the repository.
+    @params argsns Command line arguments
+    @params arg_parser The ArgumentParser object
+    """
+#    util.mkdir_p(os.path.join(os.getcwd(), '.staging'), temp=True)
+#    argsns.bin_dir = os.path.join(os.getcwd(), '.staging/bin')
+#    argsns.repo_dir = os.path.join(os.getcwd(), '.staging/repo')
+#    argsns.chill_tc_dir = os.path.join(os.getcwd(), 'test-cases') # formerly from the command line
+#    argsns.wd = os.path.join(os.getcwd(), '.staging/wd')
+#    
+#    util.mkdir_p(argsns.bin_dir)
+#    util.mkdir_p(argsns.repo_dir)
+#    util.mkdir_p(argsns.wd)
+#    
+#    #TODO: Should these be hard coded?
+#    repo_root = 'shell.cs.utah.edu/uusoc/facility/res/hallresearch/svn_repo/resRepo/projects'
+#    for version in ['release', 'dev']:
+#        new_args = util.copy(argsns)
+#        if version == 'dev':
+#            chill_repo = 'svn+ssh://{}@{}/chill/branches/cuda-chill-rose'.format(new_args.svnuser, repo_root)
+#            chill_repo_name = 'chill'
+#            omega_repo = 'svn+ssh://{}@{}/omega/branches/cuda-omega-rose'.format(new_args.svnuser, repo_root)
+#            omega_repo_name = 'omega'
+#        elif version == 'release':
+#            chill_repo = 'svn+ssh://{}@{}/chill/release'.format(new_args.svnuser, repo_root)
+#            chill_repo_name = 'chill-release'
+#            omega_repo = 'svn+ssh://{}@{}/omega/release'.format(new_args.svnuser, repo_root)
+#            omega_repo_name = 'omega-release'
+#        new_args.omega_dir = os.path.join(new_args.repo_dir, omega_repo_name)
+#        new_args.chill_dir = os.path.join(new_args.repo_dir, chill_repo_name)
+#        util.shell('svn', ['export', '--force', omega_repo, new_args.omega_dir])
+#        util.shell('svn', ['export', '--force', chill_repo, new_args.chill_dir])
+#        util.shell('cp', [os.path.join(new_args.chill_dir, 'examples/cuda-chill/cudaize.lua'), new_args.wd])
+#        if version == 'dev':
+#            util.shell('cp', [os.path.join(new_args.chill_dir, 'examples/cuda-chill/cudaize.py'), new_args.wd])
+#        # do omega: (just build it for now)
+#        yield omega.BuildOmegaTestCase(new_args.omega_dir ,version)
+#        # do chill
+#        for config in chill.ChillConfig.configs(new_args.omega_dir, new_args.chill_dir, new_args.bin_dir, version=version):
+#            yield chill.BuildChillTestCase(config, coverage_set=argsns.coverage_set)
+#            batch_file = os.path.join(argsns.chill_tc_dir, config.name() + '.tclist')
+#            if os.path.exists(batch_file):
+#                for tc in make_batch_testcaselist(new_args, arg_parser, batch_file):
+#                    yield tc
+
+def make_runchill_testcase(argsns):
+    """
+    Make a RunChillTestCase from the given argument namespace
+    @param argsns Command line arguments
+    """
+    assert (argsns.chill_dir != None) or (argsns.bin_dir != None)
+    
+    ### Required parameters ###
+    wd = os.path.abspath(argsns.wd)
+    chill_script = os.path.abspath(argsns.chill_script)
+    chill_src = os.path.abspath(argsns.chill_src)
+    coverage_set = argsns.coverage_set
+    
+    ### Options to pass to the chill test case ###
+    options = dict()
+    options['compile-src'] = argsns.chill_test_compile_src
+    options['run-script'] = argsns.chill_test_run_script
+    options['compile-gensrc'] = argsns.chill_test_compile_gensrc
+    options['check-run-script-stdout'] = argsns.chill_test_check_run_script
+    options['coverage'] = argsns.chill_test_coverage
+    
+    ### choose interface language from script extension if none is given ###
+    if argsns.chill_script_lang is None:
+        argsns.chill_script_lang = chill.ChillConfig.ext_to_script_lang(chill_script.split('.')[-1])
+    
+    config = chill.ChillConfig(
+        chill_dir = os.path.abspath(argsns.chill_dir) if argsns.chill_dir != None else None,
+        bin_dir = os.path.abspath(argsns.bin_dir) if argsns.bin_dir != None else None,
+        build_cuda = argsns.build_cuda,
+        script_lang = argsns.chill_script_lang)
+    
+    return chill.RunChillTestCase(config, chill_script, chill_src, wd=wd, options=options, coverage_set=coverage_set)
+
+def make_buildchill_testcase(argsns):
+    """
+    Make a BuildChillTestCase from the given argument namespace
+    @param argsns Command line arguments
+    """
+    assert argsns.chill_dir != None
+    
+    coverage_set = argsns.coverage_set
+    
+    options = dict()
+    options['coverage'] = argsns.chill_build_coverage
+    
+    config = chill.ChillConfig(
+        chill_dir = os.path.abspath(argsns.chill_dir) if argsns.chill_dir != None else None,
+        bin_dir = os.path.abspath(argsns.bin_dir) if argsns.bin_dir != None else None,
+        build_cuda = argsns.build_cuda,
+        script_lang = argsns.chill_script_lang)
+    
+    return chill.BuildChillTestCase(config, options=options, coverage_set=coverage_set)
+
+def make_batch_testcaselist(argsns, arg_parser, batch_file=None):
+    """
+    Make a list of test cases from a file.
+    @param argsns The parent argument namespace
+    @param arg_parser The argument parser. Used to parse lines from the batch file.
+    @param batch_file The batch file name
+    """
+    if batch_file is None:
+        batch_file = argsns.batch_file
+    with open(batch_file, 'r') as f:
+        for txt_line in f.readlines():
+            if len(txt_line.strip()) == 0: continue         # skip empty lines
+            if txt_line.strip().startswith('#'): continue   # skip comment lines
+            args = util.applyenv(txt_line.strip())          # replace environment variables with their values
+            args = args.split()                             # split by whitespace
+            for tc in args_to_tclist(args, arg_parser, argsns):
+                yield tc
+
+@util.callonce
+def add_local_args(arg_parser):
+    """
+    Command line arguments for the local command
+    @param arg_parser The local ArgumentParser object
+    """
+    arg_parser.add_argument('chill_dir', metavar='chill-home', default='../')
+    #arg_parser.add_argument('-v', '--chill-branch', dest='chill_version', default='dev', choices=['release','dev'])
+    # - Testing should consider all interface languages. Will uncomment if testing takes too long
+    # arg_parser.add_argument('-i', '--interface-lang', nargs=1, action='append', dest='chill_script_lang_list', choices=['script','lua','python'])
+    # arg_parser.add_argument('-t', '--testcase-dir', dest='chill_tc_dir', default=os.path.join(os.getcwd(), 'test-cases/'))
+    arg_parser.set_defaults(wd=os.path.join(os.getcwd(), '.staging/wd'))
+    arg_parser.set_defaults(bin_dir=os.path.join(os.getcwd(), '.staging/bin'))
+
+@util.callonce
+def add_repo_args(arg_parser):
+    """
+    Command line arguments for the repo command
+    @param arg_parser The repo ArgumentParser object
+    """
+    arg_parser.add_argument('svnuser', metavar='svn-user-name')
+
+def add_boolean_option(arg_parser, name, dest, default=True, help_on=None, help_off=None):
+    """
+    Add a boolean option.
+    @param arg_parser The ArgumentParser object
+    @param name The name of the parameter
+    @param dest The dest parameter passed to the ArgumentParser
+    @param default The default value
+    @param help_on The help parameter for the true option
+    @param help_off The help parameter for the false option
+    """
+    group = arg_parser.add_mutually_exclusive_group()
+    group.add_argument('--' + name, action='store_true', dest=dest, default=default, help=help_on)
+    group.add_argument('--no-' + name, action='store_false', dest=dest, default=default, help=help_off)
+
+def add_chill_common_args(arg_parser):
+    """
+    Common chill command line arguments.
+    @param arg_parser The ArgumentParser object
+    """
+    cuda_group = arg_parser.add_mutually_exclusive_group()
+    cuda_group.add_argument('-u', '--target-cuda', action='store_const', const=True, dest='build_cuda', default=False, help='Test cuda-chill. (Default is chill)')
+    cuda_group.add_argument('-c', '--target-c', action='store_const', const=False, dest='build_cuda', default=False, help='Test chill. (Default is chill)')
+    arg_parser.add_argument('-i', '--interface-lang', dest='chill_script_lang', choices=['script','lua','python'], default=None, help='Chill interface language. If an interface language is not specified, it will be determined by the script file name.')
+
+@util.callonce
+def add_chill_run_args(arg_parser):
+    """
+    Command line arguments specific to running a chill test case
+    @param arg_parser The ArgumentParser object
+    """
+    arg_parser.add_argument('chill_script', help='Chill script file.', metavar='chill-script')
+    arg_parser.add_argument('chill_src', help='Chill source file.', metavar='chill-src')
+    add_boolean_option(arg_parser, 'compile-src', dest='chill_test_compile_src', default=True, help_on='Compile source file.', help_off='Do not compile source file.')
+    add_boolean_option(arg_parser, 'run-script', dest='chill_test_run_script', default=True, help_on='Run chill script.', help_off='Do not run chill script.')
+    add_boolean_option(arg_parser, 'compile-gensrc', dest='chill_test_compile_gensrc', default=True, help_on='Compile generated source file', help_off='Do not compile generated source file.')
+    add_boolean_option(arg_parser, 'check-run-script', dest='chill_test_check_run_script', default=False, help_on='Diff stdout from chill script against a benchmark.')
+    add_boolean_option(arg_parser, 'test-coverage', 'chill_test_coverage', default=False, help_on='Run chill and record code coverage (default).', help_off='Run chill normally without recording code coverage.')
+
+@util.callonce
+def add_chill_build_args(arg_parser):
+    """
+    Command line arguments specific to building chill and testing the build process
+    @params arg_parser The ArgumentParser object
+    """
+    add_boolean_option(arg_parser, 'build-coverage', 'chill_build_coverage', default=False, help_on='Build chill for code coverage flags (default).', help_off='Build chill normally without code coverage flags.')
+
+@util.callonce
+def add_local_command(command_group):
+    """
+    Add local to the subcommand group
+    @param command_group the subparser group object
+    """
+    local_arg_parser = command_group.add_parser('local')
+    add_local_args(local_arg_parser)
+    local_arg_parser.set_defaults(func=lambda a, ap: make_local(a, ap))
+
+@util.callonce
+def add_repo_command(command_group):
+    """
+    Add repo to the subcommand group
+    @param command_group the subparser group object
+    """
+    repo_arg_parser = command_group.add_parser('repo')
+    add_repo_args(repo_arg_parser)
+    repo_arg_parser.set_defaults(func=lambda a, ap: make_repo(a, ap))
+
+@util.callonce
+def add_chill_command(command_group):
+    """
+    Add chill-testcase to the subcommand group
+    @param command_group The subparser group object
+    """
+    chill_arg_parser = command_group.add_parser('chill-testcase')
+    add_chill_run_args(chill_arg_parser)
+    add_chill_common_args(chill_arg_parser)
+    chill_arg_parser.set_defaults(func=lambda a, ap: [make_runchill_testcase(a)])
+
+@util.callonce
+def add_buildchill_command(command_group):
+    """
+    Add build-chill-testcase to the subcommand group
+    @param command_group The subparser group object
+    """
+    buildchill_arg_parser = command_group.add_parser('build-chill-testcase')
+    add_chill_common_args(buildchill_arg_parser)
+    add_chill_build_args(buildchill_arg_parser)
+    buildchill_arg_parser.set_defaults(func=lambda a, ap: [make_buildchill_testcase(a)])
+
+@util.callonce
+def add_batch_args(arg_parser):
+    """
+    Command line arguments for the batch file command
+    @param arg_parser The ArgumentParser object
+    """
+    arg_parser.add_argument('batch_file', help='Batch file', metavar='batch-filename')
+
+@util.callonce
+def add_batch_command(command_group):
+    """
+    Add batch command to the subcommand group
+    @param command_group The subparser group object
+    """
+    batch_arg_parser = command_group.add_parser('batch')
+    add_batch_args(batch_arg_parser)
+    batch_arg_parser.set_defaults(func=make_batch_testcaselist)
+
+@util.callonce
+def add_commands(arg_parser):
+    """
+    Add the subcommand group
+    @param arg_parser The ArgumentParser object
+    """
+    command_group = arg_parser.add_subparsers(title='commands')
+    add_local_command(command_group)
+    #add_repo_command(command_group)
+    add_chill_command(command_group)
+    add_buildchill_command(command_group)
+    add_batch_command(command_group)
+
+@util.callonce
+def add_global_args(arg_parser):
+    """
+    Add arguments that are used for most subcommands
+    @param arg_parser The ArgumentParser object
+    """
+    arg_parser.add_argument('-w', '--working-dir', dest='wd', default=os.getcwd(), help='The working directory. (Defaults to the current directory)', metavar='working-directory')
+    arg_parser.add_argument('-R', '--rose-home',  dest='rose_dir', default=os.getenv('ROSEHOME'), help='Rose home directory. (Defaults to ROSEHOME)', metavar='rose-home')
+    arg_parser.add_argument('-C', '--chill-home', dest='chill_dir', default=os.path.join(os.getcwd(), '..'), help='Chill home directory. (Defaults to the parent of the current directory)', metavar='chill-home')
+    arg_parser.add_argument('-b', '--binary-dir', dest='bin_dir', default=os.path.join(os.getcwd(), '..'), help='Binary directory.', metavar='bin-dir')
+    
+@util.callonce
+def make_argparser():
+    """
+    Create the argument parser.
+    """
+    arg_parser = argparse.ArgumentParser(
+        prog='python -m testchill',
+        description=textwrap.dedent('''\
+            
+            To test a local working copy of chill (from the development branch):
+            --------------------------------------------------------------------  
+            - Run `python -m testchill local`
+            
+        '''),
+        epilog='EPILOG',
+        formatter_class=argparse.RawDescriptionHelpFormatter)
+    
+    add_global_args(arg_parser)
+    add_commands(arg_parser)
+    
+    # ...
+    
+    return arg_parser
+
+def args_to_tclist(args=sys.argv[1:], arg_parser=make_argparser(), argsns=None, **kwargs):
+    """
+    Parse one line and return a list of test cases.
+    @params args Raw arguments to be passed to the ArgumentParser object (defaults to sys.argv[1:])
+    @params arg_parser The ArgumentParser object (defaults to an ArgumentParser returned by make_argparser())
+    @params argsns The top level argument namespace (defaults to None)
+    """
+    if not argsns is None:                           # if an argsns is given,
+        argsns = util.copy(argsns, exclude=['func']) # make a shallow copy, (excluding func)
+    argsns = arg_parser.parse_args(args, namespace=argsns)
+    for k,v in kwargs.items():
+        setattr(argsns, k, v)
+    return list(argsns.func(argsns, arg_parser))
+
+@util.callonce
+def main():
+    coverage = gcov.GcovSet()
+    results = list(test.run(args_to_tclist(coverage_set=coverage)))
+    test.pretty_print_results(results)
+    util.rmtemp()
+    
+    with open('coverage.pickle', 'wb') as f:
+        pickle.dump(coverage, f, 2)
+    with open('testresults.pickle', 'wb') as f:
+        pickle.dump(results, f, 2)
+    
+    if any(s.failed() or s.errored() for s in results):
+        sys.exit(1)
+
+if __name__ == '__main__':
+    main()
+    
diff --git a/test-chill/testchill/_cpp_validate_env.py b/test-chill/testchill/_cpp_validate_env.py
new file mode 100644
index 0000000..9ef5a71
--- /dev/null
+++ b/test-chill/testchill/_cpp_validate_env.py
@@ -0,0 +1,654 @@
+import ast as _pyast
+import collections as _pycollections
+import functools as _pyfunctools
+import itertools as _pyitertools
+import random as _pyrandom
+import struct as _pystruct
+import types as _pytypes
+
+from . import util as _chill_util
+
+_pylambdatype = _pycollections.namedtuple('LambdaType', ['paramtypes','exprtype'])
+_pyarraytype = _pycollections.namedtuple('ArrayType', ['dimensions','basetype'])
+
+_runtime_globals = dict({
+        '_pyitertools':_pyitertools,
+        '_pyrandom':_pyrandom
+    })
+
+def _evalexpr(expr, target_type, bindings):
+    glbls = dict(bindings)
+    glbls.update(_runtime_globals)
+    if target_type is None:
+        pytype = None
+    else:
+        pytype = target_type.getpytype()
+    expr = _pyast.Expression(expr.compile_expr(pytype))
+    expr = _pyast.fix_missing_locations(expr)
+    return eval(compile(expr, '<string>', 'eval'), glbls)
+
+def _addbindings(expr, binding_frame):
+    if hasattr(expr, 'binding_stack'):
+        expr.binding_stack = [binding_frame] + expr.binding_stack
+    return expr
+
+
+class _TreeNode(object):
+    def print_tree(self, stream=None, indent=0):
+        strname = type(self).__name__
+        stream.write(strname + ':\n')
+        indent += 2
+        for k,v in vars(self).items():
+            if isinstance(v, _TreeNode):
+                stream.write(('{}{}:'.format(' '*indent, k)))
+                v.print_tree(stream, indent + len(k))
+            elif isinstance(v, list):
+                stream.write(('{}{}: [\n'.format(' '*indent, k)))
+                for itm in v:
+                    if isinstance(itm, _TreeNode):
+                        stream.write(' '*indent)
+                        itm.print_tree(stream, indent + len(k) + 1)
+                    else:
+                        stream.write('{}{}\n'.format(' '*(indent + 1), str(itm)))
+            else:
+                stream.write(('{}{}: {}\n'.format(' '*indent, k, str(v))))
+
+class _CppType(_TreeNode):
+    def __init__(self):
+        pass
+    
+    def __repr__(self):
+        return "{}".format(str(self))
+    
+    def statictype(self, bindings):
+        return self
+    
+    def formatdata(self, data):
+        raise NotImplementedError
+    
+    def get_cdecl_stmt(self, param_name):
+        raise NotImplementedError
+    
+    def get_cread_stmt(self, param_name, istream_name, dims):
+        raise NotImplementedError
+    
+    def get_cwrite_stmt(self, param_name, ostream_name, dims):
+        raise NotImplementedError
+    
+    def getfreevars(self, glbls):
+        raise NotImplementedError
+
+
+class _CppPrimitiveType(_CppType):
+    _bycppname = {
+            'char':                 ('char', 'c', 1, False, False, True, False),
+            'signed char':          ('signed char', 'b', 1, True, False, False, False),
+            'unsigned char':        ('unsigned char', 'B', 1, True, False, False, False),
+            'short':                ('short', 'h', 2, True, False, False, True),
+            'unsigned short':       ('unsigned short', 'H', 2, True, False, False, False),
+            'int':                  ('int', 'i', 4, True, False, False, True),
+            'unsigned int':         ('unsigned int', 'I', 4, True, False, False, False),
+            'long':                 ('long', 'l', 4, True, False, False, True),
+            'unsigned long':        ('unsigned long', 'L', 4, True, False, False, False),
+            'long long':            ('long long', 'q', 8, True, False, False, True),
+            'unsigned long long':   ('unsigned long long', 'Q', 8, True, False, False, False),
+            'float':                ('float', 'f', 4, False, True, False, True),
+            'double':               ('double', 'd', 8, False, True, False, True)
+        }
+    def __init__(self, cppname, structfmt, size, isint, isfloat, ischar, issigned):
+        _CppType.__init__(self)
+        self.cppname = cppname
+        self.size = size
+        self.size_expr = 'sizeof(' + cppname + ')'
+        self.structfmt = structfmt
+        self.isint = isint
+        self.isfloat = isfloat
+        self.ischar = ischar
+        self.issigned = issigned
+    
+    @staticmethod
+    def get_from_cppname(cppname):
+        return _CppPrimitiveType(*_CppPrimitiveType._bycppname[cppname])
+    
+    def getfreevars(self, glbls):
+        return set()
+    
+    def getpytype(self):
+        if self.ischar:
+            return str
+        elif self.isint:
+            return int
+        elif self.isfloat:
+            return float
+    
+    def __str__(self):
+        return self.cppname
+    
+    def formatdata(self, data):
+        return [1], _pystruct.pack(self.structfmt, data)
+    
+    def get_cdecl_stmt(self, param_name):
+        return '{} {};'.format(self.cppname, param_name)
+    
+    def get_cread_stmt(self, param_name, istream_name, dims):
+        return '{}.read((char*)&{}, {});'.format(istream_name, param_name, self.size_expr)  # istream::read fills a mutable char* buffer
+    
+    def get_cwrite_stmt(self, param_name, ostream_name, dims):
+        return '{}.write((const char*)&{}, {});'.format(ostream_name, param_name, self.size_expr)
+
+
+class _CppVoidType(_CppType):
+    def __init__(self):
+        self.cppname = 'void'
+    
+    def getfreevars(self, glbls):
+        return set()
+    
+    def getpytype(self):
+        return type(None)
+    
+    def __str__(self):
+        return 'void'
+
+
+class _CppArrayType(_CppType):
+    def __init__(self, basetype, dims=[None]):
+        _CppType.__init__(self)
+        self.basetype = basetype
+        self.dimensions = dims
+    
+    def getfreevars(self, glbls):
+        freevars = self.basetype.getfreevars(glbls)
+        for fv in iter(d.getfreevars(glbls) for d in self.dimensions if hasattr(d, 'getfreevars')):
+            freevars = freevars | fv
+        return freevars
+    
+    def getpytype(self):
+        return _pyarraytype(self.dimensions, self.basetype.getpytype())
+    
+    def __str__(self):
+        return '{}[{}]'.format(str(self.basetype), ']['.join(map(str,self.dimensions)))
+    
+    def statictype(self, bindings):
+        dim_list = list()
+        for dim in self.dimensions:
+            if dim is None:
+                dim_list.append(None)
+            else:
+                dim_list.append(_evalexpr(dim, _CppPrimitiveType.get_from_cppname('int'), bindings))
+        return _CppArrayType(self.basetype.statictype(bindings), dim_list)
+    
+    def _formatdata_array(self, unit_length, data):
+        read_length = 0
+        if _chill_util.python_version_major == 2:
+            read_data = ''
+        else:
+            read_data = bytes()
+        while read_length < len(data):
+            for i in range(unit_length):
+                _, b = self.basetype.formatdata(data[read_length+i])
+                read_data += b
+            read_length += unit_length
+        return read_data
+    
+    def formatdata(self, data):
+        prod = lambda l: _pyfunctools.reduce(lambda a,v: a*v, l, 1)
+        if self.dimensions[0] is None:
+            return self.dimensions, self._formatdata_array(prod(self.dimensions[1:]), data)
+        else:
+            return self.dimensions, self._formatdata_array(prod(self.dimensions), data)
+    
+    def get_cdecl_stmt(self, param_name):
+        return '{} {}[{}];'.format(str(self.basetype), param_name, ']['.join(map(str,self.dimensions)))
+    
+    def get_cread_stmt(self, param_name, istream_name, dims):
+        length = _pyfunctools.reduce(lambda a,v: a*v, self.dimensions)
+        #TODO: use dims
+        if isinstance(self.basetype, _CppPrimitiveType):
+            size_expr = '{}*{}'.format(length, self.basetype.size_expr)
+            return '{}.read((char*){}, {});'.format(istream_name, param_name, size_expr)
+        else:
+            raise NotImplementedError
+    
+    def get_cwrite_stmt(self, param_name, ostream_name, dims):
+        length = _pyfunctools.reduce(lambda a,v: a*v, self.dimensions)
+        #TODO: use dims
+        if isinstance(self.basetype, _CppPrimitiveType):
+            size_expr = '{}*{}'.format(length, self.basetype.size_expr)
+            return '{}.write((char*){}, {});'.format(ostream_name, param_name, size_expr)
+        else:
+            raise NotImplementedError
+
+
+class _CppPointerType(_CppType):
+    def __init__(self, basetype):
+        _CppType.__init__(self)
+        self.basetype = basetype
+    
+    def getfreevars(self, glbls):
+        return self.basetype.getfreevars(glbls)
+    
+    def getpytype(self):
+        return self.basetype.getpytype()
+    
+    def __str__(self):
+        return '{}*'.format(str(self.basetype))
+    
+    def statictype(self, bindings):
+        return _CppPointerType(self.basetype.statictype(bindings))
+    
+    def formatdata(self, data):
+        if isinstance(data, list):
+            if _chill_util.python_version_major == 2:
+                read_data = ''
+            else:
+                read_data = bytes()
+            for data_item in data:
+                next_dims, b = self.basetype.formatdata(data_item)
+                read_data += b
+            return [len(data)] + next_dims, read_data
+        else:
+            dims, fmt_data = self.basetype.formatdata(data)
+            return [1] + dims, fmt_data
+
+
+class _CppReferenceType(_CppType):
+    def __init__(self, basetype):
+        _CppType.__init__(self)
+        self.basetype = basetype
+    
+    def getfreevars(self, glbls):
+        return self.basetype.getfreevars(glbls)
+    
+    def getpytype(self):
+        return self.basetype.getpytype()
+    
+    def __str__(self):
+        return '{}&'.format(str(self.basetype))
+    
+    def statictype(self, bindings):
+        return _CppReferenceType(self.basetype.statictype(bindings))
+    
+    def formatdata(self, data):
+        dims, fmt_data = self.basetype.formatdata(data)
+        return dims, fmt_data
+
+
+class _Parameter(_TreeNode):
+    def __init__(self, name, cpptype, direction, init_expr=None):
+        self.name = name
+        self.direction = direction
+        self.cpptype = cpptype
+        self.init_expr = init_expr
+        self._generated = None
+    
+    @staticmethod
+    def order_by_freevars(param_list, glbls=set()):
+        defined_names = set()
+        parameter_names = set(p.name for p in param_list)
+        param_queue = _pycollections.deque(param_list)
+        while len(param_queue):
+            param = param_queue.popleft()
+            freevars = (parameter_names & param.getfreevars(glbls)) - defined_names
+            if not len(freevars):
+                defined_names.add(param.name)
+                yield param
+            else:
+                param_queue.append(param)
+    
+    def getfreevars(self, glbls=set()):
+        freevars = set()
+        if self.init_expr is not None:
+            freevars = freevars | self.init_expr.getfreevars(glbls)
+        freevars = freevars | self.cpptype.getfreevars(glbls)
+        return freevars
+    
+    def generatedata(self, bindings=dict()):
+        if self._generated is None:
+            if self.init_expr is None:
+                py_data = None
+            else:
+                py_data = _evalexpr(self.init_expr, self.cpptype, bindings)
+            static_type = self.cpptype.statictype(bindings)
+            dims, data = static_type.formatdata(py_data)
+            self._generated = (self.name, static_type, dims, data)
+            return self.name, static_type, dims, data
+        else:
+            return self._generated
+
+
+class _Procedure(_TreeNode):
+    def __init__(self, name, rtype, parameters):
+        self.name = name
+        self.rtype = rtype
+        self.parameters = parameters
+        self.binding_stack = []
+        self._bindings = None
+        self._params_orderd = None
+        self._invoke_str = '{}({});'.format(self.name, ','.join([p.name for p in parameters]))
+    
+    def _order_params(self):
+        if not self._params_orderd:
+            self._params_orderd = list(_Parameter.order_by_freevars(self.parameters))
+    
+    def _compute_bindings(self, global_bindings):
+        local_bindings = dict(global_bindings)
+        if self._bindings is None:
+            new_bindings = dict()
+            for binding_frame in self.binding_stack:
+                for name, (ctype, expr) in binding_frame.items():
+                    value = _evalexpr(expr, ctype, local_bindings)
+                    new_bindings[name] = value
+                    local_bindings[name] = value
+            self._bindings = new_bindings
+        local_bindings.update(self._bindings)
+        return local_bindings
+    
+    def generatedata(self, direction_list, global_bindings=None):
+        self._order_params()
+        if global_bindings is None:
+            global_bindings = dict()
+        bindings = self._compute_bindings(global_bindings)
+        for param in (p for p in self._params_orderd if p.direction in direction_list):
+            p_name, p_statictype, p_dims, p_data = param.generatedata(bindings)
+            #TODO: add binding
+            yield p_name, p_statictype, p_dims, p_data
+    
+    def generatedecls(self, bindings):
+        for p_name, p_statictype, p_dims, p_data in self.generatedata(['in','out','inout'], bindings):
+            yield p_statictype.get_cdecl_stmt(p_name)
+        #for p_name, p_statictype, p_dims, p_data in self.generatedata('out', bindings):
+        #    yield p_statictype.get_cdecl_stmt(p_name)
+    
+    def generatereads(self, direction_list, stream, bindings):
+        for p_name, p_statictype, p_dims, p_data in self.generatedata(direction_list, bindings):
+            yield p_statictype.get_cread_stmt(p_name, stream, p_dims)
+    
+    def generatewrites(self, stream, bindings):
+        for p_name, p_statictype, p_dims, p_data in self.generatedata(['inout', 'out'], bindings):
+            yield p_statictype.get_cwrite_stmt(p_name, stream, p_dims)
+    
+    def getinvokestr(self):
+        return self._invoke_str
+
+
+class _Expr(_TreeNode):
+    def __init__(self):
+        pass
+    
+    def getfreevars(self, glbls):
+        raise NotImplementedError
+    
+    def compile_to_lambda(self, glbls, target_type):
+        args = _pyast.arguments(list(_pyast.Name(n, _pyast.Param()) for n in self.getfreevars(glbls)), None, None, [])  # bound call: self is passed implicitly
+        expr = _pyast.Expression(_pyast.Lambda(args, self.compile_expr(target_type)))
+        expr = _pyast.fix_missing_locations(expr)
+        return eval(compile(expr, '<string>', 'eval'))
+    
+    def compile_expr(self, target_type):
+        raise NotImplementedError
+
+
+class _ConstantExpr(_Expr):  # literal value, compiled to an AST constant of the requested target type
+    def __init__(self, value):
+        self.value = value
+    
+    def compile_expr(self, target_type):
+        if target_type is None:  # untyped: the literal text is parsed as a Python expression
+            return _pyast.parse(self.value, '<string>', 'eval').body
+        elif target_type == chr:
+            return _pyast.Str(chr(self.value))
+        elif target_type == int:
+            return _pyast.Num(int(self.value))
+        elif target_type == str:
+            return _pyast.Str(str(self.value))
+        elif target_type == float:
+            return _pyast.Num(float(self.value))  # any other target type falls through and returns None
+    
+    def getfreevars(self, glbls):
+        return set()
+    
+    def __str__(self):
+        return str(self.value)  # __str__ must return a str; value is not always one (compile_expr passes it to chr())
+
+
+class _NameExpr(_Expr):  # identifier reference; compiles to a Name node in Load context
+    def __init__(self, name):
+        self.name = name
+    
+    def compile_expr(self, target_type):
+        return _pyast.Name(self.name, _pyast.Load())  # target_type is not consulted
+    
+    def getfreevars(self, glbls):
+        # The identifier is free unless glbls already contains it.
+        if self.name in glbls:
+            return set()
+        return set([self.name])
+    
+    def __str__(self):
+        return self.name
+
+
+class _AttributeExpr(_Expr):
+    """Attribute access of the form <expr>.<name>."""
+    def __init__(self, expr, name):
+        self.expr = expr
+        self.name = name
+    
+    def compile_expr(self, target_type):
+        # The owner expression is compiled with no target type; target_type itself is not used.
+        owner = self.expr.compile_expr(None)
+        return _pyast.Attribute(owner, self.name, _pyast.Load())
+    
+    def getfreevars(self, glbls):
+        return self.expr.getfreevars(glbls)
+    
+    def __str__(self):
+        return '{}.{}'.format(str(self.expr), self.name)
+
+
+class _BinExpr(_Expr):  # binary operation; op must be one of the keys of _optypes
+    _optypes = {
+            '+':  _pyast.Add,
+            '-':  _pyast.Sub,
+            '*':  _pyast.Mult,
+            '**': _pyast.Pow,
+            '/':  _pyast.Div
+        }
+    def __init__(self, left, op, right):
+        self.left = left
+        self.op = op
+        self.right = right
+    
+    def compile_expr(self, target_type):  # both operands are compiled against the same target type
+        lhs = self.left.compile_expr(target_type)
+        operator = _BinExpr._optypes[self.op]()
+        rhs = self.right.compile_expr(target_type)
+        return _pyast.BinOp(lhs, operator, rhs)
+    
+    def getfreevars(self, glbls):
+        return self.left.getfreevars(glbls) | self.right.getfreevars(glbls)
+    
+    def __str__(self):
+        return '({}{}{})'.format(str(self.left),self.op,str(self.right))
+
+
+class _UnaryExpr(_Expr):  # unary operation; '-' is the only operator in _optypes
+    _optypes = {
+            '-': _pyast.USub
+        }
+    def __init__(self, op, expr):
+        self.op = op
+        self.expr = expr
+    
+    def compile_expr(self, target_type):
+        operator = _UnaryExpr._optypes[self.op]()
+        operand = self.expr.compile_expr(target_type)
+        return _pyast.UnaryOp(operator, operand)
+    
+    def getfreevars(self, glbls):
+        return self.expr.getfreevars(glbls)
+    
+    def __str__(self):
+        return '({}{})'.format(self.op, str(self.expr))
+
+
+class _LambdaExpr(_Expr):
+    """Lambda expression: a list of parameter names plus a body expression."""
+    def __init__(self, params, expr):
+        self.params = params
+        self.expr = expr
+    
+    def compile_expr(self, target_type):
+        # A typed target must look like a lambda type; its exprtype becomes the body's target type.
+        exprtype = None
+        if target_type is not None:
+            assert hasattr(target_type, 'paramtypes')
+            assert hasattr(target_type, 'exprtype')
+            exprtype = target_type.exprtype
+        # The arguments node is constructed differently for Python 2 and for later versions.
+        if _chill_util.python_version_major == 2:
+            signature = _pyast.arguments([_pyast.Name(p, _pyast.Param()) for p in self.params], None, None, [])
+        else:
+            signature = _pyast.arguments([_pyast.arg(p, None) for p in self.params], None, None, [], None, None, [], [])
+        return _pyast.Lambda(signature, self.expr.compile_expr(exprtype))
+    
+    def getfreevars(self, glbls):
+        # Parameters are bound inside the body, so the body sees them as already-known names.
+        bound = set(glbls) | set(self.params)
+        return self.expr.getfreevars(bound)
+    
+    def __str__(self):
+        names = ','.join(map(str,self.params))
+        return 'lambda {}:{}'.format(names, str(self.expr))
+
+
+class _InvokeExpr(_Expr):
+    """Call expression: a callee expression applied to a list of parameter expressions."""
+    def __init__(self, func, parameters):
+        self.func = func
+        self.parameters = parameters
+    
+    def compile_expr(self, target_type):
+        # With a target type, the callee is compiled against a lambda type whose parameter types are all None.
+        lt = None
+        if target_type is not None:
+            lt = _pylambdatype([None for p in self.parameters], target_type)
+        callee = self.func.compile_expr(lt)
+        call_args = [p.compile_expr(None) for p in self.parameters]
+        return _pyast.Call(callee, call_args, [], None, None)
+    
+    def getfreevars(self, glbls):
+        callee_free = self.func.getfreevars(glbls)
+        params_free = set()
+        for parameter in self.parameters:
+            params_free = params_free | parameter.getfreevars(glbls)
+        return set(callee_free | params_free)
+    
+    def __str__(self):
+        return '{}({})'.format(str(self.func),','.join(map(str,self.parameters)))
+
+
+class _Generator(_Expr):  # common base for generator expressions; adds no behaviour of its own
+    def __init__(self):
+        _Expr.__init__(self)
+    
+    
+class _MatrixGenerator(_Generator):  # compiles to a list comprehension that calls genexpr once per index tuple
+    def __init__(self, dims, genexpr):
+        self.dimensions = dims
+        self.genexpr = genexpr
+    
+    def _compile_dims(self, target_type):  # returns an AST List holding one int expression per dimension
+        if hasattr(target_type, 'dimensions'):
+            dim_exprs = list()
+            assert len(target_type.dimensions) == len(self.dimensions)
+            for i, d in enumerate(target_type.dimensions):
+                if d is None:  # the target type leaves this dimension open, so this generator's own is used
+                    d = self.dimensions[i]
+                dim_exprs += [d.compile_expr(int)]
+        else:
+            dim_exprs = [d.compile_expr(int) for d in self.dimensions]
+        return _pyast.List(dim_exprs, _pyast.Load())
+    
+    def _lambda_type(self, target_type):  # the type genexpr is compiled against: one int parameter per dimension
+        if hasattr(target_type, 'dimensions'):
+            return _pylambdatype([int for d in target_type.dimensions], target_type.basetype)
+        else:
+            return _pylambdatype([int for d in self.dimensions], target_type)
+    
+    def compile_expr(self, target_type):  # NOTE(review): the 5-argument Call(..., starargs, kwargs) form looks like the pre-3.5 ast API - confirm
+        assert target_type is not None
+        dims = self._compile_dims(target_type)
+        ltype = self._lambda_type(target_type)
+        # The expression built below is equivalent to the body of:
+        #def array(func,dims):
+        #    return [func(*d) for d in itertools.product(*map(range,dims))]
+        elt_expr = _pyast.Call(self.genexpr.compile_expr(ltype), [], [], _pyast.Name('_d', _pyast.Load()), None)                  # func(*d)
+        # elt_expr = _pyast.Call(_pyast.Name('tuple', _pyast.Load()), [_pyast.Name('_d', _pyast.Load()), elt_expr], [], None, None) # tuple(d, func(*d))
+        pdt_expr = _pyast.Attribute(_pyast.Name('_pyitertools', _pyast.Load()), 'product', _pyast.Load())                            # itertools.product, looked up under the name _pyitertools
+        itr_expr = _pyast.Call(_pyast.Name('map', _pyast.Load()), [_pyast.Name('range', _pyast.Load()), dims], [], None, None)    # map(range,dims)
+        itr_expr = _pyast.Call(pdt_expr, [], [], itr_expr, None)                                                                  # itertools.product(*(map(range,dims)))
+        return _pyast.ListComp(
+            elt_expr,
+            [_pyast.comprehension(_pyast.Name('_d', _pyast.Store()), itr_expr, [])])
+    
+    def getfreevars(self, glbls):  # free variables of genexpr plus those of every dimension that is not None
+        return set(
+            self.genexpr.getfreevars(glbls) |
+            _pyfunctools.reduce(lambda a,v: a | v.getfreevars(glbls), filter(lambda x: x is not None, self.dimensions), set()))
+    
+    def __str__(self):
+        return 'matrix([{}],{})'.format(','.join(map(str,self.dimensions)),str(self.genexpr))
+
+
+class _RandomExpr(_Expr):
+    """Random value expression, built as _pyrandom.random() * (max - min) + min."""
+    def __init__(self, minexpr, maxexpr):
+        self.minexpr = minexpr
+        self.maxexpr = maxexpr
+        # expr is the expression tree for: _pyrandom.random() * (maxexpr - minexpr) + minexpr
+        unit = _InvokeExpr(_AttributeExpr(_NameExpr('_pyrandom'),'random'),[])
+        span = _BinExpr(maxexpr, '-', minexpr)
+        self.expr = _BinExpr(_BinExpr(unit, '*', span), '+', minexpr)
+    
+    def getfreevars(self, glbls):
+        # only the two bounds contribute free variables
+        return self.minexpr.getfreevars(glbls) | self.maxexpr.getfreevars(glbls)
+    
+    def compile_expr(self, target_type):
+        if target_type == int:
+            # integers: compile the expression as float, then wrap it in a call to int()
+            as_float = self.expr.compile_expr(float)
+            return _pyast.Call(_pyast.Name('int', _pyast.Load()),[as_float],[],None,None)
+        if target_type == float or target_type is None:
+            return self.expr.compile_expr(target_type)
+        # no other target type is handled
+        assert False
+    
+    def __str__(self):
+        return 'random({},{})'.format(str(self.minexpr),str(self.maxexpr))
+
+
+### Public (underscore-free) aliases, intended to be picked up by a star-import of this module ###
+addbindings = _addbindings
+
+CppType = _CppType
+CppPrimitiveType = _CppPrimitiveType
+CppVoidType = _CppVoidType
+CppArrayType = _CppArrayType
+CppPointerType = _CppPointerType
+
+ConstantExpr = _ConstantExpr
+NameExpr = _NameExpr
+AttributeExpr = _AttributeExpr
+BinExpr = _BinExpr
+UnaryExpr = _UnaryExpr
+LambdaExpr = _LambdaExpr
+InvokeExpr = _InvokeExpr
+MatrixGenerator = _MatrixGenerator
+RandomExpr = _RandomExpr
+
+Procedure = _Procedure
+Parameter = _Parameter
+
diff --git a/test-chill/testchill/_extract.py b/test-chill/testchill/_extract.py
new file mode 100644
index 0000000..f6984ac
--- /dev/null
+++ b/test-chill/testchill/_extract.py
@@ -0,0 +1,98 @@
+import collections
+import os
+import os.path
+import itertools
+import re
+
+from . import util
+
+if util.python_version_major == 2:
+    from HTMLParser import HTMLParser
+else:
+    from html.parser import HTMLParser
+
+class _TagExtractor(HTMLParser):
+    """Collects (text, attrs) for every <tagname> element found inside the comments of a source file."""
+    # Per file extension: (start regex, end regex) pairs; a 1-tuple uses the same regex for both ends.
+    _comment_style_expr = {
+            'c':      [('/(/)+',r'[\n]'),(r'/\*',r'\*/')],
+            'cc':     [('/(/)+',r'[\n]'),(r'/\*',r'\*/')],
+            'cpp':    [('/(/)+',r'[\n]'),(r'/\*',r'\*/')],
+            'h':      [('/(/)+',r'[\n]'),(r'/\*',r'\*/')],
+            'hh':     [('/(/)+',r'[\n]'),(r'/\*',r'\*/')],
+            'hpp':    [('/(/)+',r'[\n]'),(r'/\*',r'\*/')],
+            'py':     [('#+',r'[\n]'),('\'\'\'',),('"""',)],
+            'script': [('#+',r'[\n]')],
+            'lua':    [(r'--\[\[',r'\]\]--')]
+        }
+    
+    def __init__(self, tagname):
+        HTMLParser.__init__(self)
+        self.tagname = tagname
+        self._readin = False
+        self._value = ''
+        self._attrs = dict()  # set here so an instance that is fed directly works as well
+        self._tag_list = []
+    
+    def handle_starttag(self, tag, attrs):
+        if tag == self.tagname:
+            self._readin = True
+            self._attrs = dict(attrs)
+    
+    def handle_endtag(self, tag):
+        # A closing tag without an open element is ignored instead of recording a bogus entry.
+        if tag == self.tagname and self._readin:
+            self._readin = False
+            self._tag_list.append((self._value, self._attrs))
+            self._value = ''
+    
+    def handle_data(self, txt):
+        if self._readin:
+            self._value += txt
+    
+    @classmethod
+    def _parse(cls, tagname, txt):
+        """Feed txt to a fresh parser and return its list of (text, attrs) tuples."""
+        reader = cls(tagname)
+        reader.feed(txt)
+        return reader._tag_list
+    
+    @classmethod
+    def _get_commentstyles(cls, ext):
+        """Yield (start regex, end regex) for each comment style registered for the extension."""
+        for comment_style in cls._comment_style_expr[ext]:
+            yield comment_style[0], comment_style[-1]
+    
+    @classmethod
+    def _commented(cls, txt, ext):
+        """Yield the text inside each comment of txt, ordered by start position."""
+        comment_spans = list()
+        for start_expr, end_expr in cls._get_commentstyles(ext):
+            start_re, end_re = re.compile(start_expr), re.compile(end_expr)
+            pos = 0
+            while pos < len(txt):
+                start_match = start_re.search(txt, pos)  # search from pos without copying the tail of txt
+                if not start_match:
+                    break
+                start_pos = start_match.end()
+                end_match = end_re.search(txt, start_pos)
+                if end_match:
+                    end_pos, pos = end_match.start(), end_match.end()
+                else:
+                    end_pos = pos = len(txt)  # unterminated comment: it runs to the end of the text
+                comment_spans.append((start_pos, end_pos))
+        for span in sorted(comment_spans, key=lambda s: s[0]):
+            yield txt[span[0]:span[1]]
+    
+    @classmethod
+    def extract_tag(cls, tagname, filename, wd=None):
+        """Return the (text, attrs) list for the tagname elements in the comments of wd/filename."""
+        if wd is None:
+            wd = os.getcwd()  # resolved per call; a default of os.getcwd() is frozen at import time
+        with open(os.path.join(wd, filename), 'r') as f:
+            txt = f.read()
+        ext = filename.split('.')[-1]
+        return cls._parse(tagname, '\n'.join(cls._commented(txt, ext)))
+
+extract_tag = _TagExtractor.extract_tag  # module-level shortcut for the classmethod
+
diff --git a/test-chill/testchill/chill.py b/test-chill/testchill/chill.py
new file mode 100644
index 0000000..b6d39cf
--- /dev/null
+++ b/test-chill/testchill/chill.py
@@ -0,0 +1,326 @@
+#TODO: Re-Document
+#TODO: highlight test implementation hooks
+
+import os
+import os.path
+
+from . import gcov
+from . import test
+from . import util
+from . import cpp_validate
+
+
+class ChillConfig(object):
+    """One chill build flavour: whether cuda is built, and which script language is used."""
+    def __init__(self, chill_dir=None, bin_dir=None, build_cuda=False, script_lang=None):
+        self.build_cuda = build_cuda
+        self.script_lang = script_lang
+        self.chill_dir = chill_dir
+        self.bin_dir = bin_dir
+        if self.script_lang is None:
+            self.script_lang = self.default_script_lang()
+    
+    def default_script_lang(self):
+        return 'python'
+    
+    def _buildfunc(self, cc, link=True):
+        """
+        Create a build function that runs the compiler cc through util.shell.
+        @param cc Compiler command ('gcc' and 'nvcc' are the ones used in this class).
+        @param link Link an executable when true; compile only (-c) when false.
+        @return build(src, dest, args, defines, wd), which returns (dest path, shell output).
+        """
+        if not link:
+            # Each flag is its own list item, so it stays a single flag however util.shell passes it on.
+            compile_args = ['-c', '-Wuninitialized']
+        elif cc == 'nvcc':
+            compile_args = ['-L/usr/local/cuda/lib64/lib', '-lcuda', '-lcudart', '-lstdc++', '-lrt', '-Wuninitialized']
+        else:
+            compile_args = ['-lstdc++', '-lrt', '-Wuninitialized']
+        
+        def build(src, dest, args=[], defines={}, wd=None):
+            if wd is None:
+                wd = os.path.dirname(src)
+            # New list: "args +=" extended the shared default (and the caller's list), leaking -D flags into later calls.
+            args = list(args) + ['-D{}={}'.format(k,v) for k, v in defines.items()]
+            dest = os.path.join(wd, dest)
+            stdout = util.shell(cc, args + [src, '-o', dest] + compile_args, wd=wd)
+            return dest, stdout
+        return build
+    
+    def compile_src_func(self):
+        """Build function that compiles (without linking) with gcc."""
+        return self._buildfunc('gcc', False)
+    
+    def compile_gensrc_func(self):
+        """Build function that compiles (without linking) with nvcc for cuda builds, else gcc."""
+        return self._buildfunc('nvcc' if self.build_cuda else 'gcc', False)
+    
+    def build_src_func(self):
+        """Build function that compiles and links with gcc."""
+        return self._buildfunc('gcc')
+    
+    def build_gensrc_func(self):
+        """Build function that compiles and links with nvcc for cuda builds, else gcc."""
+        return self._buildfunc('nvcc' if self.build_cuda else 'gcc')
+    
+    @property
+    def config_args(self):
+        """Arguments for ./configure that select this configuration."""
+        args = []
+        if self.build_cuda:
+            args += ['--enable-cuda']
+        if self.script_lang is not None:
+            args += ['--with-' + self.script_lang]
+        return args
+    
+    @property
+    def buildname(self):
+        """'cudachill' for cuda builds, 'chill' otherwise."""
+        return 'cudachill' if self.build_cuda else 'chill'
+    
+    @property
+    def name(self):
+        """Configuration name: 'chill-<lang>' or 'cuda-chill-<lang>'."""
+        prefix = 'cuda-chill-' if self.buildname == 'cudachill' else 'chill-'
+        return prefix + self.script_lang
+    
+    @staticmethod
+    def ext_to_script_lang(ext):
+        return {'script':'script', 'lua':'lua', 'py':'python'}[ext]
+    
+    @staticmethod
+    def configs(chill_dir, bin_dir, build_cuda=None, script_lang=None):
+        """Iterate over the known configurations, optionally filtered by build_cuda and/or script_lang."""
+        all_configs = [
+                (False, 'script'),
+                (False, 'lua'),
+                (False, 'python'),
+                (True, 'lua'),
+                (True, 'python')]
+        def wanted(conf):
+            return ((build_cuda is None or conf[0] == build_cuda) and
+                    (script_lang is None or conf[1] == script_lang))
+        return iter(ChillConfig(chill_dir, bin_dir, *conf) for conf in all_configs if wanted(conf))
+
+
+# -                               - #
+# -  Test case for building chill - #
+# -                               - #
+class BuildChillTestCase(test.TestCase):
+    """
+    Test case for building chill.
+    """
+    
+    default_options = {
+            'coverage': False   # compile for coverage
+        }
+    
+    def __init__(self, config, options={}, coverage_set=None):
+        """
+        @param config chill configuration object
+        @param options options for building chill and testing the build process
+        @param coverage_set GcovSet object to record coverage
+        """
+        assert isinstance(config, ChillConfig)
+        if config.script_lang is None:
+            config.script_lang = config.default_script_lang()
+        self.config = config
+        super(BuildChillTestCase,self).__init__(self.config.name)
+        self._set_options(options, coverage_set)
+    
+    def _set_options(self, options, coverage_set):
+        """
+        Merge options over default_options and, when coverage is requested, register the program.
+        """
+        self.options = dict(BuildChillTestCase.default_options)
+        self.options.update(options)
+        
+        # Guarded like RunChillTestCase.tearDown: coverage_set is optional and defaults to None.
+        if self.options['coverage'] and coverage_set is not None:
+            coverage_set.addprogram(self.config.name, self.config.chill_dir)
+    
+    def setUp(self):
+        """
+        Called before run, outside of the context of a test case
+        """
+        # clean up any coverage files from a previous build
+        for pattern in ('*.gcno', '*.gcov', '*.gcda'):
+            util.shell('rm', ['-f', pattern], wd=self.config.chill_dir)
+        
+        # command and arguments are passed separately, like every other util.shell call in this file
+        util.shell('make', ['clean'], wd=self.config.chill_dir)
+    
+    def run(self):
+        """
+        Build chill: make distclean, configure with this configuration's arguments, then make.
+        """
+        util.shell('make', ['distclean'], wd=self.config.chill_dir)
+        util.shell('./configure', self.config.config_args, wd=self.config.chill_dir)
+        util.shell('make', [], wd=self.config.chill_dir)
+        return self.make_pass()
+        
+    def tearDown(self):
+        """
+        Called after run, outside of the context of a test case.
+        If a binary directory is specified, rename and move the executable there, otherwise, just rename it.
+        """
+        if self.test_result.passed():
+            built = os.path.join(self.config.chill_dir, self.config.buildname)
+            if self.config.bin_dir:
+                util.shell('mv', [built, os.path.join(self.config.bin_dir, self.config.name)])
+            elif self.config.buildname != self.config.name:
+                util.shell('mv', [built, os.path.join(self.config.chill_dir, self.config.name)])
+
+
+# -                              - #
+# -  Test case for running chill - #
+# -                              - #
+class RunChillTestCase(test.SequencialTestCase):
+    """
+    Test case that runs a chill script and checks what it produces.
+    """
+    
+    default_options={
+            'compile-src':True,              # Compile original source file
+            'run-script':True,               # Run chill script
+            'compile-gensrc':True,           # Compile generated source file
+            'check-run-script-stdout':False, # Diff stdout from run_script() against an expected value (from a .stdout file)
+            'coverage':False,                # Record coverage
+            
+            'fail-compile-src':False,        # Expect compile_src to fail (TODO: not implemented)
+            'fail-run-script':False,         # Expect run_script to fail  (TODO: not implemented)
+        }
+    
+    def __init__(self, config, chill_script, chill_src, wd=None, options={}, coverage_set=None):
+        """
+        @param config Chill configuration object
+        @param chill_script Path to the chill script.
+        @param chill_src Path to the source file the script uses.
+        @param wd Working directory where the script is executed, compiled, and tested. Defaults to the current directory.
+        @param options Overrides for default_options.
+        @param coverage_set GcovSet object to record coverage
+        """
+        if config.script_lang is None:
+            config.script_lang = ChillConfig.ext_to_script_lang(chill_script.split('.')[-1])
+        
+        assert isinstance(config, ChillConfig)
+        
+        super(RunChillTestCase,self).__init__(config.name + ':' + os.path.basename(chill_script))
+        
+        self.config = config
+        self.wd = os.getcwd() if wd is None else wd
+        
+        self.chill_src_path = os.path.abspath(chill_src)
+        self.chill_script_path = os.path.abspath(chill_script)
+        self.chill_bin = os.path.join(self.config.bin_dir, self.config.name)
+        self.chill_src = os.path.basename(self.chill_src_path)
+        self.chill_script = os.path.basename(self.chill_script_path)
+        self.chill_gensrc = self._get_gensrc(self.chill_src)
+        self.chill_gensrc_path = os.path.join(self.wd, self.chill_gensrc)
+        
+        self.compile_src_func = self.config.compile_src_func()
+        self.compile_gensrc_func = self.config.compile_gensrc_func()
+        self.build_src_func = self.config.build_src_func()
+        self.build_gensrc_func = self.config.build_gensrc_func()
+        
+        self._set_options(options, coverage_set)
+
+    def _set_options(self, options, coverage_set=None):
+        """Merge options over the defaults and register the enabled subtests, in order."""
+        self.options = dict(RunChillTestCase.default_options)
+        self.options.update(options)
+        self.out = dict()
+        self.expected = dict()
+        
+        optional_subtests = [
+                ('compile-src', 'compile-src', self.compile_src),
+                ('run-script', 'run-script', self.run_script),
+                ('compile-gensrc', 'compile-generated-src', self.compile_gensrc)]
+        for option, subtest_name, subtest in optional_subtests:
+            if self.options[option]:
+                self.add_subtest(subtest_name, subtest)
+        self.add_subtest('check-run-script-validate', self.check_run_script_validate)
+        if self.options['check-run-script-stdout']:
+            self.add_subtest('check-run-script-stdout', self.check_run_script_stdout)
+            # the expected output is read from the script's path with its extension replaced by 'stdout'
+            with open('.'.join(self.chill_script_path.split('.')[0:-1] + ['stdout']), 'r') as f:
+                self.expected['run_script.stdout'] = f.read()
+        self.coverage_set = coverage_set
+    
+    def _get_gensrc(self, src):
+        """
+        The name of the generated source file: 'rose_' + src, with the extension replaced by 'cu' for cuda builds.
+        """
+        if self.config.build_cuda:
+            return 'rose_' + '.'.join(src.split('.')[0:-1]) + '.cu'
+        return 'rose_' + src
+    
+    def setUp(self):
+        """
+        Called before any tests are performed. Copies the source and script files into the working directory.
+        """
+        for origin, local_name in ((self.chill_src_path, self.chill_src), (self.chill_script_path, self.chill_script)):
+            util.shell('cp', [origin, local_name], wd=self.wd)
+        #TODO: check for chill binary
+    
+    def tearDown(self):
+        """
+        Called when the test is complete. Removes the copied and generated files, then records coverage if enabled.
+        """
+        for leftover in (self.chill_src, self.chill_script, self.chill_gensrc):
+            util.shell('rm', ['-f', leftover], wd=self.wd)
+        if self.options['coverage'] and self.coverage_set is not None:
+            self.coverage_set.addcoverage(self.config.name, self.name)
+    
+    # -             - #
+    # - Chill Tests - #
+    # -             - #
+    
+    def compile_src(self, tc):
+        """
+        Attempts to compile the source file before any transformation is performed. Fails if gcc fails.
+        """
+        # the output goes to a temporary file name; only the shell output is kept
+        _, self.out['compile_src.stdout'] = self.compile_src_func(self.chill_src, util.mktemp(), wd=self.wd)
+        return tc.make_pass()
+    
+    def run_script(self, tc):
+        """
+        Attempts to run the script file. Fails if chill exits with a non-zero result.
+        """
+        # look for cudaize.lua for cuda-chill
+        cudaize_path = os.path.join(self.wd, 'cudaize.lua')
+        if self.config.build_cuda and not os.path.exists(cudaize_path):
+            return test.TestResult.make_error(test.FailedTestResult, tc, reason='cudaize.lua was missing from the working directory.')
+        self.out['run_script.stdout'] = util.shell(self.chill_bin, [self.chill_script], wd=self.wd)
+        return tc.make_pass()
+    
+    def compile_gensrc(self, tc):
+        """
+        Attempts to compile the generated source file. Fails if gcc fails.
+        """
+        # same scheme as compile_src, applied to the generated file's full path
+        _, self.out['compile_gensrc.stdout'] = self.compile_gensrc_func(self.chill_gensrc_path, util.mktemp(), wd=self.wd)
+        return tc.make_pass()
+    
+    def check_run_script_validate(self, tc):
+        """
+        Generate test data and run both the original source and generated source against it.
+        Fail as soon as one test procedure generates different output.
+        """
+        for name, (is_valid, is_faster) in cpp_validate.run_from_src(self.chill_src, self.chill_gensrc, self.build_src_func, self.build_gensrc_func, wd=self.wd):
+            self.out['check_run_script_validate.{}'.format(name)] = (is_valid, is_faster)
+            if not is_valid:
+                return tc.make_fail('test procedure {} returned invalid results.'.format(name))
+        return tc.make_pass()
+    
+    def check_run_script_stdout(self, tc):
+        """
+        Diff stdout from run_script against an expected stdout
+        """
+        differs, difftext = util.isdiff(self.out['run_script.stdout'], self.expected['run_script.stdout'])
+        if differs:
+            return test.TestResult.make_fail(test.FailedTestResult, tc, reason='Diff:\n' + difftext)
+        return tc.make_pass()
+    
diff --git a/test-chill/testchill/cpp_validate.py b/test-chill/testchill/cpp_validate.py
new file mode 100644
index 0000000..5f19a12
--- /dev/null
+++ b/test-chill/testchill/cpp_validate.py
@@ -0,0 +1,165 @@
+import collections
+import os
+import pickle
+import re
+
+from . import util
+
+_script_parser = None
+def _get_script_parser():
+    """Return the test code generator language parser, unpickling it on first use."""
+    global _script_parser
+    if _script_parser is not None:
+        return _script_parser
+    # the pickle path is relative to the current working directory
+    with open('testchill/cpp_validate/parser.pickle','rb') as f:
+        _script_parser = pickle.load(f)
+    return _script_parser
+
+def _parse_testproc_python(txt, glbls=None):
+    """
+    Evaluate text as a Python expression for a testchill._cpp_validate_env.Procedure object.
+    @param txt Python code to be parsed. It is passed to eval(), so it must come from a trusted source.
+    @param glbls A python global dict.
+    """
+    namespace = dict() if glbls is None else glbls
+    # star-import the environment into the namespace so txt can use its names unqualified
+    exec('import testchill._cpp_validate_env\nfrom testchill._cpp_validate_env import *', None, namespace)
+    return eval(txt, namespace)
+
+def _parse_testproc_script(txt, glbls=None):
+    """
+    Parse text as test code generator language and return the first parsed unit.
+    @param txt Code to be parsed.
+    @param glbls A python global dict.
+    """
+    parser = _get_script_parser()
+    first_unit = list(parser.parse(util.textstream(txt)))[0]
+    # NOTE(review): when glbls is given it is not used and no bindings are added;
+    # bindings are only added (from an empty dict) when it is None - confirm this is intended.
+    if glbls is not None:
+        return first_unit
+    from . import _cpp_validate_env
+    return _cpp_validate_env.addbindings(first_unit, dict())
+
+def _parse_testproc_iter(srcfile, wd=None):
+    """
+    Parse all test procedures from a file, yielding (procedure, attrs) pairs.
+    @param srcfile File path to parse.
+    @param wd Working directory. Defaults to the current directory at call time.
+    """
+    if wd is None:
+        wd = os.getcwd()  # a default of os.getcwd() would be frozen at import time
+    for txt, parsed_attrs in util.extract_tag('test', srcfile, wd):
+        attrs = collections.defaultdict(lambda: None, {'lang':'script', 'define':'dict()'})
+        attrs.update(parsed_attrs)
+        if attrs['lang'] == 'python':
+            yield _parse_testproc_python(txt), attrs
+        if attrs['lang'] == 'script':
+            yield _parse_testproc_script(txt), attrs
+
+#def _compile_gpp(src, dest):
+#    """
+#    Compile a signle C++ source file into an executable object.
+#    @param src Source file path.
+#    @param dest Object file path.
+#    """
+#    util.shell('g++', ['-o', dest, src, '-lrt'])
+
+def _test_time(control_time, test_time):
+    """
+    Report whether test took strictly less time than control.
+    @param control_time Time taken by control.
+    @param test_time Time taken by test.
+    """
+    return test_time < control_time
+
+def _test_validate(control_dataout_path, test_dataout_path):
+    """
+    Report whether control and test wrote byte-for-byte identical files.
+    @param control_dataout_path Path to the file written by control.
+    @param test_dataout_path Path to the file written by test.
+    """
+    with open(control_dataout_path, 'rb') as controlfile, open(test_dataout_path, 'rb') as testfile:
+        expected = controlfile.read()
+        return expected == testfile.read()
+
+def _run_test_validate_time(control_obj_path, test_obj_path, datain_path):
+    control_out, test_out = util.mktemp(), util.mktemp()
+    # NOTE(review): eval() runs on each program's printed output, which must evaluate to a one-element sequence.
+    control_time, = eval(util.shell(os.path.abspath(control_obj_path), [datain_path, control_out]))
+    test_time, = eval(util.shell(os.path.abspath(test_obj_path), [datain_path, test_out]))
+    return _test_validate(control_out, test_out), _test_time(control_time, test_time)
+
+#def _run_test_validate_time(control_obj_path, test_obj_path, datain_path, wd):
+    #control_obj_path = '.'.join(control_src_path.split('.')[:-1])
+    #test_obj_path = '.'.join(test_src_path.split('.')[:-1])
+    
+    
+    
+    #util.set_tempfile(control_obj_path)
+    #util.set_tempfile(test_obj_path)
+    #_compile_gpp(control_src_path, control_obj_path)
+    #_compile_gpp(test_src_path, test_obj_path)
+    
+    #test_validate, test_time = _run_test_validate_time(control_obj_path, test_obj_path, datain_path)
+    #return test_validate, test_time
+
+def _generate_initial_data(test_proc, srcfile, defines, wd=None):
+    # Writes the 'in'/'inout' data, then the 'out' data, to <wd>/<basename of srcfile>.data and returns that path.
+    # wd defaults to the current directory at call time (a default of os.getcwd() is frozen at import time).
+    filename = os.path.join(os.getcwd() if wd is None else wd, os.path.basename(srcfile)) + '.data'
+    with open(filename, 'wb') as f:
+        for directions in (['in', 'inout'], ['out']):
+            for p_name, p_type, p_dims, p_data in test_proc.generatedata(directions, defines): f.write(p_data)
+    return filename
+
+def _format_insertion_dict(test_proc, src_path, defines):  # maps each template marker name to the text inserted for it
+    with open(src_path, 'r') as src_file:
+        source_text = src_file.read()
+    return {
+            'defines'      : '\n'.join(['#define {} {}'.format(k,v) for k,v in defines.items()]),
+            'test-proc'    : source_text,
+            'declarations' : '\n'.join(test_proc.generatedecls(defines)),
+            'read-in'      : '\n'.join(test_proc.generatereads(['in','inout'], 'datafile_initialize', defines)),
+            'read-out'     : '\n'.join(test_proc.generatereads(['out'], 'datafile_initialize', defines)),
+            'run'          : test_proc.getinvokestr(),
+            'write-out'    : '\n'.join(test_proc.generatewrites('datafile_out', defines))}
+
+def _write_generated_code(test_proc, src_path, defines, dest_filename, wd):
+    """Fill the '//# name' markers of the validate.cpp template, write the result to wd/dest_filename and return that path."""
+    insertion_dict = _format_insertion_dict(test_proc, src_path, defines)
+    dest_file_path = os.path.join(wd, dest_filename)
+    with open('testchill/cpp_validate/src/validate.cpp', 'r') as template_file:
+        template_text = template_file.read()
+    def expand(match):
+        # every inserted line gets the indentation of the marker it replaces
+        indent = match.group('indent')
+        return '\n'.join([indent + line for line in insertion_dict[match.group('name')].splitlines()])
+    # One pass over the template: the former repeated str.replace() also rewrote marker-like text inside inserted code.
+    desttext = re.sub(r'(?P<indent>[ \t]*)//# (?P<name>[^\s]+)', expand, template_text)
+    with open(dest_file_path, 'w') as destfile:
+        destfile.write(desttext)
+    return dest_file_path
+
+def run_from_src(control_src, test_src, build_control_func, build_test_func, wd=None):
+    """Build and run control_src and test_src per test procedure; yields (test name, (is_valid, is_faster))."""
+    wd = os.getcwd() if wd is None else wd  # resolved per call; a default of os.getcwd() is frozen at import time
+    control_src_path = os.path.join(wd, control_src)
+    test_src_path = os.path.join(wd, test_src)
+    for test_proc, attrs in _parse_testproc_iter(control_src, wd):
+        defines = eval(attrs['define'])  # NOTE(review): eval() of text taken from the source file's test tag
+        datafile = _generate_initial_data(test_proc, control_src_path, defines, wd=wd)
+        gen_control_src = _write_generated_code(test_proc, control_src_path, defines, 'gen_control.cc', wd)
+        gen_test_src = _write_generated_code(test_proc, test_src_path, defines, 'gen_test.cc', wd)
+        gen_control_obj, _ = build_control_func(gen_control_src, os.path.join(wd, 'control_obj'))
+        gen_test_obj, _ = build_test_func(gen_test_src, os.path.join(wd, 'test_obj'))
+        util.set_tempfile(gen_control_obj)
+        util.set_tempfile(gen_test_obj)
+        yield attrs['name'], _run_test_validate_time(gen_control_obj, gen_test_obj, datafile)
+
+def parse_defines_iter(src, wd=None):  # yields the evaluated 'define' attribute of every test tag that has one
+    for txt, attrs in util.extract_tag('test', src, os.getcwd() if wd is None else wd):  # cwd is resolved per call, not at import
+        if 'define' in attrs:
+            yield eval(attrs['define'])  # NOTE(review): eval() of text taken from the source file
+
diff --git a/test-chill/testchill/cpp_validate/grammar.txt b/test-chill/testchill/cpp_validate/grammar.txt
new file mode 100644
index 0000000..fdb8c00
--- /dev/null
+++ b/test-chill/testchill/cpp_validate/grammar.txt
@@ -0,0 +1,124 @@
+terminals:
+    Identifier     '[a-zA-Z_][a-zA-Z_0-9]*'
+    NumericLiteral '[0-9]+(\.[0-9]+)?'
+    Comment        '\#([^\x0a])*'
+    WS             '\s+'
+ignore: WS, <NL>, Comment
+rules:
+<proc-unit> ::=
+    <with-stmt>:w                                           => w
+    <proc>:p                                                => p
+<with-stmt> ::=
+    'with' '{' <with-decl-list-opt>:decls '}' <proc-unit>:p => addbindings(p, dict(decls))
+<with-decl-list-opt> ::=
+    eps                                                     => []
+    <with-decl-list>:l                                      => l
+<with-decl-list> ::=
+    <with-decl-list>:l ',' <with-decl>:decl                 => l + [decl]
+    <with-decl>:decl                                        => [decl]
+<with-decl> ::=
+    Identifier:name ':' <expr>:e                            => (name, (None, e))
+    <c-type>:ctype Identifier:name ':' <expr>:e             => (name, (ctype, e))
+    
+<proc> ::=
+    'procedure' <c-type>:rtype Identifier:name '(' <param-list-opt>:plist ')'
+                                                            => Procedure(name, rtype, plist)
+<c-type> ::=
+    <c-type>:bt '*'                                         => CppPointerType(bt)
+    <c-type>:bt <c-array-dim-list>:dims                     => CppArrayType(bt, dims)
+    'void'                                                  => CppVoidType()
+    'char'                                                  => CppPrimitiveType.get_from_cppname('char')
+    'signed' 'char'                                         => CppPrimitiveType.get_from_cppname('signed char')
+    'unsigned' 'char'                                       => CppPrimitiveType.get_from_cppname('unsigned char')
+    'short'                                                 => CppPrimitiveType.get_from_cppname('short')
+    'unsigned' 'short'                                      => CppPrimitiveType.get_from_cppname('unsigned short')
+    'int'                                                   => CppPrimitiveType.get_from_cppname('int')
+    'unsigned' 'int'                                        => CppPrimitiveType.get_from_cppname('unsigned int')
+    'long'                                                  => CppPrimitiveType.get_from_cppname('long')
+    'unsigned' 'long'                                       => CppPrimitiveType.get_from_cppname('unsigned long')
+    'long' 'long'                                           => CppPrimitiveType.get_from_cppname('long long')
+    'unsigned' 'long' 'long'                                => CppPrimitiveType.get_from_cppname('unsigned long long')
+    'float'                                                 => CppPrimitiveType.get_from_cppname('float')
+    'double'                                                => CppPrimitiveType.get_from_cppname('double')
+<c-array-dim-list> ::=
+    <c-array-dim-list>:dlist '[' <expr>:e ']'               => dlist + [e]
+    <c-array-dim-list>:dlist '[' ']'                        => dlist + [None]
+    '[' ']'                                                 => [None]
+    '[' <expr>:e ']'                                        => [e]
+<param-list-opt> ::=
+    eps                                                     => []
+    <param-list>:l                                          => l
+<param-list> ::=
+    <param-list>:l ',' <param>:p                            => l + [p]
+    <param>:p                                               => [p]
+<param> ::=
+    <direction>:d <c-type>:t Identifier:name '=' <expr>:e   => Parameter(name, t, d, e)
+    <direction>:d <c-type>:t Identifier:name                => Parameter(name, t, d, None)
+<direction> ::=
+    'in'                                                    => 'in'
+    'out'                                                   => 'out'
+    'in' 'out'                                              => 'inout'
+    'out' 'in'                                              => 'inout'
+    eps                                                     => 'inout'
+
+
+<expr> ::=
+    <add-expr>:e                                            => e
+    'lambda' <id-list-opt>:params ':' <expr>:e              => LambdaExpr(params, e)
+    'matrix' '(' <dim-list-expr>:d ',' <expr>:e ')'         => MatrixGenerator(d, e)
+    'matrix' <named-dim-list-expr>:dims <expr>:e            => MatrixGenerator([d[1] for d in dims], LambdaExpr([d[0] for d in dims], e))
+<add-expr> ::=
+    <add-expr>:l '+' <mul-expr>:r                           => BinExpr(l, '+', r)
+    <add-expr>:l '-' <mul-expr>:r                           => BinExpr(l, '-', r)
+    <mul-expr>:e                                            => e
+<mul-expr> ::=
+    <mul-expr>:l '*' <prefix-expr>:r                        => BinExpr(l, '*', r)
+    <mul-expr>:l '/' <prefix-expr>:r                        => BinExpr(l, '/', r)
+    <prefix-expr>:e                                         => e
+<prefix-expr> ::=
+    '-' <prefix-expr>:e                                     => UnaryExpr('-', e)
+    <postfix-expr>:e                                        => e
+<postfix-expr> ::=
+    <pow-expr>:e                                            => e
+<pow-expr> ::=
+    <term-expr>:l '**' <pow-expr>:r                         => BinExpr(l, '**', r)
+    <term-expr>:e                                           => e
+<term-expr> ::=
+    '(' <expr>:e ')'                                        => e
+    '[' <expr-list-opt>:l ']'                               => l
+    Identifier:name                                         => NameExpr(name)
+    NumericLiteral:num                                      => ConstantExpr(num)
+    'random' '(' <expr>:mn ',' <expr>:mx ')'                => RandomExpr(mn, mx)
+    <term-expr>:f '(' <expr-list-opt>:l ')'                 => InvokeExpr(f, l)
+    <term-expr>:n '.' Identifier:attr                       => AttributeExpr(n, attr)
+<expr-list-opt> ::=
+    eps                                                     => []
+    <expr-list>:l                                           => l
+<expr-list> ::=
+    <expr-list>:l ',' <expr>:e                              => l + [e]
+    <expr>:e                                                => [e]
+<dim-list-expr> ::=
+    '[' <dim-expr-list>:l ']'                               => l
+<dim-expr-list> ::=
+    <dim-expr-list>:l ',' <dim-expr>:e                      => l + [e]
+    <dim-expr>:e                                            => [e]
+<dim-expr> ::=
+    eps                                                     => None
+    '*'                                                     => None
+    <expr>:e                                                => e
+<id-list-opt> ::=
+    eps                                                     => []
+    <id-list>:l                                             => l
+<id-list> ::=
+    <id-list>:l ',' Identifier:ident                        => l + [ident]
+    Identifier:ident                                        => [ident]
+<named-dim-list-expr> ::=
+    '[' <named-dim-expr-list>:l ']'                         => l
+<named-dim-expr-list> ::=
+    <named-dim-expr-list>:l ',' <named-dim-expr>:e          => l + [e]
+    <named-dim-expr>:e                                      => [e]
+<named-dim-expr> ::=
+    Identifier:name                                         => (name, None)
+    Identifier:name ':' <expr>:e                            => (name, e)
+    
+
diff --git a/test-chill/testchill/cpp_validate/src/validate.cpp b/test-chill/testchill/cpp_validate/src/validate.cpp
new file mode 100644
index 0000000..f09009d
--- /dev/null
+++ b/test-chill/testchill/cpp_validate/src/validate.cpp
@@ -0,0 +1,29 @@
+#include <time.h>
+#include <fstream>
+#include <cstdio>
+
+//# defines
+//# test-proc
+
+int main(int argc, char** argv) {  // arguments: <input datafile> <output datafile>
+    //# declarations
+    timespec start_time;
+    timespec end_time;
+    if (argc < 3) { std::fprintf(stderr, "expected arguments: <input datafile> <output datafile>\n"); return 1; }
+    std::ifstream datafile_initialize(argv[1]);
+    //# read-in
+    //# read-out
+    datafile_initialize.close();
+    
+    clock_gettime(CLOCK_MONOTONIC, &start_time);  // monotonic clock: the interval is unaffected by wall-clock adjustments
+    //# run
+    clock_gettime(CLOCK_MONOTONIC, &end_time);
+    
+    std::ofstream datafile_out(argv[2]);
+    //# write-out
+    datafile_out.close();
+    
+    double time_diff = (end_time.tv_sec - start_time.tv_sec) + (end_time.tv_nsec - start_time.tv_nsec)/1000000000.0;
+    std::printf("(%f,)", time_diff);  // elapsed seconds, written to stdout as "(<seconds>,)"
+    return 0;
+}
diff --git a/test-chill/testchill/gcov.py b/test-chill/testchill/gcov.py
new file mode 100644
index 0000000..668c00e
--- /dev/null
+++ b/test-chill/testchill/gcov.py
@@ -0,0 +1,224 @@
+from __future__ import print_function
+import functools
+import itertools
+import os
+import os.path
+import sys
+
+from . import util
+
+class GcovFile(object):
+    def __init__(self, src_file_name, cov_file_path, lines, properties):
+        """
+        @param src_file_name Name of the source file.
+        @param cov_file_path Full path to the coverage file.
+        @param lines List of GcovLine objects.
+        @param properties Properties from the coverage file.
+        """
+        self.src_file_name = src_file_name
+        self.cov_file_path = cov_file_path
+        self.lines = lines
+        self.properties = properties
+    
+    @staticmethod
+    def parse_file(gcov, fname, process=None):
+        """
+        Run gcov on fname and parse the coverage file it writes; returns None if no coverage file exists afterwards.
+        @param gcov Gcov object that this file is a part of.
+        @param fname File name.
+        @param process Process name
+        """
+        util.shell('gcov', [fname], wd=gcov.srcdir)
+        cov_file_path = os.path.join(gcov.srcdir, fname + '.gcov')
+        src_file_name = fname
+        if os.path.exists(cov_file_path):
+            with open(cov_file_path, 'r') as f:
+                lines, properties = GcovFile.parse_lines(f.readlines(), process)
+            return GcovFile(src_file_name, cov_file_path, lines, properties)
+        else:
+            return None
+    
+    @staticmethod
+    def parse_lines(str_lines, process):
+        """
+        Parse the lines of a coverage file into (list of GcovLine objects, dict of properties).
+        @param str_lines The lines of a coverage file, as a list of strings.
+        @param process Name of the process that executed the code.
+        """
+        properties = dict()
+        lines = []
+        for line in str_lines:
+            if line.endswith('\n'):  # endswith() also tolerates an empty string
+                line = line[0:-1]
+            pline = line.split(':')
+            pline = list(map(str.strip, pline[0:2])) + pline[2:]
+            if pline[1] == '0':  # line number 0 carries a 'key:value' property
+                properties[pline[2]] = ':'.join(pline[3:]).strip()  # the value may itself contain ':'
+            elif pline[0][0] == '-':  # no executable code on this line
+                lines.append(GcovLine(int(pline[1]), dict(), ':'.join(pline[2:])))
+            elif pline[0][0] in '#=':  # '#####' and '=====' both mark lines that were never executed
+                lines.append(GcovLine(int(pline[1]), {process : 0}, ':'.join(pline[2:])))
+            else:  # an execution count, which gcov may suffix with '*'
+                lines.append(GcovLine(int(pline[1]), {process : int(pline[0].rstrip('*'))}, ':'.join(pline[2:])))
+        return lines, properties
+    
+    @staticmethod
+    def union(left, right):
+        """
+        Merge two different coverages of the same file into a single coverage object.
+        """
+        return left | right
+    
+    def __or__(self, right):
+        """
+        Merge two different coverages of the same file into a new coverage object.
+        """
+        new_file = self.clone()
+        new_file.merge(right)
+        return new_file
+    
+    def __ior__(self, right):
+        """
+        Merge another coverage of the same file into this coverage object, in place.
+        """
+        self.merge(right)
+        return self
+    
+    def merge(self, other):
+        """
+        Merge another coverage into self.
+        """
+        assert self.src_file_name == other.src_file_name
+        GcovLine.merge_lines(self.lines, other.lines)
+        self.properties.update(other.properties)
+    
+    def clone(self):
+        """ Create a copy whose line counts can be merged into without changing self. """
+        # merge() mutates GcovLine objects in place, so each line is copied rather than shared.
+        lines = [GcovLine(l.lineno, dict(l.count_by_process), l.code) for l in self.lines]
+        return GcovFile(self.src_file_name, self.cov_file_path, lines, dict(self.properties))
+
+
+class GcovLine(object):
+    def __init__(self, lineno, count_by_process, code):
+        """
+        @param lineno Line number.
+        @param count_by_process A dictionary of execution counts by name of the process that executed them.
+        @param code Source code from this line.
+        """
+        self.code = code
+        self.count_by_process = count_by_process
+        self.lineno = lineno
+    
+    @staticmethod
+    def merge_lines(lines, other_lines):
+        """
+        Fold the per-process counts of each line in other_lines into the line at the same position in lines.
+        """
+        for target, source in zip(lines, other_lines):
+            assert target.lineno == source.lineno
+            assert target.code == source.code
+            target.count_by_process.update(source.count_by_process)
+    
+    def count(self):
+        """
+        The total number of times this line was executed, summed over all processes.
+        None when no process recorded a count for this line.
+        """
+        counts = [c for c in self.count_by_process.values() if c is not None]
+        if not counts:
+            return None
+        return sum(counts)
+    
+    def __repr__(self):
+        return repr((self.lineno, self.count_by_process, self.code))
+
+
+class Gcov(object):
+    def __init__(self, srcdir):
+        self.srcdir = srcdir
+        self.files = {}  # GcovFile objects keyed by source file name
+    
+    @staticmethod
+    def parse(srcdir, process=None):
+        gcov = Gcov(srcdir)
+        src_names = util.filterext(['cc','c','cpp','h','hh'], os.listdir(srcdir))
+        gcov._append(cov for cov in (GcovFile.parse_file(gcov, name, process=process) for name in src_names) if cov is not None)
+        return gcov
+    
+    def _append(self, files):
+        for cov_file in files:
+            if cov_file.src_file_name not in self.files:
+                self.files[cov_file.src_file_name] = cov_file  # first coverage seen for this file: stored as-is
+                continue
+            self.files[cov_file.src_file_name].merge(cov_file)
+    
+    def __or__(self, right):
+        merged = self.clone()
+        merged.merge(right)
+        return merged
+    
+    def __ior__(self, right):
+        self.merge(right)
+        return self
+    
+    @staticmethod
+    def union(left, right):
+        return left | right
+    
+    def merge(self, other):
+        self._append(other.files.values())
+    
+    def clone(self):
+        duplicate = Gcov(self.srcdir)
+        duplicate._append(f.clone() for f in self.files.values())
+        return duplicate
+
+
+class GcovSet(object):
+    def __init__(self):
+        self.coverage_by_program = dict()  # Gcov objects keyed by program name
+    
+    def addprogram(self, prog_name, src_dir):
+        self.coverage_by_program[prog_name] = Gcov(src_dir)  # starts empty; replaces any coverage already stored under prog_name
+    
+    def addcoverage(self, prog_name, process_name):
+        cov = self.coverage_by_program[prog_name]  # KeyError unless addprogram(prog_name, ...) was called first
+        cov.merge(Gcov.parse(cov.srcdir, process_name))
+    
+    #def unexecuted_lines(self):
+    #    covlist = sorted(self.coverage_by_program.values(), key=lambda c: c.srcdir)
+    #    for src, grp in itertools.groupby(covlist, lambda c: c.srcdir):
+    #        files = functools.reduce(lambda a, c: a | c, grp).files.values()
+    #        file_lines = iter((f.src_file_name, iter(l for l in f.lines if l.count() == 0)) for f in files)
+    #        yield src, file_lines
+    #
+    #def pretty_print(self, outfile=sys.stdout, width=60, stats=['unexecuted', 'unexecuted.bysrc']):
+    #    print('='*width, file=outfile)
+    #    print('  CODE COVERAGE', file=outfile)
+    #    
+    #    if 'unexecuted' in stats:
+    #        print('='*width, file=outfile)
+    #        print('    unexecuted lines', file=outfile)
+    #        if 'unexecuted.bysrc' in stats:
+    #            for src, file_lines in self.unexecuted_lines():
+    #                print((src + ':'), file=outfile)
+    #                print('-'*width, file=outfile)
+    #                for src_file_name, lines in file_lines:
+    #                    print('  ' + src_file_name + ':', file=outfile)
+    #                    for line in lines:
+    #                        print("{}:{}".format(str(line.lineno).rjust(5), line.code), file=outfile)
+    #    #print('='*width, file=outfile)
+    #    #print(prog, file=outfile)
+    #    #print('-'*width, file=outfile)
+    
+    def _get_coverage_by_file(self):  # reduce() has no initial value, so the no-programs case is answered with an empty dict
+        return functools.reduce(lambda a,b: a|b, self.coverage_by_program.values()).files if self.coverage_by_program else dict()
+    
+    def _get_filenames(self):
+        return self.coverage_by_file.keys()
+    
+    coverage_by_file = property(_get_coverage_by_file)
+    filenames = property(_get_filenames)
+
+
diff --git a/test-chill/testchill/omega.py b/test-chill/testchill/omega.py
new file mode 100644
index 0000000..962333a
--- /dev/null
+++ b/test-chill/testchill/omega.py
@@ -0,0 +1,29 @@
+from . import test
+from . import util
+
+
+
+class BuildOmegaTestCase(test.TestCase):
+    def __init__(self, omega_dir, version='dev'):
+        name = BuildOmegaTestCase.getname(version)
+        super(BuildOmegaTestCase, self).__init__(name)
+        self.omega_dir, self.version = omega_dir, version
+    
+    @staticmethod
+    def getname(version):
+        # Only the 'release' version gets a distinguishing suffix in the test case name.
+        if version != 'release':
+            return 'omega'
+        return 'omega-release'
+    
+    def setUp(self):
+        util.shell('make clean', wd=self.omega_dir)  # every run starts from a clean tree
+    
+    def tearDown(self):
+        """ Nothing to clean up. """
+    
+    def run(self):
+        for make_cmd in ('make depend', 'make'):
+            util.shell(make_cmd, wd=self.omega_dir)
+
+
diff --git a/test-chill/testchill/test.py b/test-chill/testchill/test.py
new file mode 100644
index 0000000..c38b98a
--- /dev/null
+++ b/test-chill/testchill/test.py
@@ -0,0 +1,381 @@
+from __future__ import print_function
+#TODO: test dependencies
+#TODO: expected failures
+import itertools
+import io
+import logging
+import pprint
+import sys
+import traceback
+
+from . import util
+
+
+class TestResult(object):
+    """
+    The base class for all test results.
+    """
+    _pass = 'pass'
+    _error = 'error'
+    _fail = 'fail'
+    _skipped = 'skipped'
+    
+    def __init__(self, testcase, status):
+        self.testcase_name = testcase.name
+        self.status = status
+        testcase.setresult(self)  # hands this result back to the test case that produced it
+    
+    @staticmethod
+    def make_pass(result_type, testcase, *args, **kwargs):
+        """
+        Create and return a passing test result of type result_type.
+        @param result_type A class that extends TestResult
+        @param testcase The test case that generated the result
+        @param *args Additional positional arguments to be passed to result_type.__init__
+        @param **kwargs Keyword arguments to be passed to result_type.__init__
+        """
+        return result_type(testcase, TestResult._pass, *args, **kwargs)
+    
+    @staticmethod
+    def make_error(result_type, testcase, *args, **kwargs):
+        """
+        Create and return an errored test result of type result_type.
+        @param result_type A class that extends TestResult
+        @param testcase The test case that generated the result
+        @param *args Additional positional arguments to be passed to result_type.__init__
+        @param **kwargs Keyword arguments to be passed to result_type.__init__
+        """
+        return result_type(testcase, TestResult._error, *args, **kwargs)
+    
+    @staticmethod
+    def make_fail(result_type, testcase, *args, **kwargs):
+        """
+        Create and return a failed test result of type result_type.
+        @param result_type A class that extends TestResult
+        @param testcase The test case that generated the result
+        @param *args Additional positional arguments to be passed to result_type.__init__
+        @param **kwargs Keyword arguments to be passed to result_type.__init__
+        """
+        return result_type(testcase, TestResult._fail, *args, **kwargs)
+    
+    @staticmethod
+    def make_skipped(result_type, testcase, *args, **kwargs):
+        """
+        Create and return a skipped test result of type result_type.
+        @param result_type A class that extends TestResult
+        @param testcase The test case that generated the result
+        @param *args Additional positional arguments to be passed to result_type.__init__
+        @param **kwargs Keyword arguments to be passed to result_type.__init__
+        """
+        return result_type(testcase, TestResult._skipped, *args, **kwargs)
+    
+    def passed(self):
+        """ Return true iff the testcase passed. """
+        return self.status == TestResult._pass
+    
+    def errored(self):
+        """ Return true iff the testcase errored. """
+        return self.status == TestResult._error
+
+    def failed(self):
+        """ Return true iff the testcase failed. """
+        return self.status == TestResult._fail
+    
+    def skipped(self):
+        """ Return true iff the testcase was skipped. """
+        return self.status == TestResult._skipped
+        
+    def pprint_dict(self):
+        """
+        Return a dict that is ideal for passing to pprint.
+        """
+        return {'testcase_name': self.testcase_name, 'status':self.status}
+    
+    def pretty_print(self, width=60, outfile=sys.stdout):
+        """
+        Print result to a file in a human readable way.
+        """
+        print('='*width, end='\n', file=outfile)
+        print("{}: {}".format(self.status, self.testcase_name), end='\n', file=outfile)
+        print('-'*width, end='\n', file=outfile)
+        print(self.pretty_message(), end='\n', file=outfile)
+        print('-'*width, end='\n', file=outfile)
+    
+    def pretty_message(self):
+        """ Return a message to be printed by pretty_print. Returns an empty string if not overridden. """
+        return ''
+        
+
+
+class FailedTestResult(TestResult):
+    """
+    A basic implementation of TestResult for failed tests.
+    """
+    def __init__(self, testcase, status=TestResult._fail, reason=None):
+        super(FailedTestResult, self).__init__(testcase, status)
+        self.reason = reason  # explanation of the failure; None when none was given
+    
+    def pprint_dict(self):
+        """
+        Return a dict that is ideal for passing to pprint.
+        """
+        ppdict = super(FailedTestResult, self).pprint_dict()
+        ppdict['reason'] = self.reason
+        return ppdict
+    
+    def pretty_message(self):  # always a string: callers concatenate the message, so a missing reason becomes ''
+        return '' if self.reason is None else self.reason
+
+
+class CompoundTestResult(TestResult):
+    """
+    The TestResult of a sequential test case, combining the results of its subtests.
+    """
+    def __init__(self, testcase, results):
+        """
+        @param testcase The test case that generated the result.
+        @param results The results of the subtests that were run.
+        """
+        super(CompoundTestResult, self).__init__(testcase, None)
+        self.sub_results = results
+        seen = [r.status for r in results]
+        # Overall status: the first of fail, error, pass reported by any subtest,
+        # or skipped when none of those appears.
+        ranked = (TestResult._fail, TestResult._error, TestResult._pass)
+        self.status = next((s for s in ranked if s in seen), TestResult._skipped)
+    
+    def pprint_dict(self):
+        """
+        Build the dict form of this result for pprint, with one nested dict per subtest.
+        """
+        ppdict = super(CompoundTestResult, self).pprint_dict()
+        ppdict['sub_results'] = [sub.pprint_dict() for sub in self.sub_results]
+        return ppdict
+    
+    def pretty_message(self):
+        """ One 'status: name' line per subtest, joined by newlines. """
+        def describe(sub):
+            # Only failed and errored subtests have their own message appended.
+            detail = '\n' + sub.pretty_message() if sub.status in [TestResult._fail, TestResult._error] else ''
+            return "{}: {}{}".format(sub.status, sub.testcase_name, detail)
+        return '\n'.join(describe(sub) for sub in self.sub_results)
+
+
+class SubTestResult(TestResult):
+    """ A TestResult for a subtest in a sequential test case. """
+    def __init__(self, subtest_name, inner_result):
+        """
+        @param subtest_name The name of the subtest.
+        @param inner_result The result returned from running the subtest.
+        """
+        # A TestResult keeps only its test case's name, so there is no test case to hand to TestResult.__init__.
+        self.testcase_name = subtest_name
+        self.status = inner_result.status
+        self.inner_result = inner_result
+    
+    def pprint_dict(self):
+        """
+        Return a dict that is ideal for passing to pprint.
+        """
+        ppdict = super(SubTestResult, self).pprint_dict()
+        ppdict['inner_result'] = self.inner_result.pprint_dict()
+        return ppdict
+
+
+class UnhandledExceptionTestResult(TestResult):
+    """
+    The TestResult produced when a test case raises an exception it does not handle itself.
+    """
+    def __init__(self, testcase, status, exc_type, exc_value, exc_traceback):
+        super(UnhandledExceptionTestResult, self).__init__(testcase, status)
+        self.exception_type = exc_type
+        self.exception_value = exc_value
+        if exc_traceback is None:
+            self.exception_message = "{}: {}".format(str(exc_type), str(exc_value))
+        else:
+            buf = util.StringIO()
+            traceback.print_exception(exc_type, exc_value, exc_traceback, file=buf)
+            self.exception_message = buf.getvalue()
+    
+    def pprint_dict(self):
+        """
+        Build the dict form of this result for pprint, including the exception details.
+        """
+        ppdict = super(UnhandledExceptionTestResult, self).pprint_dict()
+        ppdict.update(exception_type=self.exception_type,
+                      exception_value=self.exception_value,
+                      exception_message=self.exception_message)
+        return ppdict
+    
+    def pretty_message(self):
+        return self.exception_message
+
+
+class TestCase(object):
+    """
+    Base class for all test cases
+    """
+    def __init__(self, name=None):
+        """
+        @param name A unique test case name.
+        """
+        self.name = name    
+    
+    def setUp(self):
+        """
+        Called immediately before a testcase is executed.
+        """
+        pass
+    
+    def run(self):
+        """
+        Run the test case, and return its result.
+        """
+        raise NotImplementedError
+    
+    def tearDown(self):
+        """
+        Called immediately after a testcase is executed.
+        """
+        pass
+    
+    def catch(self, exc):
+        """
+        Called when run raises an exception. If the test case
+        knows how to handle it, it should return its own result or None.
+        Otherwise, return the original exception.
+        """
+        return exc
+    
+    def setresult(self, test_result):
+        """
+        Called after a test issues a result and before tearDown is called.
+        """
+        self.test_result = test_result
+    
+    def make_pass(self, result_type=TestResult, *args, **kwargs):
+        """
+        Make a passed result for this testcase.
+        """
+        return TestResult.make_pass(result_type, self, *args, **kwargs)
+    
+    def make_fail(self, result_type=FailedTestResult, *args, **kwargs):
+        """
+        Make a failed result for this testcase.
+        """
+        return TestResult.make_fail(result_type, self, *args, **kwargs)
+
+class SequencialTestCase(TestCase):
+    """
+    A test case made of named subtests that run in order, stopping after the first failure or error.
+    """
+    def __init__(self, name):
+        super(SequencialTestCase, self).__init__(name)
+        self.tests = list()
+    
+    def add_subtest(self, subtest_name, subtest_func):
+        """ Queue subtest_func to run as the subtest named subtest_name. """
+        entry = (subtest_name, subtest_func)
+        self.tests.append(entry)
+    
+    def run(self):
+        sub_results = list(self._runall())
+        return CompoundTestResult(self, sub_results)
+    
+    def _runall(self):
+        subtests = [SubTestCase(sub_name, sub_func) for sub_name, sub_func in self.tests]
+        return _rungen(subtests, failfast=True)
+
+
+class SubTestCase(TestCase):
+    """
+    A subtest of a sequential test.
+    """
+    def __init__(self, name, func):
+        super(SubTestCase, self).__init__(name)
+        self.run = lambda: func(self)  # run() calls func with this subtest as its only argument
+
+
+def run(tclist, failfast=False):
+    """
+    Execute the test cases in tclist, in order, and collect their results in a list.
+    """
+    return [result for result in _rungen(tclist, failfast)]
+
+def _rungen(tclist, failfast=False):
+    """
+    Generator: run each test case (setUp, run, tearDown) and yield its result; failfast stops after a failed or errored one.
+    """
+    for tc in tclist:
+        tc.setUp()
+        try:
+            result = _result(tc.run(), tc)
+        except Exception as ex:
+            result = _result(tc.catch(ex), tc)
+        finally:  # tearDown must also run when catch() raises or a non-Exception (e.g. KeyboardInterrupt) propagates
+            tc.tearDown()
+        yield result
+        if failfast and (result.failed() or result.errored()):
+            break
+
+def _result(res, tc):
+    """
+    Convert res to a TestResult object.
+    If res is a TestResult object, give it back.
+    If res is an Exception, return an UnhandledExceptionTestResult.
+    If res is something else, discard it and return a passed TestResult.
+    """
+    if isinstance(res, TestResult):
+        return res
+    elif isinstance(res, Exception):
+        logging.info('uncaught exception: {}'.format(str(res)))
+        return TestResult.make_error(UnhandledExceptionTestResult, tc, *(sys.exc_info()))  # reports the exception currently being handled, which need not be res itself
+    else:
+        return TestResult.make_pass(TestResult, tc)
+
+def pprint_results(result_iter, outfile=sys.stdout):
+    """
+    Group test results by status and pprint the resulting dict to a file-like object.
+    @param result_iter An iterator of results to print.
+    @param outfile An opened file-like object to print to (defaults to stdout).
+    """
+    status_of = lambda res: res.status
+    ordered = sorted(result_iter, key=status_of)
+    status_dict = dict((status, [res.pprint_dict() for res in group]) for status, group in itertools.groupby(ordered, status_of))
+    pprint.pprint(status_dict, stream=outfile)
+
+def pretty_print_results(
+        result_iter,
+        count_by_status=True, exclude_passed=True, exclude_skipped=True, exclude_failed=False,
+        exclude_errored=False, sort_by_status=True, width=60, outfile=sys.stdout):
+    """
+    Print iterator of TestResults in a human readable format to a file-like object.
+    @param result_iter An iterator of TestResult objects to print.
+    @param count_by_status Print the number of tests for each status (defaults to True).
+    @param exclude_passed Exclude passed test results from printing (defaults to True).
+    @param exclude_skipped Exclude skipped test results from printing (defaults to True).
+    @param exclude_failed Exclude failed test results from printing (defaults to False).
+    @param exclude_errored Exclude errored test results from printing (defaults to False).
+    @param sort_by_status Print test results in order of status: passed, errored, failed, then skipped (defaults to True).
+    @param width Printing width (defaults to 60).
+    @param outfile A file-like object to print to (defaults to stdout).
+    """
+    result_list = list(result_iter)
+    status_order = [TestResult._pass, TestResult._error, TestResult._fail, TestResult._skipped]
+    if sort_by_status:
+        # Sort the materialized list (result_iter may already be exhausted); the sort is stable and unknown statuses go last.
+        result_list.sort(key=lambda r: status_order.index(r.status) if r.status in status_order else len(status_order))
+    
+    if count_by_status:
+        print('Passed: {}'.format(len([tr for tr in result_list if tr.passed()])), file=outfile)
+        print('Errors: {}'.format(len([tr for tr in result_list if tr.errored()])), file=outfile)
+        print('Failed: {}'.format(len([tr for tr in result_list if tr.failed()])), file=outfile)
+        print('Skipped: {}'.format(len([tr for tr in result_list if tr.skipped()])), file=outfile)
+    #TODO: something that doesn't expose TestResult._*
+    print_status = set(itertools.compress(status_order,
+            map(lambda n: not n, [exclude_passed, exclude_errored, exclude_failed, exclude_skipped])))
+    for tr in (r for r in result_list if r.status in print_status):
+        tr.pretty_print(width=width, outfile=outfile)
+    
+    
diff --git a/test-chill/testchill/util.py b/test-chill/testchill/util.py
new file mode 100644
index 0000000..266a94d
--- /dev/null
+++ b/test-chill/testchill/util.py
@@ -0,0 +1,185 @@
+import difflib
+import functools
+import itertools
+import logging
+import os
+import re
+import sysconfig
+import subprocess
+import tempfile
+
+
+
+logging.basicConfig(filename='testchill.log', level=logging.DEBUG, filemode='w')
+#logging.basicConfig(level=logging.INFO)
+
+python_version = sysconfig.get_python_version()
+python_version_major = int(sysconfig.get_python_version().split('.')[0])
+python_version_minor = int(sysconfig.get_python_version().split('.')[1])
+
+if python_version_major == 2:
+    from StringIO import StringIO
+else:
+    from io import StringIO
+
+_temp_dirs = []
+_temp_files = []
+
+### Errors ###
+### Shell Util ###
+
+def shell(cmd, args=[], stdout=None, stderr=None, env={}, wd=os.getcwd()):
+    """
+    Execute a shell command. Raises subprocess.CalledProcessError if the command exits with a non-zero status.
+    @param cmd The command name
+    @param args A list of command line arguments (defaults to []). They are joined with spaces and reach the shell unquoted.
+    @param stdout A file like object or file number that reads input written to stdout.
+            stdout will be returned as a string if this is None or not given.
+    @param stderr A file like object or file number that reads input written to stderr.
+    @param env A dict of environment variables. Before the command is executed, these will be exported
+    @param wd The working directory. Before the command is executed, the working directory will be changed to wd. (wd defaults to the working directory at the time this module was imported)
+    """
+    fullcmd = ' '.join(['export {}={};'.format(k,str(v)) for k,v in env.items()] + ['cd {};'.format(wd)] + [cmd] + args)
+    logging.info('shell: '+fullcmd)
+    if stdout is None:
+        outp = subprocess.check_output(fullcmd, stderr=stderr, shell=True)
+        if python_version_major == 2:
+            return outp
+        else:
+            return outp.decode()
+    else:
+        subprocess.check_call(fullcmd, stdout=stdout, stderr=stderr, shell=True)
+
+def mkdir_p(directory, temp=False, **kwargs):
+    """
+    Make directory (equivalent to shell('mkdir', ['-p', directory])); nothing is done if the path already exists.
+    """
+    if not os.path.exists(directory):
+        if temp and (directory not in _temp_dirs):
+            _temp_dirs.append(directory)  # recorded in _temp_dirs, the list rmtemp() deletes from
+        shell('mkdir', ['-p', directory], **kwargs)
+
+def set_tempfile(filename):
+    """
+    Add a file to the list of temp files (_temp_files)
+    @param filename The full path to a temporary file.
+    """
+    _temp_files.append(filename)
+
+def withtmp(wtfunc, rdfunc):
+    """
+    Perform some operation using a temporary file. Returns whatever rdfunc returns.
+    @param wtfunc A function that writes to the temporary file
+    @param rdfunc A function that reads from the temporary file
+    """
+    with tempfile.TemporaryFile() as f:
+        wtfunc(f)
+        f.seek(0)  # rewind so rdfunc starts reading at the beginning of what wtfunc wrote
+        return rdfunc(f)
+
+def rmtemp():
+    """
+    Delete every registered temp file and temp directory, unregistering each entry once it has been handled
+    """
+    while _temp_files:
+        if os.path.exists(_temp_files[0]):
+            shell('rm', [_temp_files[0]])
+        del _temp_files[0]
+
+    while _temp_dirs:
+        if os.path.exists(_temp_dirs[0]):
+            shell('rm', ['-rf', _temp_dirs[0]])
+        del _temp_dirs[0]
+
+def mktemp(mode=None):
+    """
+    Create a temporary file and record it in _temp_files. Returns just the filename when mode is None, otherwise a two-tuple with an open file object and the filename.
+    """
+    fd, name = tempfile.mkstemp()
+    _temp_files.append(name)
+    if mode is None:
+        os.close(fd)
+        return name
+    else:
+        return os.fdopen(fd, mode), name
+    
+
+### Misc Util ###
+
+def copy(obj, exclude=[]):
+    """
+    Create a new instance of obj's type (constructed with no arguments) and shallow-copy obj's attributes onto it
+    @param obj The object to copy
+    @param exclude A list of attribute names that are not copied
+    """
+    duplicate = type(obj)()
+    for attr_name, attr_value in vars(obj).items():
+        if attr_name not in exclude:
+            setattr(duplicate, attr_name, attr_value)
+    return duplicate
+
+def applyenv(line):
+    """
+    Replace each bash style $NAME reference in a string with that environment variable's value ('' if unset). The ${NAME} form is not matched.
+    @param line The input string
+    """
+    return re.sub(r'\$([a-zA-Z_][a-zA-Z_0-9]*)\b',lambda m: str(os.getenv(m.group(1), '')), line)
+
+def callonce(func):
+    """
+    Decorator asserting that a function is only ever called once; any later call raises Exception.
+    @param func Function to only be run once.
+    """
+    pred_name = '__' + func.__module__.replace('.','__') + '_' + func.__name__ + '_called'
+    globals()[pred_name] = False
+    @functools.wraps(func)
+    def wrapper(*args, **kwargs):
+        if not globals()[pred_name]:
+            globals()[pred_name] = True
+            return func(*args, **kwargs)
+        else:
+            raise Exception('{} was invoked multiple times.'.format(func.__name__))
+    return wrapper
+
+def isdiff(strone, strtwo):
+    """
+    Diff two strings line by line. Returns a two element tuple. The first is True if the two strings differ, and the
+    next is a textual representation of the diff.
+    @param strone First string.
+    @param strtwo Second string.
+    """
+    diff = list(difflib.ndiff(strone.splitlines(), strtwo.splitlines()))
+    return len(list(line for line in diff if line[0] in ['-','+'])) != 0, '\n'.join(diff)
+
+def filterext(ext_list, filenames):
+    """
+    Lazily select the file names ending in one of the given extensions (names are stripped only for the comparison).
+    @param ext_list A list of extensions.
+    @param filenames An iterable object of file names.
+    """
+    return (name for name in filenames if name.strip().endswith(tuple(ext_list)))
+
+def extract_tag(tagname, filename, wd=os.getcwd()):
+    """
+    Extract commented out text in each html tag '<tagname>'. Returns a list of tuples for each tag.
+    Each tuple has two elements, the first is the text found in the tag, the second contains a dict
+    of attributes given in the tag.
+    @param tagname The name of the tag to search for.
+    @param filename A filename to search for comments in.
+    @param wd The working directory.
+    """
+    from . import _extract
+    return _extract.extract_tag(tagname, filename, wd)
+
+def textstream(txt):
+    """
+    Wrap text in an in-memory stream, using whichever StringIO implementation matches the running Python (2 or 3).
+    @param txt A string to use as the default data in a stream.
+    """
+    if python_version_major == 3:
+        import io as stream_module
+        return stream_module.StringIO(txt)
+    elif python_version_major == 2:
+        import StringIO as stream_module
+        return stream_module.StringIO(txt)
+
diff --git a/test-chill/unit-tests/__init__.py b/test-chill/unit-tests/__init__.py
new file mode 100644
index 0000000..8b13789
--- /dev/null
+++ b/test-chill/unit-tests/__init__.py
@@ -0,0 +1 @@
+
diff --git a/test-chill/unit-tests/cpp_validate_prog/mm_in.py b/test-chill/unit-tests/cpp_validate_prog/mm_in.py
new file mode 100755
index 0000000..93eb080
--- /dev/null
+++ b/test-chill/unit-tests/cpp_validate_prog/mm_in.py
@@ -0,0 +1,9 @@
+#!/usr/bin/python
+
+import struct
+
+data = list(range(15)) + list(range(10)) + [0]*6  # 31 values; presumably A (3x5), B (5x2) and a zeroed C (3x2) - confirm
+bindata = b''.join([struct.pack('f',n) for n in data])  # struct.pack yields bytes on Python 3, so join with a bytes separator
+with open('mm.in.data','wb') as f:
+    f.write(bindata)
+
diff --git a/test-chill/unit-tests/cpp_validate_prog/mm_one.cc b/test-chill/unit-tests/cpp_validate_prog/mm_one.cc
new file mode 100644
index 0000000..6131ae1
--- /dev/null
+++ b/test-chill/unit-tests/cpp_validate_prog/mm_one.cc
@@ -0,0 +1,29 @@
+#define AN 3
+#define BM 2
+#define AMBN 5
+
+/*
+
+<test name='mm_small'>
+
+procedure void mm(
+    in  float[3][5] A = matrix([*,*],lambda i,j: random(-8,8)),
+    in  float[5][2] B = matrix([*,*],lambda i,j: random(-8,8)),
+    out float[3][2] C = matrix([*,*],lambda i,j: 0))
+
+</test>
+
+*/
+
+void mm(float A[AN][AMBN], float B[AMBN][BM], float C[AN][BM]) {
+    int i;
+    int j;
+    int k;
+    for(i = 0; i < AN; i++) {
+        for(j = 0; j < BM; j++) {
+            for(k = 0; k < AMBN; k++) {
+                C[i][j] += A[i][k] * B[k][j];
+            }
+        }
+    }
+}
diff --git a/test-chill/unit-tests/cpp_validate_prog/mm_one.testproc b/test-chill/unit-tests/cpp_validate_prog/mm_one.testproc
new file mode 100644
index 0000000..a12a963
--- /dev/null
+++ b/test-chill/unit-tests/cpp_validate_prog/mm_one.testproc
@@ -0,0 +1,6 @@
+
+procedure void mm(
+    in  float[3][5] A = matrix([*,*],lambda i,j: random(-8,8)),
+    in  float[5][2] B = matrix([*,*],lambda i,j: random(-8,8)),
+    out float[3][2] C = matrix([*,*],lambda i,j: 0))
+
diff --git a/test-chill/unit-tests/cpp_validate_prog/mm_one_defines.cc b/test-chill/unit-tests/cpp_validate_prog/mm_one_defines.cc
new file mode 100644
index 0000000..e35f189
--- /dev/null
+++ b/test-chill/unit-tests/cpp_validate_prog/mm_one_defines.cc
@@ -0,0 +1,25 @@
+
+/*
+<test name='mm_small' define="{'AN':3, 'BM':2, 'AMBN':5}">
+
+procedure void mm(
+    in  float[AN][AMBN] A = matrix([*,*],lambda i,j: random(-8,8)),
+    in  float[AMBN][BM] B = matrix([*,*],lambda i,j: random(-8,8)),
+    out float[AN][BM]   C = matrix([*,*],lambda i,j: 0))
+
+</test>
+
+*/
+
+void mm(float A[AN][AMBN], float B[AMBN][BM], float C[AN][BM]) {
+    int i;
+    int j;
+    int k;
+    for(i = 0; i < AN; i++) {
+        for(j = 0; j < BM; j++) {
+            for(k = 0; k < AMBN; k++) {
+                C[i][j] += A[i][k] * B[k][j];
+            }
+        }
+    }
+}
diff --git a/test-chill/unit-tests/cpp_validate_prog/mm_one_longer_main.cc b/test-chill/unit-tests/cpp_validate_prog/mm_one_longer_main.cc
new file mode 100644
index 0000000..5b7e6c1
--- /dev/null
+++ b/test-chill/unit-tests/cpp_validate_prog/mm_one_longer_main.cc
@@ -0,0 +1,93 @@
+#define AN 3
+#define BM 2
+#define AMBN 5
+//#define PRINT
+
+#include <time.h>
+#include <fstream>
+#include <cstdio>
+
+/*
+
+<test name='mm_small'>
+
+procedure void mm(
+    in  float[3][5] A = matrix([*,*],lambda i,j: random(-8,8)),
+    in  float[5][2] B = matrix([*,*],lambda i,j: random(-8,8)),
+    out float[3][2] C = matrix([*,*],lambda i,j: 0))
+
+</test>
+
+*/
+
+void mm(float A[AN][AMBN], float B[AMBN][BM], float C[AN][BM]) {
+    int i;
+    int j;
+    int k;
+    for(i = 0; i < AN; i++) {
+        for(j = 0; j < BM; j++) {
+            C[i][j] = 0;
+            for(k = 0; k < AMBN; k++) {
+                C[i][j] += A[i][k] * B[k][j];
+            }
+        }
+    }
+}
+
+int main(int argc, char** argv) {
+    float A[3][5] = {{0,1,2,3,4},{5,6,7,8,9},{10,11,12,13,14}};
+    float B[5][2] = {{0,1},{2,3},{4,5},{6,7},{8,9}};
+    float C[3][2] = {{0,0},{0,0},{0,0}};
+    timespec start_time;
+    timespec end_time;
+    
+    if (argc == 3) {
+        std::ifstream is(argv[1], std::ifstream::in | std::ifstream::binary);
+        is.read((char*)A, 15*sizeof(float));
+        is.read((char*)B, 10*sizeof(float));
+        is.close();
+    }
+    
+    clock_gettime(CLOCK_REALTIME, &start_time);
+    for(int i = 0; i < 10000; i++) {
+        mm(A,B,C);
+    }
+    clock_gettime(CLOCK_REALTIME, &end_time);
+    
+    if (argc == 3) {
+        std::ofstream os(argv[2], std::ofstream::out | std::ofstream::binary);
+        os.write((char*)C, 6*sizeof(float));
+        os.close();
+    }
+    
+    #ifdef PRINT
+    std::printf("A:\n");
+    for(int i = 0; i < 3; i++) {
+        std::printf("[");
+        for(int j = 0; j < 5; j++) {
+            std::printf("%f,",A[i][j]);
+        }
+        std::printf("]\n");
+    }
+    std::printf("B:\n");
+    for(int i = 0; i < 5; i++) {
+        std::printf("[");
+        for(int j = 0; j < 2; j++) {
+            std::printf("%f,",B[i][j]);
+        }
+        std::printf("]\n");
+    }
+    std::printf("C:\n");
+    for(int i = 0; i < 3; i++) {
+        std::printf("[");
+        for(int j = 0; j < 2; j++) {
+            std::printf("%f,",C[i][j]);
+        }
+        std::printf("]\n");
+    }
+    #else
+    double time_diff = (end_time.tv_sec - start_time.tv_sec) + (end_time.tv_nsec - start_time.tv_nsec)/1000000000.0;
+    std::printf("(%f,)", time_diff);
+    #endif
+    return 0;
+}
diff --git a/test-chill/unit-tests/cpp_validate_prog/mm_one_longer_wrong_main.cc b/test-chill/unit-tests/cpp_validate_prog/mm_one_longer_wrong_main.cc
new file mode 100644
index 0000000..7d96248
--- /dev/null
+++ b/test-chill/unit-tests/cpp_validate_prog/mm_one_longer_wrong_main.cc
@@ -0,0 +1,93 @@
+#define AN 3
+#define BM 2
+#define AMBN 5
+//#define PRINT
+
+#include <time.h>
+#include <fstream>
+#include <cstdio>
+
+/*
+
+<test name='mm_small'>
+
+procedure void mm(
+    in  float[3][5] A = matrix([*,*],lambda i,j: random(-8,8)),
+    in  float[5][2] B = matrix([*,*],lambda i,j: random(-8,8)),
+    out float[3][2] C = matrix([*,*],lambda i,j: 0))
+
+</test>
+
+*/
+
+void mm(float A[AN][AMBN], float B[AMBN][BM], float C[AN][BM]) {
+    int i;
+    int j;
+    int k;
+    for(i = 0; i < AN; i++) {
+        for(j = 0; j < BM; j++) {
+            C[i][j] = 0;
+            for(k = 0; k < AMBN; k++) {
+                C[i][j] += A[i][k] + B[k][j];
+            }
+        }
+    }
+}
+
+int main(int argc, char** argv) {
+    float A[3][5] = {{0,1,2,3,4},{5,6,7,8,9},{10,11,12,13,14}};
+    float B[5][2] = {{0,1},{2,3},{4,5},{6,7},{8,9}};
+    float C[3][2] = {{0,0},{0,0},{0,0}};
+    timespec start_time;
+    timespec end_time;
+    
+    if (argc == 3) {
+        std::ifstream is(argv[1], std::ifstream::in | std::ifstream::binary);
+        is.read((char*)A, 15*sizeof(float));
+        is.read((char*)B, 10*sizeof(float));
+        is.close();
+    }
+    
+    clock_gettime(CLOCK_REALTIME, &start_time);
+    for(int i = 0; i < 1000000; i++) {
+        mm(A,B,C);
+    }
+    clock_gettime(CLOCK_REALTIME, &end_time);
+    
+    if (argc == 3) {
+        std::ofstream os(argv[2], std::ofstream::out | std::ofstream::binary);
+        os.write((char*)C, 6*sizeof(float));
+        os.close();
+    }
+    
+    #ifdef PRINT
+    std::printf("A:\n");
+    for(int i = 0; i < 3; i++) {
+        std::printf("[");
+        for(int j = 0; j < 5; j++) {
+            std::printf("%f,",A[i][j]);
+        }
+        std::printf("]\n");
+    }
+    std::printf("B:\n");
+    for(int i = 0; i < 5; i++) {
+        std::printf("[");
+        for(int j = 0; j < 2; j++) {
+            std::printf("%f,",B[i][j]);
+        }
+        std::printf("]\n");
+    }
+    std::printf("C:\n");
+    for(int i = 0; i < 3; i++) {
+        std::printf("[");
+        for(int j = 0; j < 2; j++) {
+            std::printf("%f,",C[i][j]);
+        }
+        std::printf("]\n");
+    }
+    #else
+    double time_diff = (end_time.tv_sec - start_time.tv_sec) + (end_time.tv_nsec - start_time.tv_nsec)/1000000000.0;
+    std::printf("(%f,)", time_diff);
+    #endif
+    return 0;
+}
diff --git a/test-chill/unit-tests/cpp_validate_prog/mm_one_main.cc b/test-chill/unit-tests/cpp_validate_prog/mm_one_main.cc
new file mode 100644
index 0000000..a03b505
--- /dev/null
+++ b/test-chill/unit-tests/cpp_validate_prog/mm_one_main.cc
@@ -0,0 +1,91 @@
+#define AN 3
+#define BM 2
+#define AMBN 5
+//#define PRINT
+
+#include <time.h>
+#include <fstream>
+#include <cstdio>
+
+/*
+
+<test name='mm_small'>
+
+procedure void mm(
+    in  float[3][5] A = matrix([*,*],lambda i,j: random(-8,8)),
+    in  float[5][2] B = matrix([*,*],lambda i,j: random(-8,8)),
+    out float[3][2] C = matrix([*,*],lambda i,j: 0))
+
+</test>
+
+*/
+
+void mm(float A[AN][AMBN], float B[AMBN][BM], float C[AN][BM]) {
+    int i;
+    int j;
+    int k;
+    for(i = 0; i < AN; i++) {
+        for(j = 0; j < BM; j++) {
+            C[i][j] = 0;
+            for(k = 0; k < AMBN; k++) {
+                C[i][j] += A[i][k] * B[k][j];
+            }
+        }
+    }
+}
+
+int main(int argc, char** argv) {
+    float A[3][5] = {{0,1,2,3,4},{5,6,7,8,9},{10,11,12,13,14}};
+    float B[5][2] = {{0,1},{2,3},{4,5},{6,7},{8,9}};
+    float C[3][2] = {{0,0},{0,0},{0,0}};
+    timespec start_time;
+    timespec end_time;
+    
+    if (argc == 3) {
+        std::ifstream is(argv[1], std::ifstream::in | std::ifstream::binary);
+        is.read((char*)A, 15*sizeof(float));
+        is.read((char*)B, 10*sizeof(float));
+        is.close();
+    }
+    
+    clock_gettime(CLOCK_REALTIME, &start_time);
+    mm(A,B,C);
+    clock_gettime(CLOCK_REALTIME, &end_time);
+    
+    if (argc == 3) {
+        std::ofstream os(argv[2], std::ofstream::out | std::ofstream::binary);
+        os.write((char*)C, 6*sizeof(float));
+        os.close();
+    }
+    
+    #ifdef PRINT
+    std::printf("A:\n");
+    for(int i = 0; i < 3; i++) {
+        std::printf("[");
+        for(int j = 0; j < 5; j++) {
+            std::printf("%f,",A[i][j]);
+        }
+        std::printf("]\n");
+    }
+    std::printf("B:\n");
+    for(int i = 0; i < 5; i++) {
+        std::printf("[");
+        for(int j = 0; j < 2; j++) {
+            std::printf("%f,",B[i][j]);
+        }
+        std::printf("]\n");
+    }
+    std::printf("C:\n");
+    for(int i = 0; i < 3; i++) {
+        std::printf("[");
+        for(int j = 0; j < 2; j++) {
+            std::printf("%f,",C[i][j]);
+        }
+        std::printf("]\n");
+    }
+    #else
+    double time_diff = (end_time.tv_sec - start_time.tv_sec) + (end_time.tv_nsec - start_time.tv_nsec)/1000000000.0;
+    std::printf("(%f,)", time_diff);
+    #endif
+    return 0;
+}
diff --git a/test-chill/unit-tests/cpp_validate_prog/mm_one_out.cc b/test-chill/unit-tests/cpp_validate_prog/mm_one_out.cc
new file mode 100644
index 0000000..6151301
--- /dev/null
+++ b/test-chill/unit-tests/cpp_validate_prog/mm_one_out.cc
@@ -0,0 +1,60 @@
+#include <time.h>
+#include <fstream>
+#include <cstdio>
+
+
+#define AN 3
+#define BM 2
+#define AMBN 5
+
+/*
+
+<test name='mm_small'>
+
+procedure void mm(
+    in  float[3][5] A = matrix([*,*],lambda i,j: random(-8,8)),
+    in  float[5][2] B = matrix([*,*],lambda i,j: random(-8,8)),
+    out float[3][2] C = matrix([*,*],lambda i,j: 0))
+
+</test>
+
+*/
+
+void mm(float A[AN][AMBN], float B[AMBN][BM], float C[AN][BM]) {
+    int i;
+    int j;
+    int k;
+    for(i = 0; i < AN; i++) {
+        for(j = 0; j < BM; j++) {
+            for(k = 0; k < AMBN; k++) {
+                C[i][j] += A[i][k] * B[k][j];
+            }
+        }
+    }
+}
+
+int main(int argc, char** argv) {
+    float A[3][5];
+    float B[5][2];
+    float C[3][2];
+    timespec start_time;
+    timespec end_time;
+    
+    std::ifstream datafile_initialize(argv[1]);
+    datafile_initialize.read((char*)A, 15*sizeof(float));
+    datafile_initialize.read((char*)B, 10*sizeof(float));
+    datafile_initialize.read((char*)C, 6*sizeof(float));
+    datafile_initialize.close();
+    
+    clock_gettime(CLOCK_REALTIME, &start_time);
+    mm(A,B,C);
+    clock_gettime(CLOCK_REALTIME, &end_time);
+    
+    std::ofstream datafile_out(argv[2]);
+    datafile_out.write((char*)C, 6*sizeof(float));
+    datafile_out.close();
+    
+    double time_diff = (end_time.tv_sec - start_time.tv_sec) + (end_time.tv_nsec - start_time.tv_nsec)/1000000000.0;
+    std::printf("(%f,)", time_diff);
+    return 0;
+}
diff --git a/test-chill/unit-tests/cpp_validate_prog/mm_one_with.cc b/test-chill/unit-tests/cpp_validate_prog/mm_one_with.cc
new file mode 100644
index 0000000..9cb0ae4
--- /dev/null
+++ b/test-chill/unit-tests/cpp_validate_prog/mm_one_with.cc
@@ -0,0 +1,30 @@
+#define AN 3
+#define BM 2
+#define AMBN 5
+
+/*
+
+<test name='mm_small'>
+
+with {evendist2:lambda i,j: random(-8,8), zero2:lambda i,j: 0}
+procedure void mm(
+    in  float[3][5] A = matrix([*,*],evendist2),
+    in  float[5][2] B = matrix([*,*],evendist2),
+    out float[3][2] C = matrix([*,*],zero2))
+
+</test>
+
+*/
+
+void mm(float A[AN][AMBN], float B[AMBN][BM], float C[AN][BM]) {
+    int i;
+    int j;
+    int k;
+    for(i = 0; i < AN; i++) {
+        for(j = 0; j < BM; j++) {
+            for(k = 0; k < AMBN; k++) {
+                C[i][j] += A[i][k] * B[k][j];
+            }
+        }
+    }
+}
diff --git a/test-chill/unit-tests/cpp_validate_prog/mm_one_with.testproc b/test-chill/unit-tests/cpp_validate_prog/mm_one_with.testproc
new file mode 100644
index 0000000..80bc841
--- /dev/null
+++ b/test-chill/unit-tests/cpp_validate_prog/mm_one_with.testproc
@@ -0,0 +1,7 @@
+
+with {evendist2:lambda i,j: random(-8,8), zero2:lambda i,j: 0}
+procedure void mm(
+    in  float[3][5] A = matrix([*,*],evendist2),
+    in  float[5][2] B = matrix([*,*],evendist2),
+    out float[3][2] C = matrix([*,*],zero2))
+
diff --git a/test-chill/unit-tests/cpp_validate_prog/mm_one_with_defines.cc b/test-chill/unit-tests/cpp_validate_prog/mm_one_with_defines.cc
new file mode 100644
index 0000000..77ce673
--- /dev/null
+++ b/test-chill/unit-tests/cpp_validate_prog/mm_one_with_defines.cc
@@ -0,0 +1,25 @@
+
+/*
+<test name='mm_small' define="{'AN':3, 'BM':2, 'AMBN':5}">
+
+with {evendist2:lambda i,j: random(-8,8), zero2:lambda i,j: 0}
+procedure void mm(
+    in  float[AN][AMBN] A = matrix([*,*],evendist2),
+    in  float[AMBN][BM] B = matrix([*,*],evendist2),
+    out float[AN][BM]   C = matrix([*,*],zero2))
+
+</test>
+*/
+
+void mm(float A[AN][AMBN], float B[AMBN][BM], float C[AN][BM]) {
+    int i;
+    int j;
+    int k;
+    for(i = 0; i < AN; i++) {
+        for(j = 0; j < BM; j++) {
+            for(k = 0; k < AMBN; k++) {
+                C[i][j] += A[i][k] * B[k][j];
+            }
+        }
+    }
+}
diff --git a/test-chill/unit-tests/cpp_validate_prog/mm_three_basic.cc b/test-chill/unit-tests/cpp_validate_prog/mm_three_basic.cc
new file mode 100644
index 0000000..49df049
--- /dev/null
+++ b/test-chill/unit-tests/cpp_validate_prog/mm_three_basic.cc
@@ -0,0 +1,33 @@
+/*
+<test name=small define="{'AN':2, 'AMBN':5, 'BM':3}">
+    procedure void mm(
+        in  float[AN][AMBN] A = matrix([,],lambda i,j: i*AMBN + j),
+        in  float[AMBN][BM] B = matrix([,],lambda i,j: i*BM + j),
+        out float[AN][BM]   C = matrix([,],lambda i,j: 0))
+</test>
+
+<test name=medium define="{'AN':20, 'AMBN':50, 'BM':30}">
+    procedure void mm(
+        in  float[AN][AMBN] A = matrix([,],lambda i,j: i*AMBN + j),
+        in  float[AMBN][BM] B = matrix([,],lambda i,j: i*BM + j),
+        out float[AN][BM]   C = matrix([,],lambda i,j: 0))
+</test>
+
+<test name=big define="{'AN':200, 'AMBN':500, 'BM':300}">
+    procedure void mm(
+        in  float[AN][AMBN] A = matrix([,],lambda i,j: i*AMBN + j),
+        in  float[AMBN][BM] B = matrix([,],lambda i,j: i*BM + j),
+        out float[AN][BM]   C = matrix([,],lambda i,j: 0))
+</test>
+*/
+
+void mm(float A[AN][AMBN], float B[AMBN][BM], float C[AN][BM]) {
+    for(int i = 0; i < AN; i++) {
+        for(int j = 0; j < BM; j++) {
+            C[i][j] = 0;
+            for(int k = 0; k < AMBN; k++) {
+                C[i][j] += A[i][k] * B[k][j];
+            }
+        }
+    }
+}
diff --git a/test-chill/unit-tests/cpp_validate_prog/mm_three_basic.cc.data b/test-chill/unit-tests/cpp_validate_prog/mm_three_basic.cc.data
new file mode 100644
index 0000000..82c5ce6
--- /dev/null
+++ b/test-chill/unit-tests/cpp_validate_prog/mm_three_basic.cc.data
diff --git a/test-chill/unit-tests/cpp_validate_prog/mm_three_slow.cc b/test-chill/unit-tests/cpp_validate_prog/mm_three_slow.cc
new file mode 100644
index 0000000..dd8c7e7
--- /dev/null
+++ b/test-chill/unit-tests/cpp_validate_prog/mm_three_slow.cc
@@ -0,0 +1,35 @@
+/*
+<test name=small define="{'AN':2, 'AMBN':5, 'BM':3}">
+    procedure void mm(
+        in  float[AN][AMBN] A = matrix([,],lambda i,j: i*AMBN + j),
+        in  float[AMBN][BM] B = matrix([,],lambda i,j: i*BM + j),
+        out float[AN][BM]   C = matrix([,],lambda i,j: 0))
+</test>
+
+<test name=medium define="{'AN':20, 'AMBN':50, 'BM':30}">
+    procedure void mm(
+        in  float[AN][AMBN] A = matrix([,],lambda i,j: i*AMBN + j),
+        in  float[AMBN][BM] B = matrix([,],lambda i,j: i*BM + j),
+        out float[AN][BM]   C = matrix([,],lambda i,j: 0))
+</test>
+
+<test name=big define="{'AN':200, 'AMBN':500, 'BM':300}">
+    procedure void mm(
+        in  float[AN][AMBN] A = matrix([,],lambda i,j: i*AMBN + j),
+        in  float[AMBN][BM] B = matrix([,],lambda i,j: i*BM + j),
+        out float[AN][BM]   C = matrix([,],lambda i,j: 0))
+</test>
+*/
+
+void mm(float A[AN][AMBN], float B[AMBN][BM], float C[AN][BM]) {
+    for(int w = 0; w < 100; w++) {
+        for(int i = 0; i < AN; i++) {
+            for(int j = 0; j < BM; j++) {
+                C[i][j] = 0;
+                for(int k = 0; k < AMBN; k++) {
+                    C[i][j] += A[i][k] * B[k][j];
+                }
+            }
+        }
+    }
+}
diff --git a/test-chill/unit-tests/cpp_validate_prog/print_mm_out.py b/test-chill/unit-tests/cpp_validate_prog/print_mm_out.py
new file mode 100755
index 0000000..fefbd2a
--- /dev/null
+++ b/test-chill/unit-tests/cpp_validate_prog/print_mm_out.py
@@ -0,0 +1,10 @@
+#!/usr/bin/python
+
+import struct
+import numpy as np
+
+with open('mm.out.data','rb') as f:
+    data = f.read()
+
+mat = np.array([struct.unpack_from('f',data,n*4) for n in range(len(data)//4)]).reshape((3,2))  # '//' keeps the element count an int on Python 3
+print(mat)
diff --git a/test-chill/unit-tests/cprog/Makefile b/test-chill/unit-tests/cprog/Makefile
new file mode 100644
index 0000000..f5f2608
--- /dev/null
+++ b/test-chill/unit-tests/cprog/Makefile
@@ -0,0 +1,17 @@
+OBJS = $(patsubst %.cc, %.o, $(wildcard *.cc))
+
+.PHONY: all
+all: sorter
+
+$(OBJS): %.o: %.cc
+	g++ -g -fprofile-arcs -ftest-coverage -c $< -o $@
+
+.PHONY: sorter
+sorter: $(OBJS)  # links bin/sorter; the bin directory is created on demand
+	mkdir -p bin
+	g++ -g -fprofile-arcs -ftest-coverage -o bin/sorter $(OBJS)
+
+.PHONY: clean
+clean:
+	rm -f *.o *.gcno *.gcda *.gcov
+	rm -f bin/sorter
diff --git a/test-chill/unit-tests/cprog/MergeSorter.cc b/test-chill/unit-tests/cprog/MergeSorter.cc
new file mode 100644
index 0000000..6e747a3
--- /dev/null
+++ b/test-chill/unit-tests/cprog/MergeSorter.cc
@@ -0,0 +1,77 @@
+#include "MergeSorter.h"
+
+/* Python
+def msort(lst, start, end, pindent = 0):
+    if start == end:
+        return
+    center = start + ((end - start) // 2)
+    print(' '*pindent + "SPLIT {}|{}".format(lst[start:center+1], lst[center+1:end+1]))
+    msort(lst, start, center, pindent+1)
+    msort(lst, center+1, end, pindent+1)
+    left = list(lst[start:center+1])
+    right = list(lst[center+1:end+1])
+    print(' '*pindent + "MERGE {}|{}".format(lst[start:center+1], lst[center+1:end+1]))
+    i,j = 0, 0
+    for k in range(start, end+1):
+        if i >= len(left):
+            lst[k] = right[j]
+            j += 1
+            print(' '*(pindent+1) + 'pull j: {} {} {}'.format(lst[start:k+1], left[i:], right[j:]))
+        elif j >= len(right):
+            lst[k] = left[i]
+            i += 1
+            print(' '*(pindent+1) + 'pull i: {} {} {}'.format(lst[start:k+1], left[i:], right[j:]))
+        elif left[i] > right[j]:
+            lst[k] = right[j]
+            j += 1
+            print(' '*(pindent+1) + 'pull j: {} {} {}'.format(lst[start:k+1], left[i:], right[j:]))
+        else:
+            lst[k] = left[i]
+            i += 1
+            print(' '*(pindent+1) + 'pull i: {} {} {}'.format(lst[start:k+1], left[i:], right[j:]))
+    print(' '*pindent + "-- {}".format(lst[start:end+1]))
+        
+
+if __name__ == '__main__':
+    import random as r
+    x = [int(r.random()*12) for i in range(7)]
+    print(x)
+    msort(x, 0, len(x)-1)
+    print(x)
+*/
+// Sort lst[start..end] (both bounds inclusive) in place with a top-down merge sort.
+static void mergesort(std::vector<int>& lst, int start, int end) {
+    if(start >= end) return; // empty range (end < start, e.g. an empty vector) or a single element
+    int center = start + (end-start)/2;
+    mergesort(lst, start, center);
+    mergesort(lst, center+1, end);
+    std::vector<int> left = std::vector<int>(lst.begin()+start, lst.begin()+(center+1));
+    std::vector<int> right = std::vector<int>(lst.begin()+(center+1),lst.begin()+(end+1));
+    int i = 0; // next unmerged element of left
+    int j = 0; // next unmerged element of right
+    for(int k = start; k < (end+1); k++) {
+        if (i >= left.size()) {
+            lst[k] = right[j++];
+        }
+        else if(j >= right.size()) {
+            lst[k] = left[i++];
+        }
+        else if(left[i] > right[j]) {
+            lst[k] = right[j++];
+        }
+        else {
+            lst[k] = left[i++];
+        }
+    }
+}
+
+MergeSorter::MergeSorter() {
+    this->name = std::string("mergesort");
+}
+
+MergeSorter::~MergeSorter() {
+}
+
+void MergeSorter::sort(std::vector<int>& list) const {
+    mergesort(list, 0, list.size()-1);
+}
diff --git a/test-chill/unit-tests/cprog/MergeSorter.h b/test-chill/unit-tests/cprog/MergeSorter.h
new file mode 100644
index 0000000..e2ed391
--- /dev/null
+++ b/test-chill/unit-tests/cprog/MergeSorter.h
@@ -0,0 +1,14 @@
+#ifndef MERGE_SORTER_H
+#define MERGE_SORTER_H
+
+#include <vector>
+#include "Sorter.h"
+
+class MergeSorter : public Sorter {
+public:
+    MergeSorter();
+    virtual ~MergeSorter();
+    virtual void sort(std::vector<int>& list) const;
+};
+
+#endif
diff --git a/test-chill/unit-tests/cprog/QuickSorter.cc b/test-chill/unit-tests/cprog/QuickSorter.cc
new file mode 100644
index 0000000..3ade346
--- /dev/null
+++ b/test-chill/unit-tests/cprog/QuickSorter.cc
@@ -0,0 +1,83 @@
+#include "QuickSorter.h"
+
+/* Python
+
+def swap(l, i, k):
+    v = l[i]
+    l[i] = l[k]
+    l[k] = v
+    print(str(l))
+
+def partition(l, start, end):
+    print("PARTITION {} [{}:{}]".format(l, start, end))
+    p_value = l[end]
+    p_index = end-1
+    
+    for i in range(start, end):
+        while(i < p_index and l[i] >= p_value):
+            swap(l, i, p_index)
+            p_index -= 1
+        while(i >= p_index and l[i] < p_value):
+            swap(l, i, p_index)
+            p_index += 1
+    swap(l, p_index, end)
+    print("DONE {}|[{}]|{}:{}".format(l[start:p_index], l[p_index], l[p_index+1:end+1], p_value))
+    return p_index
+
+def qsort(l, i, k):
+    if i < k:
+        p = partition(l, i, k)
+        qsort(l,i,p-1)
+        qsort(l,p+1,k)
+
+if __name__ == "__main__":
+    import random as r
+    x = [int(r.random()*12) for i in range(12)]
+    print(x)
+    qsort(x, 0, len(x)-1)
+    print(x)
+    
+*/
+
+static void swap(std::vector<int>& list, int i, int k) {
+    int v = list[i];
+    list[i] = list[k];
+    list[k] = v;
+}
+
+static int partition(std::vector<int>& list, int i, int k) {
+    int pivot_value = list[k];
+    int pivot_index = k - 1;
+    
+    for(int index = i; index < k; index++) {
+        while((index < pivot_index) && (list[index] >= pivot_value)) {
+            swap(list, index, pivot_index);
+            pivot_index--;
+        }
+        while((index >= pivot_index) && (list[index] < pivot_value)) {
+            swap(list, index, pivot_index);
+            pivot_index++;
+        }
+    }
+    swap(list, pivot_index, k);
+    return pivot_index;
+}
+
+static void quicksort(std::vector<int>& list, int i, int k) {
+    if(i < k) {
+        int p = partition(list, i, k);
+        quicksort(list, i, p-1);
+        quicksort(list, p+1, k);
+    }
+}
+
+QuickSorter::QuickSorter() {
+    this->name = std::string("quicksort");
+}
+
+QuickSorter::~QuickSorter() {
+}
+
+void QuickSorter::sort(std::vector<int>& list) const {
+    quicksort(list, 0, list.size()-1);
+}
diff --git a/test-chill/unit-tests/cprog/QuickSorter.h b/test-chill/unit-tests/cprog/QuickSorter.h
new file mode 100644
index 0000000..81919dd
--- /dev/null
+++ b/test-chill/unit-tests/cprog/QuickSorter.h
@@ -0,0 +1,14 @@
+#ifndef QUICK_SORTER_H
+#define QUICK_SORTER_H
+
+#include <vector>
+#include "Sorter.h"
+
+class QuickSorter : public Sorter {
+public:
+    QuickSorter();
+    virtual ~QuickSorter();
+    virtual void sort(std::vector<int>& list) const;
+};
+
+#endif
diff --git a/test-chill/unit-tests/cprog/Sorter.cc b/test-chill/unit-tests/cprog/Sorter.cc
new file mode 100644
index 0000000..a1ae5ec
--- /dev/null
+++ b/test-chill/unit-tests/cprog/Sorter.cc
@@ -0,0 +1,8 @@
+#include "Sorter.h"
+
+Sorter::Sorter() {
+}
+
+Sorter::~Sorter() {
+}
+
diff --git a/test-chill/unit-tests/cprog/Sorter.h b/test-chill/unit-tests/cprog/Sorter.h
new file mode 100644
index 0000000..abf8f82
--- /dev/null
+++ b/test-chill/unit-tests/cprog/Sorter.h
@@ -0,0 +1,16 @@
+#ifndef SORTER_H
+#define SORTER_H
+
+#include <string>
+#include <vector>
+
+class Sorter {
+public:
+    Sorter();
+    virtual ~Sorter();
+    
+    std::string name;
+    virtual void sort(std::vector<int>& list) const = 0;
+};
+
+#endif
diff --git a/test-chill/unit-tests/cprog/main.cc b/test-chill/unit-tests/cprog/main.cc
new file mode 100644
index 0000000..3fe960b
--- /dev/null
+++ b/test-chill/unit-tests/cprog/main.cc
@@ -0,0 +1,45 @@
+#include <cstdio>
+#include <cstdlib>
+#include <map>
+#include <string>
+#include <vector>
+
+#include "Sorter.h"
+#include "QuickSorter.h"
+#include "MergeSorter.h"
+//#include "InsertionSorter.h"
+//#include "ShellSorter.h"
+
+void read_vector(std::vector<int>& vec, int start, int stop, char** argv) {
+    // Append argv[start..stop) to vec as ints; strtol base 0 accepts decimal, 0x-hex and 0-octal text.
+    for(int arg = start; arg < stop; ++arg)
+        vec.push_back(static_cast<int>(strtol(argv[arg], NULL, 0)));
+}
+
+void print_vector(std::vector<int>& vec) {
+    // Write the elements to stdout as "[ a  b  c ]" followed by a newline.
+    printf("[");
+    for(std::vector<int>::size_type pos = 0; pos < vec.size(); ++pos)
+        printf(" %d ", vec[pos]);
+    printf("]\n");
+}
+
+void addsorter(std::map<std::string, Sorter*>& m, Sorter* s) {
+    m[s->name] = s;  // keyed by the sorter's name; a later registration under the same name replaces the earlier pointer
+}
+
+int main(int argc, char** argv) {
+    // Usage: <prog> <sorter-name> [int ...] -- prints the list, sorts it with the named sorter, prints it again.
+    std::map<std::string, Sorter*> sorter_map;
+    std::vector<int> vec;
+    read_vector(vec, 2, argc, argv);
+    print_vector(vec);
+    addsorter(sorter_map, new QuickSorter());
+    addsorter(sorter_map, new MergeSorter());
+    // find() instead of operator[]: an unknown name must not insert and then dereference a NULL Sorter*.
+    std::map<std::string, Sorter*>::iterator found = (argc > 1) ? sorter_map.find(argv[1]) : sorter_map.end();
+    if(found == sorter_map.end()) { fprintf(stderr, "error: missing or unknown sorter name\n"); return 1; }
+    found->second->sort(vec);
+    print_vector(vec);
+}
+
diff --git a/test-chill/unit-tests/test___main__.py b/test-chill/unit-tests/test___main__.py
new file mode 100644
index 0000000..7a79417
--- /dev/null
+++ b/test-chill/unit-tests/test___main__.py
@@ -0,0 +1,205 @@
+import os
+import unittest
+
+import testchill.gcov as gcov
+import testchill.__main__ as main
+
+
+def runtest(tc):
+    tc.setUp()
+    tc.run()
+    tc.tearDown()
+
+class TestMain(unittest.TestCase):
+    def setUp(self):
+        self.chill_dev_src = os.getenv('CHILL_DEV_SRC')
+        self.chill_release_src = os.getenv('CHILL_RELEASE_SRC')
+        self.omega_dev_src = os.getenv('OMEGA_DEV_SRC')
+        self.omega_release_src = os.getenv('OMEGA_RELEASE_SRC')
+        self.staging_dir_bin = os.getenv('STAGING_DIR_BIN')
+        self.staging_dir_wd = os.getenv('STAGING_DIR_WD')
+    
+    def test_main_parse_chillbuild(self):
+        pass
+    
+    def test_main_parse_chill_dev(self):
+        tclist = main.args_to_tclist('-b {} chill-testcase path/to/somescript.script path/to/somesrc.c'.format(self.staging_dir_bin).split(), coverage_set=gcov.GcovSet())
+        tc = tclist[0]
+        
+        self.assertEqual(tc.config.chill_dir, None)
+        self.assertEqual(tc.config.bin_dir, self.staging_dir_bin)
+        self.assertEqual(tc.config.build_cuda, False)
+        self.assertEqual(tc.config.version, 'dev')
+        self.assertEqual(tc.config.script_lang, 'script')
+        
+        self.assertEqual(tc.name, 'chill:somescript.script')
+        self.assertEqual(tc.wd, os.getcwd())
+        self.assertEqual(tc.chill_bin, os.path.join(self.staging_dir_bin, 'chill'))
+        self.assertEqual(tc.chill_script, 'somescript.script')
+        self.assertEqual(tc.chill_src, 'somesrc.c')
+        self.assertEqual(tc.chill_script_path, os.path.join(os.getcwd(), 'path/to/somescript.script'))
+        self.assertEqual(tc.chill_src_path, os.path.join(os.getcwd(), 'path/to/somesrc.c'))
+        self.assertEqual(tc.chill_gensrc, 'rose_somesrc.c')
+    
+    def test_main_parse_chill_lua_dev(self):
+        tclist = main.args_to_tclist('-b {} chill-testcase path/to/somescript.lua path/to/somesrc.c'.format(self.staging_dir_bin).split(), coverage_set=gcov.GcovSet())
+        tc = tclist[0]
+        
+        self.assertEqual(tc.config.chill_dir, None)
+        self.assertEqual(tc.config.bin_dir, self.staging_dir_bin)
+        self.assertEqual(tc.config.build_cuda, False)
+        self.assertEqual(tc.config.version, 'dev')
+        self.assertEqual(tc.config.script_lang, 'lua')
+        
+        self.assertEqual(tc.name, 'chill-lua:somescript.lua')
+        self.assertEqual(tc.wd, os.getcwd())
+        self.assertEqual(tc.chill_bin, os.path.join(self.staging_dir_bin, 'chill-lua'))
+        self.assertEqual(tc.chill_script, 'somescript.lua')
+        self.assertEqual(tc.chill_src, 'somesrc.c')
+        self.assertEqual(tc.chill_script_path, os.path.join(os.getcwd(), 'path/to/somescript.lua'))
+        self.assertEqual(tc.chill_src_path, os.path.join(os.getcwd(), 'path/to/somesrc.c'))
+        self.assertEqual(tc.chill_gensrc, 'rose_somesrc.c')
+    
+    def test_main_parse_chill_python_dev(self):
+        tclist = main.args_to_tclist('-b {} chill-testcase path/to/somescript.py path/to/somesrc.c'.format(self.staging_dir_bin).split(), coverage_set=gcov.GcovSet())
+        tc = tclist[0]
+        
+        self.assertEqual(tc.config.chill_dir, None)
+        self.assertEqual(tc.config.bin_dir, self.staging_dir_bin)
+        self.assertEqual(tc.config.build_cuda, False)
+        self.assertEqual(tc.config.version, 'dev')
+        self.assertEqual(tc.config.script_lang, 'python')
+        
+        self.assertEqual(tc.name, 'chill-python:somescript.py')
+        self.assertEqual(tc.wd, os.getcwd())
+        self.assertEqual(tc.chill_bin, os.path.join(self.staging_dir_bin, 'chill-python'))
+        self.assertEqual(tc.chill_script, 'somescript.py')
+        self.assertEqual(tc.chill_src, 'somesrc.c')
+        self.assertEqual(tc.chill_script_path, os.path.join(os.getcwd(), 'path/to/somescript.py'))
+        self.assertEqual(tc.chill_src_path, os.path.join(os.getcwd(), 'path/to/somesrc.c'))
+        self.assertEqual(tc.chill_gensrc, 'rose_somesrc.c')
+    
+    def test_main_parse_cudachill_dev(self):
+        tclist = main.args_to_tclist('-b {} chill-testcase -u path/to/somescript.lua path/to/somesrc.c'.format(self.staging_dir_bin).split(), coverage_set=gcov.GcovSet())
+        tc = tclist[0]
+        
+        self.assertEqual(tc.config.chill_dir, None)
+        self.assertEqual(tc.config.bin_dir, self.staging_dir_bin)
+        self.assertEqual(tc.config.build_cuda, True)
+        self.assertEqual(tc.config.version, 'dev')
+        self.assertEqual(tc.config.script_lang, 'lua')
+        
+        self.assertEqual(tc.name, 'cuda-chill:somescript.lua')
+        self.assertEqual(tc.wd, os.getcwd())
+        self.assertEqual(tc.chill_bin, os.path.join(self.staging_dir_bin, 'cuda-chill'))
+        self.assertEqual(tc.chill_script, 'somescript.lua')
+        self.assertEqual(tc.chill_src, 'somesrc.c')
+        self.assertEqual(tc.chill_script_path, os.path.join(os.getcwd(), 'path/to/somescript.lua'))
+        self.assertEqual(tc.chill_src_path, os.path.join(os.getcwd(), 'path/to/somesrc.c'))
+        self.assertEqual(tc.chill_gensrc, 'rose_somesrc.cu')
+    
+    def test_main_parse_cudachill_python_dev(self):
+        tclist = main.args_to_tclist('-b {} chill-testcase -u path/to/somescript.py path/to/somesrc.c'.format(self.staging_dir_bin).split(), coverage_set=gcov.GcovSet())
+        tc = tclist[0]
+        
+        self.assertEqual(tc.config.chill_dir, None)
+        self.assertEqual(tc.config.bin_dir, self.staging_dir_bin)
+        self.assertEqual(tc.config.build_cuda, True)
+        self.assertEqual(tc.config.version, 'dev')
+        self.assertEqual(tc.config.script_lang, 'python')
+        
+        self.assertEqual(tc.name, 'cuda-chill-python:somescript.py')
+        self.assertEqual(tc.wd, os.getcwd())
+        self.assertEqual(tc.chill_bin, os.path.join(self.staging_dir_bin, 'cuda-chill-python'))
+        self.assertEqual(tc.chill_script, 'somescript.py')
+        self.assertEqual(tc.chill_src, 'somesrc.c')
+        self.assertEqual(tc.chill_script_path, os.path.join(os.getcwd(), 'path/to/somescript.py'))
+        self.assertEqual(tc.chill_src_path, os.path.join(os.getcwd(), 'path/to/somesrc.c'))
+        self.assertEqual(tc.chill_gensrc, 'rose_somesrc.cu')
+    
+    def test_main_parse_chill_release(self):
+        tclist = main.args_to_tclist('-b {} chill-testcase -v release path/to/somescript.script path/to/somesrc.c'.format(self.staging_dir_bin).split(), coverage_set=gcov.GcovSet())
+        tc = tclist[0]
+        self.assertEqual(tc.name, 'chill-release:somescript.script')
+        self.assertEqual(tc.wd, os.getcwd())
+        self.assertEqual(tc.chill_bin, os.path.join(self.staging_dir_bin, 'chill-release'))
+        self.assertEqual(tc.chill_script, 'somescript.script')
+        self.assertEqual(tc.chill_src, 'somesrc.c')
+        self.assertEqual(tc.chill_script_path, os.path.join(os.getcwd(), 'path/to/somescript.script'))
+        self.assertEqual(tc.chill_src_path, os.path.join(os.getcwd(), 'path/to/somesrc.c'))
+        self.assertEqual(tc.chill_gensrc, 'rose_somesrc.c')
+    
+    def test_main_parse_chill_release(self):
+        tclist = main.args_to_tclist('-b {} chill-testcase -uv release path/to/somescript.lua path/to/somesrc.c'.format(self.staging_dir_bin).split(), coverage_set=gcov.GcovSet())
+        tc = tclist[0]
+        self.assertEqual(tc.name, 'cuda-chill-release:somescript.lua')
+        self.assertEqual(tc.wd, os.getcwd())
+        self.assertEqual(tc.chill_bin, os.path.join(self.staging_dir_bin, 'cuda-chill-release'))
+        self.assertEqual(tc.chill_script, 'somescript.lua')
+        self.assertEqual(tc.chill_src, 'somesrc.c')
+        self.assertEqual(tc.chill_script_path, os.path.join(os.getcwd(), 'path/to/somescript.lua'))
+        self.assertEqual(tc.chill_src_path, os.path.join(os.getcwd(), 'path/to/somesrc.c'))
+        self.assertEqual(tc.chill_gensrc, 'rose_somesrc.cu')
+    
+    def test_main_parse_chillbuild_dev(self):
+        tclist = main.args_to_tclist('-b {} -C {} build-chill-testcase'.format(self.staging_dir_bin, self.chill_dev_src).split(), coverage_set=gcov.GcovSet())
+        tc = tclist[0]
+        self.assertEqual(tc.name, 'chill')
+        self.assertEqual(tc.config.bin_dir, self.staging_dir_bin)
+        self.assertEqual(tc.config.chill_dir, self.chill_dev_src)
+        self.assertEqual(tc.config.script_lang, 'script')
+    
+    def test_main_parse_chillbuild_lua_dev(self):
+        tclist = main.args_to_tclist('-b {} -C {} build-chill-testcase -i lua'.format(self.staging_dir_bin, self.chill_dev_src).split(), coverage_set=gcov.GcovSet())
+        tc = tclist[0]
+        self.assertEqual(tc.name, 'chill-lua')
+        self.assertEqual(tc.config.bin_dir, self.staging_dir_bin)
+        self.assertEqual(tc.config.chill_dir, self.chill_dev_src)
+        self.assertEqual(tc.config.script_lang, 'lua')
+    
+    def test_main_parse_chillbuild_python_dev(self):
+        tclist = main.args_to_tclist('-b {} -C {} build-chill-testcase -i python'.format(self.staging_dir_bin, self.chill_dev_src).split(), coverage_set=gcov.GcovSet())
+        tc = tclist[0]
+        self.assertEqual(tc.name, 'chill-python')
+        self.assertEqual(tc.config.bin_dir, self.staging_dir_bin)
+        self.assertEqual(tc.config.chill_dir, self.chill_dev_src)
+        self.assertEqual(tc.config.script_lang, 'python')
+    
+    def test_main_parse_chillbuild_cuda_dev(self):
+        tclist = main.args_to_tclist('-b {} -C {} build-chill-testcase -u'.format(self.staging_dir_bin, self.chill_dev_src).split(), coverage_set=gcov.GcovSet())
+        tc = tclist[0]
+        self.assertEqual(tc.name, 'cuda-chill')
+        self.assertEqual(tc.config.bin_dir, self.staging_dir_bin)
+        self.assertEqual(tc.config.chill_dir, self.chill_dev_src)
+        self.assertEqual(tc.config.script_lang, 'lua')
+    
+    def test_main_parse_chillbuild_cuda_python_dev(self):
+        tclist = main.args_to_tclist('-b {} -C {} build-chill-testcase -u -i python'.format(self.staging_dir_bin, self.chill_dev_src).split(), coverage_set=gcov.GcovSet())
+        tc = tclist[0]
+        self.assertEqual(tc.name, 'cuda-chill-python')
+        self.assertEqual(tc.config.bin_dir, self.staging_dir_bin)
+        self.assertEqual(tc.config.chill_dir, self.chill_dev_src)
+        self.assertEqual(tc.config.script_lang, 'python')
+    
+    def test_main_parse_chillbuild_release(self):
+        tclist = main.args_to_tclist('-b {} -C {} build-chill-testcase -v release'.format(self.staging_dir_bin, self.chill_dev_src).split(), coverage_set=gcov.GcovSet())
+        tc = tclist[0]
+        self.assertEqual(tc.name, 'chill-release')
+        self.assertEqual(tc.config.bin_dir, self.staging_dir_bin)
+        self.assertEqual(tc.config.chill_dir, self.chill_dev_src)
+        self.assertEqual(tc.config.script_lang, 'script')
+    
+    def test_main_parse_chillbuild_cuda_release(self):
+        tclist = main.args_to_tclist('-b {} -C {} build-chill-testcase -u -v release'.format(self.staging_dir_bin, self.chill_dev_src).split(), coverage_set=gcov.GcovSet())
+        tc = tclist[0]
+        self.assertEqual(tc.name, 'cuda-chill-release')
+        self.assertEqual(tc.config.bin_dir, self.staging_dir_bin)
+        self.assertEqual(tc.config.chill_dir, self.chill_dev_src)
+        self.assertEqual(tc.config.script_lang, 'lua')
+    
+    def test_main_tctree(self):
+        tclist = main.args_to_tclist('batch test-cases/unit/chill-basic.tclist'.split(), coverage_set=gcov.GcovSet())
+        for tc in tclist:
+            runtest(tc)
+
+    
diff --git a/test-chill/unit-tests/test__cpp_validate_env.py b/test-chill/unit-tests/test__cpp_validate_env.py
new file mode 100644
index 0000000..e0225cd
--- /dev/null
+++ b/test-chill/unit-tests/test__cpp_validate_env.py
@@ -0,0 +1,377 @@
+import ast
+import functools
+import itertools
+import pylang.debug
+import random
+import struct
+import unittest
+
+import testchill
+import testchill._cpp_validate_env as validate_env
+import testchill.cpp_validate
+import testchill.util
+
+## Support functions ##
+class Point(object):
+    def __init__(self, x, y):
+        self.x = x
+        self.y = y
+
+def _compile_and_run(expr, target_type, bindings):
+    t = ast.fix_missing_locations(ast.Expression(expr.compile_expr(target_type)))
+    return eval(compile(t, '<string>', 'eval'), bindings)
+
+def _compile_and_invoke(expr, target_type, bindings, args):
+    t = ast.fix_missing_locations(ast.Expression(expr.compile_expr(target_type)))
+    return (eval(compile(t, '<string>', 'eval'), bindings))(*args)
+
+def _expr_test(tc, expr, fv_bindings, rt_bindings, target_type, exp_freevars, exp_value):
+    freevars = expr.getfreevars(fv_bindings)
+    value = _compile_and_run(expr, target_type, rt_bindings)
+    tc.assertEqual(exp_freevars, freevars)
+    tc.assertEqual(exp_value, value)
+    tc.assertEqual(target_type, type(value))
+
+def _expr_test_list(tc, expr, fv_bindings, rt_bindings, target_type, exp_freevars, exp_value):
+    freevars = expr.getfreevars(fv_bindings)
+    value = _compile_and_run(expr, target_type, rt_bindings)
+    tc.assertEqual(exp_freevars, freevars)
+    tc.assertEqual(exp_value, value)
+    tc.assertEqual(list, type(value))
+
+def _expr_test_invoke(tc, expr, fv_bindings, rt_bindings, target_type, exp_freevars, invoke_args, exp_value):
+    freevars = expr.getfreevars(fv_bindings)
+    value = _compile_and_invoke(expr, target_type, rt_bindings, invoke_args)
+    tc.assertEqual(exp_freevars, freevars)
+    tc.assertEqual(exp_value, value)
+    tc.assertEqual(target_type.exprtype, type(value))
+
+def lambdatype(param_types, etype):  # shorthand for validate_env._pylambdatype
+    return validate_env._pylambdatype(param_types, etype)
+
+def arraytype(dims, etype):  # shorthand for validate_env._pyarraytype
+    return validate_env._pyarraytype(dims, etype)
+
+
+## Test case class ##
+class Test_CppValidateEnv(unittest.TestCase):
+    def setUp(self):
+        ### data for the abstract syntax tree ###
+        _const_4 = validate_env._ConstantExpr('4')
+        _const_3 = validate_env._ConstantExpr('3')
+        _const_2 = validate_env._ConstantExpr('2')
+        _const_0 = validate_env._ConstantExpr('0')
+        _name_x = validate_env._NameExpr('x')
+        _name_y = validate_env._NameExpr('y')
+        _name_p = validate_env._NameExpr('p')
+        _name_pow = validate_env._NameExpr('pow')
+        _attr_px = validate_env._AttributeExpr(_name_p, 'x')
+        _attr_py = validate_env._AttributeExpr(_name_p, 'y')
+        _add_3_2 = validate_env._BinExpr(_const_3, '+', _const_2)
+        _add_x_2 = validate_env._BinExpr(_name_x, '+', _const_2)
+        _pow_x_2 = validate_env._BinExpr(_name_x, '**', _const_2)
+        
+        _name_i = validate_env._NameExpr('i')
+        _lambda_i = validate_env._LambdaExpr(['i'],_name_i)
+        
+        _name_j = validate_env._NameExpr('j')
+        _const_10 = validate_env._ConstantExpr('10')
+        _mul_i_10 = validate_env._BinExpr(_name_i, '*', _const_10)
+        _add_mul_i_10_j = validate_env._BinExpr(_mul_i_10, '+', _name_j)
+        _lambda_ij = validate_env._LambdaExpr(['i','j'],_add_mul_i_10_j)
+        
+        self._ConstantExpr_test_data = [
+                (('3',), set(), dict(), int, set(), int(3)),
+                (('3',), set(), dict(), float, set(), float(3))
+            ]
+        self._NameExpr_test_data = [
+                (('x',), set(), {'x':3}, int, {'x'}, int(3)),
+                (('x',), {'x'}, {'x':3}, int, set(), int(3))
+            ]
+        self._AttributeExpr_test_data = [
+                ((validate_env._NameExpr('p'),'x'), set(), {'p':Point(3,0)}, int, {'p'}, int(3)),
+                ((validate_env._NameExpr('p'),'x'), {'p'}, {'p':Point(3,0)}, int, set(), int(3))
+            ]
+        self._BinExpr_test_data = [
+                ((_const_3, '+', _const_2), set(), dict(), int, set(), int(5)),
+                ((_const_3, '+', _const_2), set(), dict(), float, set(), float(5)),
+                ((_name_x, '+', _const_2), set(), {'x':3}, int, {'x'}, int(5)),
+                ((_name_x, '+', _const_2), {'x'}, {'x':3}, int, set(), int(5)),
+                ((_const_3, '+', _name_x), set(), {'x':2}, int, {'x'}, int(5)),
+                ((_const_3, '+', _name_x), {'x'}, {'x':2}, int, set(), int(5)),
+                ((_const_3, '-', _const_2), set(), dict(), int, set(), int(1)),
+                ((_const_3, '*', _const_2), set(), dict(), int, set(), int(6)),
+                ((_const_3, '/', _const_2), set(), dict(), int, set(), int(1)),
+                ((_const_3, '**', _const_2), set(), dict(), int, set(), int(9))
+            ]
+        self._UnaryExpr_test_data = [
+                (('-', _const_3), set(), dict(), int, set(), int(-3)),
+                (('-', _add_3_2), set(), dict(), int, set(), int(-5)),
+                (('-', _add_x_2), set(), {'x':3}, int, {'x'}, int(-5)),
+                (('-', _add_x_2), {'x'}, {'x':3}, int, set(), int(-5))
+            ]
+        self._LambdaExpr_test_data = [
+                (([],_const_3), set(), dict(), lambdatype([],int), set(), tuple(), int(3)),
+                (([],_name_x), set(), {'x':3}, lambdatype([],int), {'x'}, tuple(), int(3)),
+                ((['x'],_pow_x_2), set(), dict(), lambdatype([int],int), set(), (int(4),), int(16))
+            ]
+        self._InvokeExpr_test_data = [
+                ((_name_pow,[_const_3, _const_2]), set(), dict(), int, {'pow'}, int(9)),
+            ]
+        self._MatrixGenerator_test_data = [
+                (([_const_2],_lambda_i), set(), {'_pyitertools': itertools}, arraytype([None],int), set(), [0, 1]),
+                (([None],_lambda_i), set(), {'_pyitertools': itertools}, arraytype([_const_2],int), set(), [0, 1]),
+                (([_const_2,_const_3],_lambda_ij), set(), {'_pyitertools': itertools}, arraytype([_const_2,_const_3], int), set(), [0, 1, 2, 10, 11, 12]),
+                (([_const_2,_const_3],_lambda_ij), set(), {'_pyitertools': itertools}, arraytype([None,None], int), set(), [0, 1, 2, 10, 11, 12]),
+                (([_const_2,None],_lambda_ij), set(), {'_pyitertools': itertools}, arraytype([None,_const_3], int), set(), [0, 1, 2, 10, 11, 12]),
+                (([None,_const_3],_lambda_ij), set(), {'_pyitertools': itertools}, arraytype([_const_2,None], int), set(), [0, 1, 2, 10, 11, 12]),
+                (([None,None],_lambda_ij), set(), {'_pyitertools': itertools}, arraytype([_const_2,_const_3], int), set(), [0, 1, 2, 10, 11, 12]),
+                (([_name_x],_lambda_i), set(), {'_pyitertools': itertools, 'x':2}, arraytype([None],int), {'x'}, [0, 1]),
+                (([None],_lambda_i), set(), {'_pyitertools': itertools, 'x':2}, arraytype([_name_x],int), set(), [0, 1]),
+            ]
+        self._RandomExpr_test_state = random.getstate()
+        self._RandomExpr_test_data = [
+                ((_const_0,_const_4), set(), {'_pyrandom': random}, int, set(), int(random.random()*4)),
+                ((_const_0,_name_x), set(), {'_pyrandom': random, 'x':4}, int, {'x'}, int(random.random()*4)),
+                ((_name_x,_const_4), set(), {'_pyrandom': random, 'x':0}, int, {'x'}, int(random.random()*4)),
+            ]
+        ### data for data generating ###
+        _name_ambn = validate_env._NameExpr('ambn')
+        _name_an   = validate_env._NameExpr('an')
+        _name_bm   = validate_env._NameExpr('bm')
+        _name_even2 = validate_env._NameExpr('evendist2')
+        _lambda_ij_0 = validate_env._LambdaExpr(['i','j'],_const_0)
+        _matrix_2_an_ambn_even2 = validate_env._MatrixGenerator([_name_an,_name_ambn],_name_even2)
+        _matrix_2_ambn_bm_even2 = validate_env._MatrixGenerator([_name_ambn,_name_bm],_name_even2)
+        _matrix_2_an_bm_lambda_ij_0 = validate_env._MatrixGenerator([_name_an,_name_bm],_lambda_ij_0)
+        _add_an_bm = validate_env._BinExpr(_name_an, '+', _name_bm)
+        _int_type = validate_env._CppPrimitiveType.get_from_cppname('int')
+        _float_type = validate_env._CppPrimitiveType.get_from_cppname('float')
+        _float_ptr_type = validate_env._CppPointerType(_float_type)
+        _param_A    = validate_env._Parameter('A',   _float_ptr_type,'in', _matrix_2_an_ambn_even2)
+        _param_B    = validate_env._Parameter('B',   _float_ptr_type,'in', _matrix_2_ambn_bm_even2)
+        _param_C    = validate_env._Parameter('C',   _float_ptr_type,'out',_matrix_2_an_bm_lambda_ij_0)
+        _param_ambn = validate_env._Parameter('ambn',_int_type,      'in', _add_an_bm)
+        _param_an   = validate_env._Parameter('an',  _int_type,      'in', _const_2)
+        _param_bm   = validate_env._Parameter('bm',  _int_type,      'in', _const_3)
+        self._Parameter_order_by_freevars_test_data = [
+                ([_param_A, _param_B, _param_C, _param_ambn, _param_an, _param_bm], ['an','bm','C','ambn','A','B'])
+            ]
+        _float_3_type = validate_env._CppArrayType(_float_type, [_const_3])
+        _float_3_2_type = validate_env._CppArrayType(_float_type, [_const_3,_const_2])
+        _name_N = validate_env._NameExpr('N')
+        _float_N_type = validate_env._CppArrayType(_float_type, [_name_N])
+        _float_N_2_type = validate_env._CppArrayType(_float_type, [_name_N,_const_2])
+        self._CppType_statictype_test_data = [
+                ((_int_type, dict()), 'int'),
+                ((_float_ptr_type, dict()), 'float*'),
+                ((_float_3_type, dict()), 'float[3]'),
+                ((_float_N_type, {'N': 3}), 'float[3]'),
+                ((_float_N_2_type, {'N': 3}), 'float[3][2]')
+            ]
+        _int_ptr_type = validate_env._CppPointerType(_int_type)
+        _int_ptr_ptr_type = validate_env._CppPointerType(_int_ptr_type)
+        _int_3_type = validate_env._CppArrayType(_int_type, [_const_3])
+        _int_N_type = validate_env._CppArrayType(_int_type, [_name_N])
+        _int_3_2_type = validate_env._CppArrayType(_int_type, [_const_3, _const_2])
+        joinbytes = lambda b: functools.reduce(lambda a,v: a+v,b)
+        self._CppType_formatdata_test_data = [
+                ((_int_type,         dict(), 3),                 ([1],     struct.pack('i',3))),
+                ((_float_type,       dict(), float(3)),          ([1],     struct.pack('f',float(3)))),
+                ((_int_3_type,       dict(), list(range(3))),    ([3],     joinbytes([struct.pack('i',i) for i in range(3)]))),
+                ((_int_3_2_type,     dict(), list(range(6))),    ([3,2],   joinbytes([struct.pack('i',i) for i in range(6)]))),
+                ((_int_ptr_type,     dict(), 3),                 ([1,1],   struct.pack('i',3))),
+                ((_int_ptr_type,     dict(), list(range(3))),    ([3,1],   joinbytes([struct.pack('i',i) for i in range(3)]))),
+                ((_int_ptr_ptr_type, dict(), list(range(3))),    ([3,1,1], joinbytes([struct.pack('i',i) for i in range(3)]))),
+                ((_int_ptr_ptr_type, dict(), [[0,1,2],[3,4,5]]), ([2,3,1], joinbytes([struct.pack('i',i) for i in range(6)]))),
+            ]
+        evendist2 = lambda i,j: random.random()
+        random.seed(0)
+        self._Parameter_generatedata_test_state = random.getstate()
+        if testchill.util.python_version_major == 2:
+            self._Parameter_generatedata_test_data = [
+                    ((_param_A,    {'an':2, 'ambn':5,'evendist2':evendist2}), ('A', 'float*', [10, 1], '\x08,X?M\tB?)U\xd7>\xbc\x90\x84>\xe6\xe2\x02?\x87S\xcf>\x06\xa7H?\xceK\x9b>\x84\x04\xf4>\x86X\x15?')),
+                    ((_param_B,    {'ambn':5, 'bm':3,'evendist2':evendist2}), ('B', 'float*', [15, 1], '\x16zh?(3\x01?\rM\x90>b|A?nM\x1e?^B\x80>!\xe5h?\xd4\x97{?fjO?Y\xf4f?\xaa\xcb\x9e>A\xd6:?D\x1af?\x92\x19/?\xb1\xbc\xf1>')),
+                    ((_param_C,    {'an':2, 'bm':3, 'evendist2':evendist2}),  ('C', 'float*', [6, 1], '\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00')),
+                    ((_param_ambn, {'an':2, 'bm':3}),                         ('ambn', 'int', [1], '\x05\x00\x00\x00')),
+                    ((_param_an,   dict()),                                   ('an', 'int', [1], '\x02\x00\x00\x00')),
+                    ((_param_bm,   dict()),                                   ('bm', 'int', [1], '\x03\x00\x00\x00'))
+                ]
+        else:
+            self._Parameter_generatedata_test_data = [
+                    ((_param_A,    {'an':2, 'ambn':5,'evendist2':evendist2}), ('A', 'float*', [10, 1], b'\x08,X?M\tB?)U\xd7>\xbc\x90\x84>\xe6\xe2\x02?\x87S\xcf>\x06\xa7H?\xceK\x9b>\x84\x04\xf4>\x86X\x15?')),
+                    ((_param_B,    {'ambn':5, 'bm':3,'evendist2':evendist2}), ('B', 'float*', [15, 1], b'\x16zh?(3\x01?\rM\x90>b|A?nM\x1e?^B\x80>!\xe5h?\xd4\x97{?fjO?Y\xf4f?\xaa\xcb\x9e>A\xd6:?D\x1af?\x92\x19/?\xb1\xbc\xf1>')),
+                    ((_param_C,    {'an':2, 'bm':3, 'evendist2':evendist2}),  ('C', 'float*', [6, 1], b'\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00')),
+                    ((_param_ambn, {'an':2, 'bm':3}),                         ('ambn', 'int', [1], b'\x05\x00\x00\x00')),
+                    ((_param_an,   dict()),                                   ('an', 'int', [1], b'\x02\x00\x00\x00')),
+                    ((_param_bm,   dict()),                                   ('bm', 'int', [1], b'\x03\x00\x00\x00'))
+                ]
+        ### data for parsing ###
+        self.parse_procedure_test_data = [
+                (('procedure void q()',), ('void', 'q', 0)),
+                (('procedure int q()',), ('int', 'q', 0)),
+                (('procedure float q()',), ('float', 'q', 0)),
+                (('procedure unsigned int q()',), ('unsigned int', 'q', 0)),
+                (('procedure void q(in int x)',), ('void', 'q', 1)),
+                (('procedure void q(in int x, in int y)',), ('void', 'q', 2)),
+            ]
+        _mm_proc_expr = '''
+            procedure void mm(
+                in  float* A    = matrix([an,ambn],evendist2),
+                in  float* B    = matrix([ambn,bm],evendist2),
+                out float* C    = matrix([an,bm],lambda i,j: 0),
+                in  int    ambn = an + bm,
+                in  int    an   = 2,
+                in  int    bm   = 3)
+            '''
+        self.parse_parameter_test_data = [
+                (('procedure void mm(in int x)',), [(0, 'x', 'int', 'in', False, set())]),
+                (('procedure void mm(out int* x = 10)',), [(0, 'x', 'int*', 'out', True, set())]),
+                ((_mm_proc_expr,),[
+                        (0, 'A', 'float*', 'in', True, set(['an','ambn','evendist2'])),
+                        (1, 'B', 'float*', 'in', True, set(['ambn','bm','evendist2'])),
+                        (2, 'C', 'float*', 'out', True, set(['an','bm'])),
+                        (3, 'ambn', 'int', 'in', True, set(['an','bm'])),
+                        (4, 'an', 'int', 'in', True, set([])),
+                        (5, 'bm', 'int', 'in', True, set([]))
+                    ]),
+            ]
+        ### data for code generation ###
+        _float_2d_type = validate_env._CppArrayType(_float_type, [_name_an,_name_ambn])
+        self._CppType_statictype_test_data = [
+                ((_float_2d_type, {'an':2,'ambn':5}), 'float[2][5]')
+            ]
+        self._CppType_get_cdecl_stmt_test_data = [
+                ((_float_2d_type, 'A', {'an':2,'ambn':5}), 'float A[2][5];')
+            ]
+        self._CppType_get_cread_stmt_test_data = [
+                ((_float_2d_type, 'A', {'an':2,'ambn':5}, 'datafile_initialize', [10,1]), 'datafile_initialize.read((char*)A, 10*sizeof(float));')
+            ]
+        self._CppType_get_cwrite_stmt_test_data = [
+                ((_float_2d_type, 'A', {'an':2,'ambn':5}, 'datafile_out', [10,1]), 'datafile_out.write((char*)A, 10*sizeof(float));')
+            ]
+    
+    def run_expr_test_data(self, ctor, test_data):
+        for ctor_args, fv_bindings, rt_bindings, target_type, exp_freevars, exp_value in test_data:
+            expr = ctor(*ctor_args)
+            _expr_test(self, expr, fv_bindings, rt_bindings, target_type, exp_freevars, exp_value)
+    
+    def run_expr_test_data_list(self, ctor, test_data):
+        for ctor_args, fv_bindings, rt_bindings, target_type, exp_freevars, exp_value in test_data:
+            expr = ctor(*ctor_args)
+            _expr_test_list(self, expr, fv_bindings, rt_bindings, target_type, exp_freevars, exp_value)
+    
+    def run_expr_test_data_invoke(self, ctor, test_data):
+        for ctor_args, fv_bindings, rt_bindings, target_type, exp_freevars, invoke_args, exp_value in test_data:
+            expr = ctor(*ctor_args)
+            _expr_test_invoke(self, expr, fv_bindings, rt_bindings, target_type, exp_freevars, invoke_args, exp_value)
+    
+    def test__ConstantExpr(self):
+        self.run_expr_test_data(validate_env._ConstantExpr, self._ConstantExpr_test_data)
+    
+    def test__NameExpr(self):
+        self.run_expr_test_data(validate_env._NameExpr, self._NameExpr_test_data)
+    
+    def test__AttributeExpr(self):
+        self.run_expr_test_data(validate_env._AttributeExpr, self._AttributeExpr_test_data)
+    
+    def test__UnaryExpr(self):
+        self.run_expr_test_data(validate_env._UnaryExpr, self._UnaryExpr_test_data)
+    
+    def test__LambdaExpr(self):
+        self.run_expr_test_data_invoke(validate_env._LambdaExpr, self._LambdaExpr_test_data)
+    
+    def test__InvokeExpr(self):
+        self.run_expr_test_data(validate_env._InvokeExpr, self._InvokeExpr_test_data)
+    
+    def test__MatrixGenerator(self):
+        self.run_expr_test_data_list(validate_env._MatrixGenerator, self._MatrixGenerator_test_data)
+    
+    def test__RandomExpr(self):
+        random.setstate(self._RandomExpr_test_state)
+        self.run_expr_test_data(validate_env._RandomExpr, self._RandomExpr_test_data)
+    
+    def test_parse_procedure(self):
+        parse_func = testchill.cpp_validate._parse_testproc_script
+        for args, expected in self.parse_procedure_test_data:
+            rtype_exp, name_exp, param_count_exp = expected
+            proc = parse_func(*args)
+            self.assertEqual(str(proc.rtype), rtype_exp)
+            self.assertEqual(proc.name, name_exp)
+            self.assertEqual(len(proc.parameters), param_count_exp)
+    
+    def test_parse_parameter(self):
+        #pylang.debug.enable(['pylang.parser.BaseTextParser.parse'])
+        parse_func = testchill.cpp_validate._parse_testproc_script
+        for args, expected in self.parse_parameter_test_data:
+            proc = parse_func(*args)
+            for param_exp in expected:
+                index, name_exp, ctype_exp, direction_exp, has_init_exp, freevars_exp = param_exp
+                param = proc.parameters[index]
+                self.assertEqual(param.name, name_exp)
+                self.assertEqual(str(param.cpptype), ctype_exp)
+                self.assertEqual(param.direction, direction_exp)
+                self.assertEqual(param.init_expr is not None, has_init_exp)
+                self.assertEqual(param.getfreevars(), freevars_exp)
+        #pylang.debug.enable(['pylang.parser.BaseTextParser.parse'], False)
+    
+    def test__Parameter_order_by_freevars(self):
+        def testfunc(param_list):
+            return [p.name for p in validate_env._Parameter.order_by_freevars(param_list)]
+        for arg, expected in self._Parameter_order_by_freevars_test_data:
+            self.assertEqual(testfunc(arg),expected)
+    
+    def test__CppType_statictype(self):
+        def testfunc(ctype, glbls):
+            return str(ctype.statictype(glbls))
+        for args, expected in self._CppType_statictype_test_data:
+            self.assertEqual(testfunc(*args), expected)
+    
+    def test__CppType_formatdata(self):
+        def testfunc(ctype, glbls, data):
+            return ctype.statictype(glbls).formatdata(data)
+        for args, expected in self._CppType_formatdata_test_data:
+            dim_exp, bytes_exp = expected
+            dim_val, bytes_val = testfunc(*args)
+            self.assertEqual(dim_val, dim_exp)
+            self.assertEqual(bytes_val, bytes_exp)
+    
+    def test__CppType_statictype(self):
+        def testfunc(t, bindings):
+            return str(t.statictype(bindings))
+        for args, typename in self._CppType_statictype_test_data:
+            self.assertEqual(testfunc(*args), typename)
+    
+    def test__CppType_get_cdecl_stmt(self):
+        def testfunc(t, param_name, bindings):
+            return t.statictype(bindings).get_cdecl_stmt(param_name)
+        for args, decl_exp in self._CppType_get_cdecl_stmt_test_data:
+            decl_val = testfunc(*args)
+            self.assertEqual(decl_val, decl_exp)
+    
+    def test__CppType_get_cread_stmt(self):
+        def testfunc(t, param_name, bindings, stream, dims):
+            return t.statictype(bindings).get_cread_stmt(param_name, stream, dims)
+        for args, decl_exp in self._CppType_get_cread_stmt_test_data:
+            decl_val = testfunc(*args)
+            self.assertEqual(decl_val, decl_exp)
+    
+    def test__CppType_get_cwrite_stmt(self):
+        def testfunc(t, param_name, bindings, stream, dims):
+            return t.statictype(bindings).get_cwrite_stmt(param_name, stream, dims)
+        for args, decl_exp in self._CppType_get_cwrite_stmt_test_data:
+            decl_val = testfunc(*args)
+            self.assertEqual(decl_val, decl_exp)
+    
+    def test__Parameter_generatedata(self):
+        def testfunc(param, glbls):
+            return param.generatedata(glbls)
+        for args, expected in self._Parameter_generatedata_test_data:
+            name_val, type_val, dims_val, data_val = testfunc(*args)
+            name_exp, type_exp, dims_exp, data_exp = expected
+            #print((name_val,type_val,dims_val,data_val))
+            self.assertEqual(name_val, name_exp)
+            self.assertEqual(str(type_val), type_exp)
+            self.assertEqual(dims_val, dims_exp)
+            self.assertEqual(data_val, data_exp)
+    
diff --git a/test-chill/unit-tests/test__extract.py b/test-chill/unit-tests/test__extract.py
new file mode 100644
index 0000000..72cba8a
--- /dev/null
+++ b/test-chill/unit-tests/test__extract.py
@@ -0,0 +1,48 @@
+import ast
+import unittest
+
+import testchill._extract as _extract
+import testchill.util as util
+
+class TestExtraction(unittest.TestCase):
+    def setUp(self):
+        self._TagExtractor_parse_test_data = [
+                (('a',''),                      []),
+                (('a','x<a>yy</a>z'),           [('yy', {})]),
+                (('a','x<a>yy</a>z<a>ww</a>g'), [('yy', {}), ('ww',{})]),
+                (('a','x<a>yy</a>z<b>ww</b>g'), [('yy', {})])
+            ]
+        self._commented_test_data = [
+                (('no comment here','cc'), []),
+                (('one comment //xxx\n','cc'), ['xxx']),
+                (('two comments //xxx\nunrelated//yyy\n', 'cc'), ['xxx','yyy']),
+                (('two comments //xxx\nunrelated//yyy', 'cc'), ['xxx','yyy']),
+                (('ss/*x\ny\n*/z','cc'),['x\ny\n']),
+                (('ss/*x\ny\n*/z//q\nc','cc'),['x\ny\n','q']),
+                (('ss###x#\n','py'),['x#']),
+                (('ss"""x"""\n','py'),['x'])
+            ]
+    
+    def test__commented(self):
+        def run(txt, ext):
+            return list(_extract._TagExtractor._commented(txt, ext))
+        for args, res in self._commented_test_data:
+            self.assertEqual(run(*args), res)
+    
+    #def test_extract(self):
+    #    def testfunc(tag, txt):
+    #        temp = util.mktemp()
+    #        with open(temp, 'w') as f:
+    #            f.write(txt)
+    #        extracted = _extract._TagExtractor.extract_tag(tag, temp)
+    #        util.rmtemp()
+    #        return extracted
+    #        
+    #    for args, res in self.test_extract_data:
+    #        self.assertEqual(testfunc(*args), res)
+    
+    def test__TagExtractor_parse(self):
+        def testfunc(tag, txt):
+            return _extract._TagExtractor._parse(tag, txt)
+        for args, exp in self._TagExtractor_parse_test_data:
+            self.assertEqual(testfunc(*args), exp)
diff --git a/test-chill/unit-tests/test_chill.py b/test-chill/unit-tests/test_chill.py
new file mode 100644
index 0000000..8aaebfe
--- /dev/null
+++ b/test-chill/unit-tests/test_chill.py
@@ -0,0 +1,215 @@
+import logging
+import os
+import unittest
+
+import testchill.chill
+import testchill.gcov
+import testchill.test
+import testchill.util
+
+
+_runbuild=True
+
+def runtest(tclist):
+    if _runbuild:
+        for tc in tclist:
+            tc.setUp()
+            tc.setresult(tc.run())
+            tc.tearDown()
+
+def runchilltest(tclist, runvalidate=False, runstdout=False):
+    for tc in tclist:
+        tc.setUp()
+        tc.compile_src(tc)
+        tc.run_script(tc)
+        tc.compile_gensrc(tc)
+        if runvalidate:
+            tc.check_run_script_validate(tc)
+        if runstdout:
+            tc.check_run_script_stdout(tc)
+        tc.tearDown()
+
+class TestChillTestCases(unittest.TestCase):
+    def config(self, **kwargs):
+        cargs = {
+                'omega_dir': self.omega_dev_dir,
+                'chill_dir': self.chill_dev_dir,
+                'bin_dir': self.bin_dir,
+                'build_cuda': False,
+                'script_lang': None,
+                'version': 'dev'
+            }
+        cargs.update(kwargs)
+        return testchill.chill.ChillConfig(**cargs)
+    
+    def config_rel(self, **kwargs):
+        kwargs['version'] = 'release'
+        kwargs['omega_dir'] = self.omega_rel_dir
+        kwargs['chill_dir'] = self.chill_rel_dir
+        return self.config(**kwargs)
+    
+    def setUp(self):
+        self.chill_dev_dir = os.getenv('CHILL_DEV_SRC')
+        self.chill_rel_dir = os.getenv('CHILL_RELEASE_SRC')
+        self.omega_dev_dir = os.getenv('OMEGA_DEV_SRC')
+        self.omega_rel_dir = os.getenv('OMEGA_RELEASE_SRC')
+        self.bin_dir = os.getenv('STAGING_DIR_BIN')
+        self.wd = os.getenv('STAGING_DIR_WD')
+        self.build_options = {'coverage':False}
+        
+        testchill.util.shell('cp', [os.path.join(self.chill_dev_dir, 'examples/cuda-chill/cudaize.lua'), self.wd])
+        testchill.util.shell('cp', [os.path.join(self.chill_dev_dir, 'examples/cuda-chill/cudaize.py'), self.wd])
+        
+        self.config_test_func = {
+                0: lambda conf: conf.default_script_lang(),
+                1: lambda conf: conf.name(),
+                2: lambda conf: conf.make_depend_target(),
+                3: lambda conf: conf.make_target(),
+                4: lambda conf: conf.make_args()
+            }
+        self.config_test_data = [
+                ((self.omega_dev_dir, self.chill_dev_dir, self.bin_dir, False, None,     'dev'),     ('script', 'chill',              'depend-chill',      'chill')),
+                ((self.omega_dev_dir, self.chill_dev_dir, self.bin_dir, False, 'lua',    'dev'),     ('script', 'chill-lua',          'depend-chill',      'chill')),
+                ((self.omega_dev_dir, self.chill_dev_dir, self.bin_dir, False, 'python', 'dev'),     ('script', 'chill-python',       'depend-chill',      'chill')),
+                ((self.omega_dev_dir, self.chill_dev_dir, self.bin_dir, True,  None,     'dev'),     ('lua',    'cuda-chill',         'depend-cuda-chill', 'cuda-chill')),
+                ((self.omega_dev_dir, self.chill_dev_dir, self.bin_dir, True,  'python', 'dev'),     ('lua',    'cuda-chill-python',  'depend-cuda-chill', 'cuda-chill')),
+                ((self.omega_rel_dir, self.chill_rel_dir, self.bin_dir, False, None,     'release'), ('script', 'chill-release',      'depend',            'chill')),
+                ((self.omega_rel_dir, self.chill_rel_dir, self.bin_dir, True,  None,     'release'), ('lua',    'cuda-chill-release', 'depend-cuda-chill', 'cuda-chill'))
+            ]
+    
+    def tearDown(self):
+        pass
+    
+    def _run_ChillConfig_test(self, n):
+        for args, expected in self.config_test_data:
+            val = self.config_test_func[n](testchill.chill.ChillConfig(*args))
+            exp = expected[n]
+            self.assertEqual(val, exp)
+    
+    def test_ChillConfig_default_script_lang(self):
+        self._run_ChillConfig_test(0)
+    
+    def test_ChillConfig_name(self):
+        self._run_ChillConfig_test(1)
+    
+    def test_ChillConfig_make_depend_target(self):
+        self._run_ChillConfig_test(2)
+    
+    def test_ChillConfig_make_target(self):
+        self._run_ChillConfig_test(3)
+    
+    #def test_ChillConfig_make_args(self):
+    #    self._run_ChillConfig_test(4)
+    
+    def test_chill_dev(self):
+        tc = testchill.chill.BuildChillTestCase(self.config(), self.build_options)
+        self.assertEqual(tc.config.name(), 'chill')
+        self.assertEqual(tc.config.env()['OMEGAHOME'], self.omega_dev_dir)
+        self.assertEqual(tc.config.make_depend_target(), 'depend-chill')
+        self.assertEqual(tc.config.make_target(), 'chill')
+        self.assertEqual(tc.name, 'chill')
+        logging.info('Building ' + tc.name)
+        runtest([tc])
+    
+    def test_chill_dev_lua(self):
+        tc = testchill.chill.BuildChillTestCase(self.config(script_lang='lua'), self.build_options)
+        self.assertEqual(tc.config.name(), 'chill-lua')
+        self.assertEqual(tc.config.env()['OMEGAHOME'], self.omega_dev_dir)
+        self.assertEqual(tc.config.make_depend_target(), 'depend-chill')
+        self.assertEqual(tc.config.make_target(), 'chill')
+        self.assertEqual(tc.config.make_args(), 'SCRIPT_LANG=lua')
+        self.assertEqual(tc.name, 'chill-lua')
+        logging.info('Building ' + tc.name)
+        runtest([tc])
+    
+    def test_chill_dev_python(self):
+        tc = testchill.chill.BuildChillTestCase(self.config(script_lang='python'), self.build_options)
+        self.assertEqual(tc.config.name(), 'chill-python')
+        self.assertEqual(tc.config.env()['OMEGAHOME'], self.omega_dev_dir)
+        self.assertEqual(tc.config.make_depend_target(), 'depend-chill')
+        self.assertEqual(tc.config.make_target(), 'chill')
+        self.assertEqual(tc.config.make_args(), 'SCRIPT_LANG=python')
+        self.assertEqual(tc.name, 'chill-python')
+        logging.info('Building ' + tc.name)
+        runtest([tc])
+    
+    def test_cudachill_dev(self):
+        tc = testchill.chill.BuildChillTestCase(self.config(build_cuda=True), self.build_options)
+        self.assertEqual(tc.config.name(), 'cuda-chill')
+        self.assertEqual(tc.config.env()['OMEGAHOME'], self.omega_dev_dir)
+        self.assertEqual(tc.config.make_depend_target(), 'depend-cuda-chill')
+        self.assertEqual(tc.config.make_target(), 'cuda-chill')
+        self.assertEqual(tc.name, 'cuda-chill')
+        logging.info('Building ' + tc.name)
+        runtest([tc])
+    
+    def test_cudachill_dev_python(self):  # cuda-chill built with script_lang='python'; needs its own name so it does not replace test_cudachill_dev
+        tc = testchill.chill.BuildChillTestCase(self.config(build_cuda=True, script_lang='python'), self.build_options)
+        self.assertEqual(tc.config.name(), 'cuda-chill-python')
+        self.assertEqual(tc.config.env()['OMEGAHOME'], self.omega_dev_dir)
+        self.assertEqual(tc.config.make_depend_target(), 'depend-cuda-chill')
+        self.assertEqual(tc.config.make_target(), 'cuda-chill')
+        self.assertEqual(tc.name, 'cuda-chill-python')
+        logging.info('Building ' + tc.name)
+        runtest([tc])  # runs the actual build only when the module-level _runbuild flag is set
+
+    def test_chill_release(self):
+        tc = testchill.chill.BuildChillTestCase(self.config_rel(), self.build_options)
+        self.assertEqual(tc.config.name(), 'chill-release')
+        self.assertEqual(tc.config.env()['OMEGAHOME'], self.omega_rel_dir)
+        self.assertEqual(tc.config.make_depend_target(), 'depend')
+        self.assertEqual(tc.config.make_target(), 'chill')
+        self.assertEqual(tc.name, 'chill-release')
+        logging.info('Building ' + tc.name)
+        runtest([tc])
+    
+    def test_cudachill_release(self):
+        tc = testchill.chill.BuildChillTestCase(self.config_rel(build_cuda=True), self.build_options)
+        self.assertEqual(tc.config.name(), 'cuda-chill-release')
+        self.assertEqual(tc.config.env()['OMEGAHOME'], self.omega_rel_dir)
+        self.assertEqual(tc.config.env()['CUDACHILL'], 'true')
+        self.assertEqual(tc.config.make_depend_target(), 'depend-cuda-chill')
+        self.assertEqual(tc.config.make_target(), 'cuda-chill')
+        self.assertEqual(tc.name, 'cuda-chill-release')
+        logging.info('Building ' + tc.name)
+        runtest([tc])
+    
+    def test_run_chill(self):
+        config = self.config()
+        btc = testchill.chill.BuildChillTestCase(config, self.build_options)
+        runtest([btc])
+        tc = testchill.chill.RunChillTestCase(config, 'test-cases/chill/test_scale.script', 'test-cases/chill/mm.c', wd=self.wd)
+        self.assertEqual(tc.chill_src, 'mm.c')
+        self.assertEqual(tc.chill_script, 'test_scale.script')
+        self.assertEqual(tc.chill_src_path, os.path.join(os.getcwd(), 'test-cases/chill/mm.c'))
+        self.assertEqual(tc.chill_script_path, os.path.join(os.getcwd(), 'test-cases/chill/test_scale.script'))
+        self.assertEqual(tc.chill_gensrc, 'rose_mm.c')
+        self.assertEqual(tc.name, 'chill:test_scale.script')
+        runchilltest([tc])
+    
+    def test_run_cudachill(self):
+        config = self.config(build_cuda=True)
+        btc = testchill.chill.BuildChillTestCase(config, self.build_options)
+        runtest([btc])
+        tc = testchill.chill.RunChillTestCase(config, 'test-cases/examples/cuda-chill/mm.lua', 'test-cases/examples/cuda-chill/mm.c', wd=self.wd)
+        self.assertEqual(tc.chill_src, 'mm.c')
+        self.assertEqual(tc.chill_script, 'mm.lua')
+        self.assertEqual(tc.chill_src_path, os.path.join(os.getcwd(), 'test-cases/examples/cuda-chill/mm.c'))
+        self.assertEqual(tc.chill_script_path, os.path.join(os.getcwd(), 'test-cases/examples/cuda-chill/mm.lua'))
+        self.assertEqual(tc.chill_gensrc, 'rose_mm.cu')
+        self.assertEqual(tc.name, 'cuda-chill:mm.lua')
+        runchilltest([tc])
+    
+    def test_chill_coverage(self):
+        tc = testchill.chill.BuildChillTestCase(self.config(), options={'coverage':True}, coverage_set=testchill.gcov.GcovSet())
+        self.assertEqual(tc.config.name(), 'chill')
+        self.assertEqual(tc.config.env()['OMEGAHOME'], self.omega_dev_dir)
+        self.assertEqual(tc.config.make_depend_target(), 'depend-chill')
+        self.assertEqual(tc.config.make_target(), 'chill')
+        self.assertEqual(tc.name, 'chill')
+        self.assertTrue(tc.options['coverage'])
+        logging.info('Building ' + tc.name)
+        if _runbuild:
+            runtest([tc])
+            self.assertTrue(os.path.exists(os.path.join(self.chill_dev_dir, 'ir_rose.gcno')))
+
diff --git a/test-chill/unit-tests/test_cpp_validate.py b/test-chill/unit-tests/test_cpp_validate.py
new file mode 100644
index 0000000..ed55c80
--- /dev/null
+++ b/test-chill/unit-tests/test_cpp_validate.py
@@ -0,0 +1,280 @@
+import functools
+import os
+import pprint
+import struct
+import unittest
+
+import testchill
+import testchill.util as util
+import testchill._cpp_validate_env as cpp_validate_env
+import testchill.cpp_validate as cpp_validate
+
+
+def listtodata(flist):
+    data = [struct.pack('f',n) for n in flist]
+    return functools.reduce(lambda a,v: a+v, data)
+
+class TestCppValidate(unittest.TestCase):
+    def setUp(self):
+        self.staging_dir_wd = os.getenv("STAGING_DIR_WD")
+        self.cpp_validate_dir = os.path.join(os.getcwd(),'unit-tests/cpp_validate_prog/')
+        self._parse_testproc_script_test_data = [
+                (('mm_one.testproc',),      None),
+                (('mm_one_with.testproc',), None)
+            ]
+        self._parse_testproc_python_test_data = [
+            ]
+        #self._generate_data_test_data = [
+        #        (('mm_one.cc','in'),               None),
+        #        (('mm_one.cc','out'),              None),
+        #        (('mm_one_with.cc','in'),          None),
+        #        (('mm_one_with.cc','out'),         None),
+        #        (('mm_one_defines.cc','in'),       None),
+        #        (('mm_one_defines.cc','out'),      None),
+        #        (('mm_one_with_defines.cc','in'),  None),
+        #        (('mm_one_with_defines.cc','out'), None),
+        #    ]
+        self._parse_testproc_iter_test_data = [
+                (('mm_one.cc',),
+                    [({'lang': 'script', 'name': 'mm_small', 'define':'dict()'},)]),
+                (('mm_one_with.cc',),
+                    [({'lang': 'script', 'name': 'mm_small', 'define':'dict()'},)]),
+                (('mm_one_defines.cc',),
+                    [({'lang': 'script', 'name': 'mm_small', 'define': "{'AN':3, 'BM':2, 'AMBN':5}"},)]),
+                (('mm_one_with_defines.cc',),
+                    [({'lang': 'script', 'name': 'mm_small', 'define': "{'AN':3, 'BM':2, 'AMBN':5}"},)])
+            ]
+        self._compile_gpp_test_data = [
+                ('mm_one_main.cc', 'mm_one')
+            ]
+        self._test_time_test_data = [
+                ((0.0034, 0.0025), True),
+                ((0.0025, 0.0034), False)
+            ]
+        self._test_validate_test_data = [
+                (('asdf', 'asdf'), True),
+                (('asdf', 'sdfg'), False)
+            ]
+        self._run_test_validate_time_test_data = [
+                (('mm_one_main.cc', 'mm_control', 'mm_one_longer_main.cc', 'mm_test', list(range(15)) + list(range(10))), (True, False)),
+                (('mm_one_longer_main.cc', 'mm_control', 'mm_one_main.cc', 'mm_test', list(range(15)) + list(range(10))), (True, True)),
+                (('mm_one_main.cc', 'mm_control', 'mm_one_longer_wrong_main.cc', 'mm_test', list(range(15)) + list(range(10))), (False, False))
+            ]
+        self._compile_run_test_validate_time_test_data = [
+                (('mm_one_main.cc', 'mm_one_longer_main.cc', list(range(15)) + list(range(10))), (True, False)),
+                (('mm_one_longer_main.cc', 'mm_one_main.cc', list(range(15)) + list(range(10))), (True, True)),
+                (('mm_one_main.cc', 'mm_one_longer_wrong_main.cc', list(range(15)) + list(range(10))), (False, False))
+            ]
+        self._generate_initial_data_test_data = [
+                (('mm_one.testproc', 'mm.cc', {}),      listtodata(list(range(15)) + list(range(10)) + [0]*6)),
+                (('mm_one_with.testproc', 'mm.cc', {}), listtodata(list(range(15)) + list(range(10)) + [0]*6)),
+            ]
+        self._format_insertion_dict_test_data = [
+                (('mm_one.testproc', 'mm_one.cc', {}),
+                    {
+                        'run': 'mm(A,B,C);',
+                        'read-out': 'datafile_initialize.read((char*)C, 6*sizeof(float));',
+                        'declarations': 'float A[3][5];\nfloat B[5][2];\nfloat C[3][2];',
+                        'write-out': 'datafile_out.write((char*)C, 6*sizeof(float));',
+                        'defines': '',
+                        'read-in': 'datafile_initialize.read((char*)A, 15*sizeof(float));\ndatafile_initialize.read((char*)B, 10*sizeof(float));'
+                    }),
+                (('mm_one_with.testproc', 'mm_one.cc', {}),
+                    {
+                        'run': 'mm(A,B,C);',
+                        'read-out': 'datafile_initialize.read((char*)C, 6*sizeof(float));',
+                        'declarations': 'float A[3][5];\nfloat B[5][2];\nfloat C[3][2];',
+                        'write-out': 'datafile_out.write((char*)C, 6*sizeof(float));',
+                        'defines': '',
+                        'read-in': 'datafile_initialize.read((char*)A, 15*sizeof(float));\ndatafile_initialize.read((char*)B, 10*sizeof(float));'
+                    }),
+            ]
+        self._write_generated_code_test_data = [
+                (('mm_one.testproc', 'mm_one.cc', 'control.cc', {}), 'mm_one_out.cc')
+            ]
+        self.run_from_src_test_data = [
+                (('mm_three_basic.cc', 'mm_three_slow.cc', self.staging_dir_wd), [('small', (True, False)), ('medium', (True, False)), ('big', (True, False))]),
+                (('mm_three_slow.cc', 'mm_three_basic.cc', self.staging_dir_wd), [('small', (True, True)), ('medium', (True, True)), ('big', (True, True))]),
+            ]
+    
+    def tearDown(self):
+        util.rmtemp()
+    
+    def test__get_script_parser(self):
+        cpp_validate._script_parser = None
+        self.assertIsNotNone(cpp_validate._get_script_parser())
+        self.assertIsNotNone(cpp_validate._get_script_parser())
+    
+    def _test_parse_src(self, parsefunc, test_data):
+        def parse_file(filename):
+            path = os.path.join(self.cpp_validate_dir, filename)
+            with open(path, 'r') as f:
+                src = f.read()
+            return parsefunc(src)
+        for args, expected in test_data:
+            srcfile, = args
+            val = parse_file(srcfile)
+            #TODO: make some assertions
+    
+    def test__parse_testproc_script(self):
+        self._test_parse_src(
+                cpp_validate._parse_testproc_script,
+                self._parse_testproc_script_test_data)
+    
+    @unittest.skip("not yet supported")
+    def test__parse_testproc_python(self):
+        self._test_parse_src(
+                cpp_validate._parse_testproc_python,
+                self._parse_testproc_python_test_data)
+    
+    def test__parse_testproc_iter(self):
+        def testfunc(filename):
+            path = os.path.join(self.cpp_validate_dir, filename)
+            util.shell('cp', [path, '.'], wd=self.staging_dir_wd)
+            return list(cpp_validate._parse_testproc_iter(filename, wd=self.staging_dir_wd))
+        for args, expected_list in self._parse_testproc_iter_test_data:
+            val_list = testfunc(*args)
+            for val, expected in zip(val_list, expected_list):
+                _, attr_val = val
+                attr_exp, = expected
+                self.assertEqual(attr_val, attr_exp)
+            #TODO: make some more assertions
+    
+    #def test__generate_data(self):
+    #    def testfunc(filename, direction):
+    #        path = os.path.join(self.cpp_validate_dir, filename)
+    #        util.shell('cp', [path, '.'], wd=self.staging_dir_wd)
+    #        for proc, attrs in cpp_validate._parse_testproc_iter(filename, wd=self.staging_dir_wd):
+    #            defines = eval(attrs['define'])
+    #            yield cpp_validate._generate_initial_data(proc, direction, filename, defines, wd=self.staging_dir_wd)
+    #        
+    #    for args, expected in self._generate_data_test_data:
+    #        for filename in testfunc(*args):
+    #            self.assertTrue(os.path.exists(filename))
+    #        #TODO: make some more assertions
+    
+    def test__compile_gpp(self):
+        def testfunc(src, obj):
+            src = os.path.join(self.cpp_validate_dir, src)
+            obj = os.path.join(self.staging_dir_wd, obj)
+            cpp_validate._compile_gpp(src, obj)
+        
+        for src, obj in self._compile_gpp_test_data:
+            testfunc(src, obj)
+            obj_path = os.path.join(self.staging_dir_wd, obj)
+            self.assertTrue(os.path.exists(obj_path))
+    
+    def test__test_time(self):
+        def testfunc(control_time, test_time):
+            return cpp_validate._test_time(control_time, test_time)
+        
+        for args, exp in self._test_time_test_data:
+            val = testfunc(*args)
+            self.assertEqual(val, exp)
+    
+    def test__test_validate(self):
+        def testfunc(control_data, test_data):
+            if util.python_version_major == 3:
+                control_data = bytes(map(ord,control_data))
+                test_data = bytes(map(ord,test_data))
+            control_file, control_path = util.mktemp('wb')
+            control_file.write(control_data)
+            control_file.close()
+            test_file, test_path = util.mktemp('wb')
+            test_file.write(test_data)
+            test_file.close()
+            return cpp_validate._test_validate(control_path, test_path)
+        
+        for args, exp in self._test_validate_test_data:
+            val = testfunc(*args)
+            self.assertEqual(val, exp)
+    
+    def test__run_test_validate_time(self):
+        def makeobj(src, obj):
+            src_path = os.path.join(self.cpp_validate_dir, src)
+            obj_path = os.path.join(self.staging_dir_wd, obj)
+            util.shell('g++', ['-o', obj_path, src_path, '-lrt'])
+            util.set_tempfile(obj_path)
+            return src_path, obj_path
+        
+        def testfunc(control_src, control_obj, test_src, test_obj, in_data):
+            control_src, control_obj = makeobj(control_src, control_obj)
+            test_src, test_obj = makeobj(test_src, test_obj)
+            inpath = os.path.join(self.staging_dir_wd, 'test.in.data')
+            with open(inpath, 'wb') as infile:
+                infile.write(listtodata(in_data))
+            util.set_tempfile(inpath)
+            return cpp_validate._run_test_validate_time(control_obj, test_obj, inpath)
+        
+        for args, expected in self._run_test_validate_time_test_data:
+            validate_val, time_val = testfunc(*args)
+            validate_exp, time_exp = expected
+            self.assertEqual(validate_val, validate_exp)
+            self.assertEqual(time_val, time_exp)
+    
+    def test__compile_run_test_validate_time(self):
+        def testfunc(control_src, test_src, in_data):
+            control_src = os.path.join(self.cpp_validate_dir, control_src)
+            test_src = os.path.join(self.cpp_validate_dir, test_src)
+            inpath = os.path.join(self.staging_dir_wd, 'test.in.data')
+            with open(inpath, 'wb') as infile:
+                infile.write(listtodata(in_data))
+            util.set_tempfile(inpath)
+            return cpp_validate._compile_run_test_validate_time(control_src, test_src, inpath)
+        
+        for args, expected in self._compile_run_test_validate_time_test_data:
+            validate_val, time_val = testfunc(*args)
+            validate_exp, time_exp = expected
+            self.assertEqual(validate_val, validate_exp)
+            self.assertEqual(time_val, time_exp)
+    
+    def test__generate_initial_data(self):
+        def testfunc(testprocfile, srcfile, defines):
+            testprocpath = os.path.join(self.cpp_validate_dir, testprocfile)
+            with open(testprocpath, 'r') as f:
+                srcpath = os.path.join(self.cpp_validate_dir, srcfile)
+                testproc = cpp_validate._parse_testproc_script(f.read())
+                return cpp_validate._generate_initial_data(testproc, srcpath, defines, wd=self.staging_dir_wd)
+        
+        for args, expected in self._generate_initial_data_test_data:
+            datafile = testfunc(*args)
+            with open(datafile, 'rb') as f:
+                self.assertEqual(len(f.read()), len(expected))
+    
+    def test__format_insertion_dict(self):
+        def testfunc(testprocfile, srcfile, defines):
+            testprocpath = os.path.join(self.cpp_validate_dir, testprocfile)
+            srcpath = os.path.join(self.cpp_validate_dir, srcfile)
+            with open(testprocpath, 'r') as f:
+                testproc = cpp_validate._parse_testproc_script(f.read())
+                #testproc.generatedata('in', defines)
+                #testproc.generatedata('out', defines)
+            return cpp_validate._format_insertion_dict(testproc, srcpath, defines)
+                
+        for args, exp in self._format_insertion_dict_test_data:
+            val = testfunc(*args)
+            for k,v in exp.items():
+                self.assertEqual(val[k], v)
+    
+    def test__write_generated_code(self):
+        def testfunc(testprocfile, srcname, destname, defines):
+            srcpath = os.path.join(self.cpp_validate_dir, srcname)
+            with open(os.path.join(self.cpp_validate_dir, testprocfile),'r') as f:
+                testproc = cpp_validate._parse_testproc_script(f.read())
+            return cpp_validate._write_generated_code(testproc, srcpath, defines, destname, self.staging_dir_wd)
+        for args, exp_path in self._write_generated_code_test_data:
+            val_path = testfunc(*args)
+            util.set_tempfile(val_path)
+            exp_path = os.path.join(self.cpp_validate_dir, exp_path)
+            with open(val_path, 'r') as valfile:
+                with open(exp_path, 'r') as expfile:
+                    self.assertEqual(valfile.read().splitlines(), expfile.read().splitlines())
+    
+    def test_run_from_src(self):
+        for args, expected in self.run_from_src_test_data:
+            control_src, test_src, wd = args
+            control_src = os.path.join(self.cpp_validate_dir, control_src)
+            test_src = os.path.join(self.cpp_validate_dir, test_src)
+            val = list(cpp_validate.run_from_src(control_src,test_src,wd))
+            self.assertEqual(val, expected)
+            
diff --git a/test-chill/unit-tests/test_gcov.py b/test-chill/unit-tests/test_gcov.py
new file mode 100644
index 0000000..2720ef7
--- /dev/null
+++ b/test-chill/unit-tests/test_gcov.py
@@ -0,0 +1,98 @@
+import itertools
+import pprint
+import os
+import textwrap
+import unittest
+
+import testchill.util as util
+import testchill.gcov as gcov
+
+
+class TestGCov(unittest.TestCase):
+    def setUp(self):
+        self.cprog_dir = os.path.join(os.getcwd(), 'unit-tests/cprog')
+        self.cprog_bin = os.path.join(self.cprog_dir, 'bin/sorter')
+    
+    def build_prog(self):
+        self.clean_prog()
+        util.shell('make', [], wd=self.cprog_dir)
+    
+    def clean_prog(self):
+        util.shell('make', ['clean'], wd=self.cprog_dir)
+    
+    def run_prog(self, alg, lst):
+        util.shell(self.cprog_bin, [alg] + list(map(str,lst)))
+    
+    def test_GcovLine_merge_lines(self):
+        '''Line numbers are unchanged after merging the lines of these two gcov listings:
+           56:   14:        while((index < pivot_index) && (list[index] >= pivot_value)) {
+            6:   15:            swap(list, index, pivot_index);
+            6:   16:            pivot_index--;
+            -:   17:        }
+        And
+            78:   14:        while((index < pivot_index) && (list[index] >= pivot_value)) {
+            18:   15:            swap(list, index, pivot_index);
+            18:   16:            pivot_index--;
+            -:   17:        }
+        '''
+        lines_proc_one = list(itertools.starmap(gcov.GcovLine,[
+                (14, {'proc_one':   56},'        while((index < pivot_index) && (list[index] >= pivot_value)) {'),
+                (15, {'proc_one':    6},'            swap(list, index, pivot_index);'),
+                (16, {'proc_one':    6},'            pivot_index--;'),
+                (17, {'proc_one': None},'        }')]))
+        lines_proc_two = list(itertools.starmap(gcov.GcovLine,[
+                (14, {'proc_two':   78},'        while((index < pivot_index) && (list[index] >= pivot_value)) {'),
+                (15, {'proc_two':   18},'            swap(list, index, pivot_index);'),
+                (16, {'proc_two':   18},'            pivot_index--;'),
+                (17, {'proc_two': None},'        }')]))
+        gcov.GcovLine.merge_lines(lines_proc_one, lines_proc_two)  # the assertions below inspect the first list after the merge
+        self.assertEqual(lines_proc_one[0].lineno, 14)
+        self.assertEqual(lines_proc_one[1].lineno, 15)
+        self.assertEqual(lines_proc_one[2].lineno, 16)
+        self.assertEqual(lines_proc_one[3].lineno, 17)
+    
+    def test_GcovLine_merge_and_count(self):  # count() after a merge is expected to total the per-process counts
+        lines_proc_one = list(itertools.starmap(gcov.GcovLine,[
+                (14, {'proc_one':   56},'        while((index < pivot_index) && (list[index] >= pivot_value)) {'),
+                (15, {'proc_one':    6},'            swap(list, index, pivot_index);'),
+                (16, {'proc_one':    6},'            pivot_index--;'),
+                (17, {'proc_one': None},'        }')]))
+        lines_proc_two = list(itertools.starmap(gcov.GcovLine,[
+                (14, {'proc_two':   78},'        while((index < pivot_index) && (list[index] >= pivot_value)) {'),
+                (15, {'proc_two':   18},'            swap(list, index, pivot_index);'),
+                (16, {'proc_two':   18},'            pivot_index--;'),
+                (17, {'proc_two': None},'        }')]))
+        gcov.GcovLine.merge_lines(lines_proc_one, lines_proc_two)
+        self.assertEqual(lines_proc_one[0].count(), 134)  # 56 + 78
+        self.assertEqual(lines_proc_one[1].count(), 24)   # 6 + 18
+        self.assertEqual(lines_proc_one[2].count(), 24)   # 6 + 18
+        self.assertIsNone(lines_proc_one[3].count())      # both inputs recorded None for line 17
+    
+    def test_GcovFile_parse_lines(self):
+        lines = textwrap.dedent(
+            '''-:0:SomeProperty:SomeValue
+               56:   14:        while((index < pivot_index) && (list[index] >= pivot_value)) {
+                6:   15:            swap(list, index, pivot_index);
+                6:   16:            pivot_index--;
+                -:   17:        }''').splitlines()
+        lines, properties = gcov.GcovFile.parse_lines(lines, 'proc')
+        self.assertEqual(lines[0].lineno, 14)
+        self.assertEqual(lines[0].count_by_process, {'proc': 56})
+        self.assertEqual(lines[0].code, '        while((index < pivot_index) && (list[index] >= pivot_value)) {')
+        self.assertEqual(lines[3].count_by_process, dict())
+    
+    def test_Gcov_parse(self):
+        self.build_prog()
+        self.run_prog('quicksort', [9, 4, 10, 6, 11, 0, 3, 7, 2, 1, 8, 5])
+        cov = gcov.Gcov.parse(self.cprog_dir, 'unsorted')
+        self.build_prog()
+        self.run_prog('quicksort', [5, 4, 3, 2, 1])
+        #pprint.pprint(vars(cov.files['QuickSorter.cc']))
+        cov.merge(gcov.Gcov.parse(self.cprog_dir, 'reverse'))
+        #pprint.pprint(vars(cov.files['QuickSorter.cc']))
+        #TODO: assert something
+        #cov.pretty_print()
+    
+    def tearDown(self):
+        self.clean_prog()
+        
diff --git a/test-chill/unit-tests/test_omega.py b/test-chill/unit-tests/test_omega.py
new file mode 100644
index 0000000..91d6a13
--- /dev/null
+++ b/test-chill/unit-tests/test_omega.py
@@ -0,0 +1,23 @@
+import os
+import unittest
+
+import testchill.omega
+import testchill.util
+
+
+class TestOmegaTestCases(unittest.TestCase):
+    '''Runs testchill.omega.BuildOmegaTestCase on the dirs named by OMEGA_DEV_SRC / OMEGA_RELEASE_SRC (None if unset).'''
+    def setUp(self):
+        self.omega_rel_dir = os.getenv('OMEGA_RELEASE_SRC')
+        self.omega_dev_dir = os.getenv('OMEGA_DEV_SRC')
+    def tearDown(self):
+        pass
+    
+    def test_omega_dev(self):
+        dev_case = testchill.omega.BuildOmegaTestCase(self.omega_dev_dir)
+        dev_case.run()
+    
+    def test_omega_release(self):
+        release_case = testchill.omega.BuildOmegaTestCase(self.omega_rel_dir, 'release')
+        release_case.run()
+    
diff --git a/test-chill/unit-tests/test_test.py b/test-chill/unit-tests/test_test.py
new file mode 100644
index 0000000..9745358
--- /dev/null
+++ b/test-chill/unit-tests/test_test.py
@@ -0,0 +1,380 @@
+import io
+import pickle
+import pprint
+import unittest
+import textwrap
+
+import testchill.test as test
+import testchill.util as util
+
+
+class Named(object):
+    '''Bare object exposing a name attribute and a setresult() that discards its argument.'''
+    def __init__(self, name):
+        self.name = name
+    def setresult(self, res):
+        return None
+
+def make_tc(rfunc=None, sufunc=None, tdfunc=None, name=None):
+    '''Build a test.TestCase called name whose setUp/run/tearDown call sufunc/rfunc/tdfunc when one is given.'''
+    class SomeTestCase(test.TestCase):
+        def setUp(self):
+            if sufunc:
+                sufunc(self)
+        
+        def run(self):
+            if rfunc is not None:  # identity check; run() returns whatever rfunc returns, else None
+                return rfunc(self)
+        
+        def tearDown(self):
+            if tdfunc:
+                tdfunc(self)
+    return SomeTestCase(name)
+
+def make_seqtc(subtests, sufunc=None, tdfunc=None, name=None):
+    '''Build a test.SequencialTestCase called name with one subtest per (label, callable) pair in subtests.'''
+    class SomeSeqTestCase(test.SequencialTestCase):
+        def __init__(self, name):
+            test.SequencialTestCase.__init__(self, name)
+            for label, step in subtests:
+                self.add_subtest(label, step)
+        
+        def setUp(self):
+            if sufunc:
+                sufunc(self)
+        
+        def tearDown(self):
+            if tdfunc:
+                tdfunc(self)
+    return SomeSeqTestCase(name)
+
+
+class TestTest(unittest.TestCase):
+    
+    def flip_n_switch(self, n, value=True):
+        '''
+        Build a callback that stores value (True by default) in self.switches[n]
+        '''
+        def flipswitch(tc):
+            self.switches.update({n: value})
+        return flipswitch
+    
+    def flip_n_switch_if_m(self, n, m, value=True):
+        '''Build a callback that stores value in self.switches[n] only when self.switches[m] is truthy.'''
+        def flipswitch(tc):
+            # Guard first: leave the switches untouched unless switch m is already set.
+            if not self.switches[m]:
+                return
+            self.switches[n] = value
+        return flipswitch
+    
+    def allways_raise(self, exc=None):
+        '''
+        Returns a function that raises exc, or a fresh Exception('Expected exception') when exc is None
+        '''
+        def throwexc(tc):
+            raise exc if exc is not None else Exception('Expected exception')
+        return throwexc
+    
+    def allways_fail(self):
+        '''
+        Build a callback that, given a test case tc, returns
+        test.TestResult.make_fail(test.TestResult, tc),
+        i.e. an explicit failure for that case
+        '''
+        return lambda tc: test.TestResult.make_fail(test.TestResult, tc)
+    
+    def allways_skip(self):
+        '''
+        Build a callback that, given a test case tc, returns
+        test.TestResult.make_skipped(test.TestResult, tc),
+        i.e. a skipped result for that case
+        '''
+        return lambda tc: test.TestResult.make_skipped(test.TestResult, tc)
+    
+    def allways_pass(self):
+        '''
+        Build a callback that, given a test case tc, returns
+        test.TestResult.make_pass(test.TestResult, tc),
+        i.e. an explicit pass for that case
+        '''
+        return lambda tc: test.TestResult.make_pass(test.TestResult, tc)
+    
+    def donothing(self):
+        '''
+        Build a callback that accepts a test case,
+        performs no work at all
+        and returns None
+        '''
+        return lambda tc: None
+    
+    def setUp(self):
+        self.switches = dict.fromkeys(range(3), False)  # switches 0, 1 and 2 all start unset
+    
+    def test_TestResult_make_pass(self):
+        '''A make_pass result answers true to passed() and false to errored(), failed() and skipped().'''
+        expectations = (('passed', self.assertTrue), ('errored', self.assertFalse), ('failed', self.assertFalse), ('skipped', self.assertFalse))
+        for predicate, check in expectations:
+            check(getattr(test.TestResult.make_pass(test.TestResult, Named('i-pass')), predicate)())
+    
+    def test_TestResult_make_error(self):
+        '''A make_error result answers true to errored() and false to passed(), failed() and skipped().'''
+        expectations = (('passed', self.assertFalse), ('errored', self.assertTrue), ('failed', self.assertFalse), ('skipped', self.assertFalse))
+        for predicate, check in expectations:
+            check(getattr(test.TestResult.make_error(test.TestResult, Named('i-error')), predicate)())
+    
+    def test_TestResult_make_fail(self):
+        '''A make_fail result answers true to failed() and false to passed(), errored() and skipped().'''
+        expectations = (('passed', self.assertFalse), ('errored', self.assertFalse), ('failed', self.assertTrue), ('skipped', self.assertFalse))
+        for predicate, check in expectations:
+            check(getattr(test.TestResult.make_fail(test.TestResult, Named('i-fail')), predicate)())
+    
+    def test_TestResult_make_skipped(self):
+        '''A make_skipped result answers true to skipped() and false to passed(), errored() and failed().'''
+        expectations = (('passed', self.assertFalse), ('errored', self.assertFalse), ('failed', self.assertFalse), ('skipped', self.assertTrue))
+        for predicate, check in expectations:
+            check(getattr(test.TestResult.make_skipped(test.TestResult, Named('i-skip')), predicate)())
+    
+    def test__result(self):
+        '''_result returns a passing result object itself, keeps a failure failed, and reports an Exception as errored.'''
+        passing = test.TestResult.make_pass(test.TestResult, Named('i-pass'))
+        self.assertTrue(passing is test._result(passing, Named('i-pass')))
+        self.assertTrue(test._result(test.TestResult.make_fail(test.TestResult, Named('i-fail')), Named('i-fail')).failed())
+        self.assertTrue(test._result(Exception(), Named('i-error')).errored())
+    
+    def test_run_empty(self):
+        test.run([])  # no assertion: passes as long as running zero test cases does not raise
+    
+    def test_run_run(self):
+        '''test.run must call the case's run(): switch 0 is only ever set by rfunc.'''
+        test.run([make_tc(rfunc=self.flip_n_switch(0))])
+        self.assertTrue(self.switches[0])
+    
+    def test_run_setupfirst(self):
+        '''setUp precedes run: rfunc sets switch 0 only if sufunc has already set switch 1.'''
+        case = make_tc(rfunc=self.flip_n_switch_if_m(0, 1), sufunc=self.flip_n_switch(1))
+        test.run([case])
+        self.assertTrue(self.switches[0])
+    
+    def test_run_teardownlast(self):
+        '''tearDown follows run: tdfunc sets switch 0 only if rfunc has already set switch 1.'''
+        case = make_tc(rfunc=self.flip_n_switch(1), tdfunc=self.flip_n_switch_if_m(0, 1))
+        test.run([case])
+        self.assertTrue(self.switches[0])
+    
+    def test_run_teardown_allways(self):
+        '''tearDown still runs (setting switch 0) when run raises.'''
+        case = make_tc(rfunc=self.allways_raise(), tdfunc=self.flip_n_switch(0))
+        test.run([case])
+        self.assertTrue(self.switches[0])
+    
+    def test_run_pass_result(self):
+        '''A case whose run function returns nothing is reported as passed and as nothing else.'''
+        case = make_tc(rfunc=self.donothing(), name='pass')
+        results = test.run([case])
+        outcome = results[0]
+        self.assertTrue(outcome.passed())
+        self.assertFalse(outcome.errored())
+        self.assertFalse(outcome.failed())
+        self.assertFalse(outcome.skipped())
+    
+    def test_run_error_result(self):
+        '''A case whose run function raises is reported as errored and as nothing else.'''
+        case = make_tc(rfunc=self.allways_raise(), name='error')
+        results = test.run([case])
+        outcome = results[0]
+        self.assertFalse(outcome.passed())
+        self.assertTrue(outcome.errored())
+        self.assertFalse(outcome.failed())
+        self.assertFalse(outcome.skipped())
+    
+    def test_run_fail_result(self):
+        '''A case whose run function returns a make_fail result is reported as failed and as nothing else.'''
+        case = make_tc(rfunc=self.allways_fail(), name='fail')
+        results = test.run([case])
+        outcome = results[0]
+        self.assertFalse(outcome.passed())
+        self.assertFalse(outcome.errored())
+        self.assertTrue(outcome.failed())
+        self.assertFalse(outcome.skipped())
+
+    def test_run_skipped_result(self):
+        '''A case whose run function returns a make_skipped result is reported as skipped and as nothing else.'''
+        case = make_tc(rfunc=self.allways_skip(), name='skipped')
+        results = test.run([case])
+        outcome = results[0]
+        self.assertFalse(outcome.passed())
+        self.assertFalse(outcome.errored())
+        self.assertFalse(outcome.failed())
+        self.assertTrue(outcome.skipped())
+    
+    def test_run_seq_empty(self):
+        test.run([make_seqtc([])])  # no assertion: a sequence case with no subtests must simply not raise
+    
+    def test_run_seq_allrun(self):
+        '''Every subtest of a passing sequence runs: all three switches end up set.'''
+        steps = [('one', self.flip_n_switch(0)),
+                 ('two', self.flip_n_switch(1)),
+                 ('three', self.flip_n_switch(2))]
+        results = test.run([make_seqtc(steps, name='seq')])
+        self.assertTrue(results[0].passed())
+        self.assertTrue(self.switches[0])
+        self.assertTrue(self.switches[1])
+        self.assertTrue(self.switches[2])
+    
+    def test_run_seq_until_fail(self):
+        '''A failing subtest fails the sequence, and the subtest after it never runs (switch 1 stays unset).'''
+        steps = [('one', self.flip_n_switch(0)),
+                 ('two', self.allways_fail()),
+                 ('trhee', self.flip_n_switch(1))]
+        results = test.run([make_seqtc(steps, name='seq')])
+        self.assertTrue(results[0].failed())
+        self.assertTrue(self.switches[0])
+        self.assertFalse(self.switches[1])
+    
+    def test_run_seq_until_error(self):
+        '''A raising subtest errors the sequence, and the subtest after it never runs (switch 1 stays unset).'''
+        steps = [('one', self.flip_n_switch(0)),
+                 ('two', self.allways_raise()),
+                 ('trhee', self.flip_n_switch(1))]
+        results = test.run([make_seqtc(steps, name='seq')])
+        self.assertTrue(results[0].errored())
+        self.assertTrue(self.switches[0])
+        self.assertFalse(self.switches[1])
+    
+    def test_persistance_one_pass(self):
+        '''The attributes of a single passing result compare equal after a pickle round trip.'''
+        written = test.run([make_tc(self.allways_pass(), name='tc-name')])
+        restored = util.withtmp(lambda f: pickle.dump(written, f),
+                                lambda f: pickle.load(f))
+        self.assertEqual([vars(r) for r in written], [vars(r) for r in restored])
+    
+    def test_persistance_seq(self):
+        '''Status and name of a sequence result and of each of its sub-results survive a pickle round trip.'''
+        steps = [('one', self.flip_n_switch(0)),
+                 ('two', self.flip_n_switch(1))]
+        written = test.run([make_seqtc(steps, name='seq')])
+        restored = util.withtmp(lambda f: pickle.dump(written, f),
+                                lambda f: pickle.load(f))
+        for i, before in enumerate(written):
+            after = restored[i]
+            self.assertEqual(before.status, after.status)
+            self.assertEqual(before.testcase_name, after.testcase_name)
+            for j, sub_before in enumerate(before.sub_results):
+                sub_after = after.sub_results[j]
+                self.assertEqual(sub_before.status, sub_after.status)
+                self.assertEqual(sub_before.testcase_name, sub_after.testcase_name)
+    
+    def test_persistance_seq_error(self):
+        '''Same round-trip check as for a passing sequence, with a second subtest that raises.'''
+        steps = [('one', self.flip_n_switch(0)),
+                 ('two', self.allways_raise())]
+        written = test.run([make_seqtc(steps, name='seq')])
+        restored = util.withtmp(lambda f: pickle.dump(written, f),
+                                lambda f: pickle.load(f))
+        for i, before in enumerate(written):
+            after = restored[i]
+            self.assertEqual(before.status, after.status)
+            self.assertEqual(before.testcase_name, after.testcase_name)
+            for j, sub_before in enumerate(before.sub_results):
+                sub_after = after.sub_results[j]
+                self.assertEqual(sub_before.status, sub_after.status)
+                self.assertEqual(sub_before.testcase_name, sub_after.testcase_name)
+        
+    def test_FailedTestResult_init(self):
+        '''make_fail with FailedTestResult yields a failed-only result that keeps the case name and the reason.'''
+        failure = test.TestResult.make_fail(test.FailedTestResult, Named('i-fail'), reason='testing')
+        self.assertFalse(failure.passed())
+        self.assertTrue(failure.failed())
+        self.assertFalse(failure.errored())
+        self.assertFalse(failure.skipped())
+        self.assertEqual((failure.testcase_name, failure.reason), ('i-fail', 'testing'))
+    
+    def test_pretty_print(self):
+        def pretty_print_to_string(results_iter):  # capture what pretty_print_results writes to outfile
+            sio = util.StringIO()
+            test.pretty_print_results(results_iter, outfile=sio)
+            return sio.getvalue()
+        # One result of each kind, built directly rather than by running test cases.
+        results_iter = iter([
+                test.TestResult.make_pass(test.TestResult, Named('i-pass')),
+                test.TestResult.make_error(test.UnhandledExceptionTestResult, Named('i-error'), Exception, Exception(), None),
+                test.TestResult.make_fail(test.FailedTestResult, Named('i-fail'), reason='Oops'),
+                test.TestResult.make_skipped(test.TestResult, Named('i-skip'))
+            ])
+        # Expected report: the four counters, then a detail section for the error and for the failure only.
+        self.assertEqual(pretty_print_to_string(results_iter), textwrap.dedent('''\
+            Passed: 1
+            Errors: 1
+            Failed: 1
+            Skipped: 1
+            ============================================================
+            error: i-error
+            ------------------------------------------------------------
+            <class 'Exception'>: 
+            ------------------------------------------------------------
+            ============================================================
+            fail: i-fail
+            ------------------------------------------------------------
+            Oops
+            ------------------------------------------------------------
+            '''))
+        result_set = test.run([make_seqtc([
+                ('one', self.flip_n_switch(0)),
+                ('two', self.flip_n_switch(1)),
+                ('three', self.flip_n_switch(2))],
+                name='seq')])
+        result_set_fail = test.run([make_seqtc([
+                ('one', self.flip_n_switch(0)),
+                ('two', lambda s: test.TestResult.make_fail(test.FailedTestResult, s, 'Oops')),
+                ('trhee', self.flip_n_switch(1))],
+                name='seq')])
+        result_set_error = test.run([make_seqtc([
+                ('one', self.flip_n_switch(0)),
+                ('two', lambda s: test.TestResult.make_error(test.UnhandledExceptionTestResult, s, Exception, Exception(), None))],
+                name = 'seq')])
+        # The three runs above: every subtest passes / subtest 'two' fails / subtest 'two' errors.
+        
+        
+        compound_set = pretty_print_to_string(result_set)
+        compound_set_fail = pretty_print_to_string(result_set_fail)
+        compound_set_error = pretty_print_to_string(result_set_error)
+        # A failed or errored sequence is expected to list each subtest up to and including the one that stopped it.
+        self.assertEqual(compound_set, textwrap.dedent('''\
+            Passed: 1
+            Errors: 0
+            Failed: 0
+            Skipped: 0
+            '''))
+        self.assertEqual(compound_set_fail, textwrap.dedent('''\
+            Passed: 0
+            Errors: 0
+            Failed: 1
+            Skipped: 0
+            ============================================================
+            fail: seq
+            ------------------------------------------------------------
+            pass: one
+            fail: two
+            Oops
+            ------------------------------------------------------------
+            '''))
+        self.assertEqual(compound_set_error, textwrap.dedent('''\
+            Passed: 0
+            Errors: 1
+            Failed: 0
+            Skipped: 0
+            ============================================================
+            error: seq
+            ------------------------------------------------------------
+            pass: one
+            error: two
+            <class 'Exception'>: 
+            ------------------------------------------------------------
+            '''))
+    
+    def tearDown(self):
+        util.rmtemp()  # presumably deletes the temp file(s) util.withtmp created in the persistance tests -- confirm
+
+if __name__ == '__main__':
+    unittest.main()
diff --git a/test-chill/unit-tests/test_util.py b/test-chill/unit-tests/test_util.py
new file mode 100644
index 0000000..fbb0c79
--- /dev/null
+++ b/test-chill/unit-tests/test_util.py
@@ -0,0 +1,107 @@
+import os
+import subprocess
+import tempfile
+import unittest
+
+import testchill.util as util
+
+### Most of these are sanity checks. ###
+
+class TestUtil(unittest.TestCase):
+    '''Sanity checks for helpers in testchill.util; temp files made here are deleted in tearDown.'''
+    def setUp(self):
+        self.tempfiles = []
+    
+    def maketempfiles(self, n=1):
+        '''Create n temp files and return mkstemp's (fd, path) pairs; each fd is closed before returning.'''
+        files = tuple([tempfile.mkstemp(text=True) for i in range(n)])
+        for fd, path in files:
+            os.close(fd)  # mkstemp leaves the descriptor open; tests reopen the file by path
+            self.tempfiles.append(path)
+        return files
+    
+    def test_shell(self):
+        sbla = subprocess.check_output(['ls', '-la', 'test-cases/chill'])
+        # check_output returns bytes on Python 3; decode before comparing with util.shell's result.
+        if util.python_version_major == 3:
+            sbla = sbla.decode()
+        shla = util.shell('ls', ['-la', 'test-cases/chill'])
+        self.assertEqual(sbla, shla)
+    
+    def test_shell_env(self):
+        env = {'STRING_VAR':'string','NUMBER_VAR':3,'DEFINED_VAR':1}
+        self.assertEqual(util.shell('echo', ['$STRING_VAR'], env=env), env['STRING_VAR'] + '\n')
+        self.assertEqual(util.shell('echo', ['$NUMBER_VAR'], env=env), str(env['NUMBER_VAR']) + '\n')
+        self.assertEqual(util.shell('echo', ['$DEFINED_VAR'], env=env), str(env['DEFINED_VAR']) + '\n')
+    
+    def test_shell_tofile(self):
+        tfile = self.maketempfiles(1)
+        fname = tfile[0][1]
+        with open(fname, 'w') as f:
+            util.shell('ls', ['-la', 'test-cases/chill'], stdout=f)
+        with open(fname, 'r') as f:
+            self.assertEqual(util.shell('ls', ['-la', 'test-cases/chill']), f.read())
+    
+    def test_copy(self):
+        class C(object):
+            pass
+        c = C()
+        c.x = 'x'
+        a = util.copy(c)
+        b = util.copy(c)
+        a.x = 'y'
+        self.assertEqual(c.x,'x')
+        self.assertEqual(b.x,'x')
+        self.assertEqual(a.x,'y')
+    
+    def test_callonce(self):
+        def foo():
+            return 3
+        foo_once = util.callonce(foo)
+        self.assertEqual(foo_once(), 3)
+        self.assertRaises(Exception, foo_once)
+    
+    def test_isdiff(self):
+        testdata = [
+                (('aaa','aaa'),(False,'  aaa')),
+                (('aab','aaa'),(True, '- aab\n+ aaa')),
+                (('a\nb','a\nb\nc'),(True, '  a\n  b\n+ c')),
+                (('a\na\nc','a\nb\nc'),(True, '  a\n- a\n+ b\n  c'))
+            ]
+        for args, expected in testdata:
+            isdiff_exp, diff_exp = expected
+            isdiff_val, diff_val = util.isdiff(*args)
+            self.assertEqual(isdiff_val, isdiff_exp)
+            self.assertEqual(diff_val, diff_exp)
+    
+    def test_filterext(self):
+        testdata = [
+                ((['.c','.py'],['a.c','b.txt','c.py']),['a.c','c.py'])
+            ]
+        for args, expected in testdata:
+            self.assertEqual(list(util.filterext(*args)), expected)
+    
+    #TODO:
+    #def test_extract_tag(self):
+    #    testdata = [
+    #            (('a', 'abc<a>def</a>ghi<b>jkl</b>mno<c>pqr</c>stu<b>zwx</b>yz'), ['def']),
+    #            (('b', 'abc<a>def</a>ghi<b>jkl</b>mno<c>pqr</c>stu<b>zwx</b>yz'), ['jkl','zwx']),
+    #            (('c', 'abc<a>def</a>ghi<b>jkl</b>mno<c>pqr</c>stu<b>zwx</b>yz'), ['pqr']),
+    #            (('d', 'abc<a>def</a>ghi<b>jkl</b>mno<c>pqr</c>stu<b>zwx</b>yz'), []),
+    #        ]
+    #    for args, expected in testdata:
+    #        self.assertEqual(list(util.extract_tag(*args)), expected)
+    
+    def test_textstream(self):
+        testdata = [
+                (('asdf',),'asdf')
+            ]
+        for args, expected in testdata:
+            stream = util.textstream(*args)
+            self.assertTrue(hasattr(stream,'read'))
+            self.assertEqual(stream.read(), expected)
+    
+    def tearDown(self):
+        for f in self.tempfiles:
+            os.remove(f)
+
diff --git a/verify-chill/mm.c b/verify-chill/mm.c
new file mode 100644
index 0000000..354d929
--- /dev/null
+++ b/verify-chill/mm.c
@@ -0,0 +1,15 @@
+
+
+void mm(float **A, float **B, float **C, int ambn, int an, int bm) {
+  int i, j, n;
+
+  for(i = 0; i < an; i++) {
+    for(j = 0; j < bm; j++) {
+      C[i][j] = 0.0f;
+      for(n = 0; n < ambn; n++) {
+        C[i][j] += A[i][n] * B[n][j];
+      }
+    }
+  }
+}
+
diff --git a/test-chill/runtests b/verify-chill/runtests
index e5021f8..e5021f8 100755
--- a/test-chill/runtests
+++ b/verify-chill/runtests
diff --git a/verify-chill/test_distribute.py b/verify-chill/test_distribute.py
new file mode 100644
index 0000000..760d29f
--- /dev/null
+++ b/verify-chill/test_distribute.py
@@ -0,0 +1,12 @@
+from chill import *
+
+source('mm.c')
+procedure('mm')
+#format: rose
+loop(0)
+
+known('ambn > 0')
+known('an > 0')
+known('bm > 0')
+distribute([0,1], 1)
+print_code()
diff --git a/verify-chill/test_fuse.py b/verify-chill/test_fuse.py
new file mode 100644
index 0000000..32c594c
--- /dev/null
+++ b/verify-chill/test_fuse.py
@@ -0,0 +1,14 @@
+from chill import *
+
+source('mm.c')
+procedure('mm')
+#format: rose
+loop(0)
+
+known('ambn > 0')
+known('an > 0')
+known('bm > 0')
+distribute([0,1], 1)
+print_code()
+fuse([0,1], 1)
+print_code()
diff --git a/verify-chill/test_known.py b/verify-chill/test_known.py
new file mode 100644
index 0000000..662d7d0
--- /dev/null
+++ b/verify-chill/test_known.py
@@ -0,0 +1,11 @@
+from chill import *
+
+source('mm.c')
+procedure('mm')
+#format: rose
+loop(0)
+
+known('ambn > 0')
+known('an > 0')
+known('bm > 0')
+print_code()
diff --git a/verify-chill/test_known_2.py b/verify-chill/test_known_2.py
new file mode 100644
index 0000000..5b16325
--- /dev/null
+++ b/verify-chill/test_known_2.py
@@ -0,0 +1,9 @@
+from chill import *
+
+source('mm.c')
+procedure('mm')
+#format: rose
+loop(0)
+
+known(['ambn > 0', 'an > 0', 'bm > 0'])
+print_code()
diff --git a/verify-chill/test_original.py b/verify-chill/test_original.py
new file mode 100644
index 0000000..2d17799
--- /dev/null
+++ b/verify-chill/test_original.py
@@ -0,0 +1,12 @@
+from chill import *
+
+source('mm.c')
+procedure('mm')
+#format: rose
+loop(0)
+
+known(['ambn > 4', 'an > 0', 'bm > 0'])
+peel(1,3,4)
+print_code()
+original()
+print_code()
diff --git a/verify-chill/test_peel.py b/verify-chill/test_peel.py
new file mode 100644
index 0000000..bb6c583
--- /dev/null
+++ b/verify-chill/test_peel.py
@@ -0,0 +1,10 @@
+from chill import *
+
+source('mm.c')
+procedure('mm')
+#format: rose
+loop(0)
+
+known(['ambn > 4', 'an > 0', 'bm > 0'])
+peel(1,3,4)
+print_code()
diff --git a/test-chill/test_permute.py b/verify-chill/test_permute.py
index ff4a42a..ff4a42a 100644
--- a/test-chill/test_permute.py
+++ b/verify-chill/test_permute.py
diff --git a/verify-chill/test_print_code.py b/verify-chill/test_print_code.py
new file mode 100644
index 0000000..004c46c
--- /dev/null
+++ b/verify-chill/test_print_code.py
@@ -0,0 +1,8 @@
+from chill import *
+
+source('mm.c')
+procedure('mm')
+#format: rose
+loop(0)
+
+print_code()
diff --git a/verify-chill/test_print_dep.py b/verify-chill/test_print_dep.py
new file mode 100644
index 0000000..a3dee29
--- /dev/null
+++ b/verify-chill/test_print_dep.py
@@ -0,0 +1,8 @@
+from chill import *
+
+source('mm.c')
+procedure('mm')
+#format: rose
+loop(0)
+
+print_dep()
diff --git a/verify-chill/test_print_space.py b/verify-chill/test_print_space.py
new file mode 100644
index 0000000..2f8f678
--- /dev/null
+++ b/verify-chill/test_print_space.py
@@ -0,0 +1,8 @@
+from chill import *
+
+source('mm.c')
+procedure('mm')
+#format: rose
+loop(0)
+
+print_space()
diff --git a/verify-chill/test_reverse.py b/verify-chill/test_reverse.py
new file mode 100644
index 0000000..a97c611
--- /dev/null
+++ b/verify-chill/test_reverse.py
@@ -0,0 +1,12 @@
+from chill import *
+
+source('mm.c')
+procedure('mm')
+#format: rose
+loop(0)
+
+known(['ambn > 0', 'an > 0', 'bm > 0'])
+distribute([0,1],1)
+reverse([1],1)
+reverse([1],2)
+print_code()
diff --git a/verify-chill/test_scale.py b/verify-chill/test_scale.py
new file mode 100644
index 0000000..ee8455d
--- /dev/null
+++ b/verify-chill/test_scale.py
@@ -0,0 +1,12 @@
+from chill import *
+
+source('mm.c')
+procedure('mm')
+#format: rose
+loop(0)
+
+known(['ambn > 0', 'an > 0', 'bm > 0'])
+distribute([0,1],1)
+scale([1],1,4)
+scale([1],2,4)
+print_code()
diff --git a/verify-chill/test_shift.py b/verify-chill/test_shift.py
new file mode 100644
index 0000000..b3fc6ab
--- /dev/null
+++ b/verify-chill/test_shift.py
@@ -0,0 +1,12 @@
+from chill import *
+
+source('mm.c')
+procedure('mm')
+#format: rose
+loop(0)
+
+known('ambn > 0')
+known('an > 0')
+known('bm > 0')
+shift([1],1,4)
+print_code()
diff --git a/verify-chill/test_shift_to.py b/verify-chill/test_shift_to.py
new file mode 100644
index 0000000..f3537c5
--- /dev/null
+++ b/verify-chill/test_shift_to.py
@@ -0,0 +1,12 @@
+from chill import *
+
+source('mm.c')
+procedure('mm')
+#format: rose
+loop(0)
+
+known('ambn > 0')
+known('an > 0')
+known('bm > 0')
+shift_to(1,1,4)
+print_code()
diff --git a/verify-chill/test_skew.py b/verify-chill/test_skew.py
new file mode 100644
index 0000000..c7271f4
--- /dev/null
+++ b/verify-chill/test_skew.py
@@ -0,0 +1,12 @@
+from chill import *
+
+source('mm.c')
+procedure('mm')
+#format: rose
+loop(0)
+
+known('ambn > 0')
+known('an > 0')
+known('bm > 0')
+skew([1],1,[4])
+print_code()
diff --git a/verify-chill/test_tile.py b/verify-chill/test_tile.py
new file mode 100644
index 0000000..fbe0368
--- /dev/null
+++ b/verify-chill/test_tile.py
@@ -0,0 +1,14 @@
+from chill import *
+
+source('mm.c')
+procedure('mm')
+#format: rose
+loop(0)
+
+#known('ambn > 0')
+#known('an > 0')
+#known('bm > 0')
+#tile(1, 1, 4, 1)
+#tile(1, 3, 4, 2)
+tile(0,2,4)
+print_code()
diff --git a/verify-chill/test_unroll.py b/verify-chill/test_unroll.py
new file mode 100644
index 0000000..39dd0db
--- /dev/null
+++ b/verify-chill/test_unroll.py
@@ -0,0 +1,13 @@
+from chill import *
+
+source('mm.c')
+procedure('mm')
+#format: rose
+loop(0)
+
+known('ambn > 0')
+known('an > 0')
+known('bm > 0')
+distribute([0,1], 1)
+unroll(1, 3, 4)
+print_code()
diff --git a/verify-chill/test_unroll_extra.py b/verify-chill/test_unroll_extra.py
new file mode 100644
index 0000000..929313c
--- /dev/null
+++ b/verify-chill/test_unroll_extra.py
@@ -0,0 +1,12 @@
+from chill import *
+
+source('mm.c')
+procedure('mm')
+#format: rose
+loop(0)
+
+known('ambn > 0')
+known('an > 0')
+known('bm > 0')
+unroll_extra(1, 2, 4)
+print_code()
diff --git a/test-chill/testrun.log b/verify-chill/testrun.log
index 7611517..7611517 100644
--- a/test-chill/testrun.log
+++ b/verify-chill/testrun.log
diff --git a/test-chill/with/test_distribute.py.out b/verify-chill/with/test_distribute.py.out
index f071e87..f071e87 100644
--- a/test-chill/with/test_distribute.py.out
+++ b/verify-chill/with/test_distribute.py.out
diff --git a/test-chill/with/test_fuse.py.out b/verify-chill/with/test_fuse.py.out
index 062bd15..062bd15 100644
--- a/test-chill/with/test_fuse.py.out
+++ b/verify-chill/with/test_fuse.py.out
diff --git a/test-chill/with/test_known.py.out b/verify-chill/with/test_known.py.out
index 062bd15..062bd15 100644
--- a/test-chill/with/test_known.py.out
+++ b/verify-chill/with/test_known.py.out
diff --git a/test-chill/with/test_known_2.py.out b/verify-chill/with/test_known_2.py.out
index 062bd15..062bd15 100644
--- a/test-chill/with/test_known_2.py.out
+++ b/verify-chill/with/test_known_2.py.out
diff --git a/test-chill/with/test_original.py.out b/verify-chill/with/test_original.py.out
index 3d84795..3d84795 100644
--- a/test-chill/with/test_original.py.out
+++ b/verify-chill/with/test_original.py.out
diff --git a/test-chill/with/test_peel.py.out b/verify-chill/with/test_peel.py.out
index 3d84795..3d84795 100644
--- a/test-chill/with/test_peel.py.out
+++ b/verify-chill/with/test_peel.py.out
diff --git a/test-chill/with/test_permute.py.out b/verify-chill/with/test_permute.py.out
index 709b7ae..709b7ae 100644
--- a/test-chill/with/test_permute.py.out
+++ b/verify-chill/with/test_permute.py.out
diff --git a/test-chill/with/test_print_code.py.out b/verify-chill/with/test_print_code.py.out
index f6e21a7..f6e21a7 100644
--- a/test-chill/with/test_print_code.py.out
+++ b/verify-chill/with/test_print_code.py.out
diff --git a/test-chill/with/test_print_dep.py.out b/verify-chill/with/test_print_dep.py.out
index f6e21a7..f6e21a7 100644
--- a/test-chill/with/test_print_dep.py.out
+++ b/verify-chill/with/test_print_dep.py.out
diff --git a/test-chill/with/test_print_space.py.out b/verify-chill/with/test_print_space.py.out
index f6e21a7..f6e21a7 100644
--- a/test-chill/with/test_print_space.py.out
+++ b/verify-chill/with/test_print_space.py.out
diff --git a/test-chill/with/test_reverse.py.out b/verify-chill/with/test_reverse.py.out
index 1e9c9b6..1e9c9b6 100644
--- a/test-chill/with/test_reverse.py.out
+++ b/verify-chill/with/test_reverse.py.out
diff --git a/test-chill/with/test_scale.py.out b/verify-chill/with/test_scale.py.out
index dff7515..dff7515 100644
--- a/test-chill/with/test_scale.py.out
+++ b/verify-chill/with/test_scale.py.out
diff --git a/test-chill/with/test_shift.py.out b/verify-chill/with/test_shift.py.out
index 6eaa244..6eaa244 100644
--- a/test-chill/with/test_shift.py.out
+++ b/verify-chill/with/test_shift.py.out
diff --git a/test-chill/with/test_shift_to.py.out b/verify-chill/with/test_shift_to.py.out
index 01c7ff8..01c7ff8 100644
--- a/test-chill/with/test_shift_to.py.out
+++ b/verify-chill/with/test_shift_to.py.out
diff --git a/test-chill/with/test_skew.py.out b/verify-chill/with/test_skew.py.out
index b3da044..b3da044 100644
--- a/test-chill/with/test_skew.py.out
+++ b/verify-chill/with/test_skew.py.out
diff --git a/test-chill/with/test_tile.py.out b/verify-chill/with/test_tile.py.out
index 47bae06..47bae06 100644
--- a/test-chill/with/test_tile.py.out
+++ b/verify-chill/with/test_tile.py.out
diff --git a/test-chill/with/test_unroll.py.out b/verify-chill/with/test_unroll.py.out
index 7770c60..7770c60 100644
--- a/test-chill/with/test_unroll.py.out
+++ b/verify-chill/with/test_unroll.py.out
diff --git a/test-chill/with/test_unroll_extra.py.out b/verify-chill/with/test_unroll_extra.py.out
index 232d1c8..232d1c8 100644
--- a/test-chill/with/test_unroll_extra.py.out
+++ b/verify-chill/with/test_unroll_extra.py.out
diff --git a/test-chill/without/test_distribute.py.out b/verify-chill/without/test_distribute.py.out
index e90e735..e90e735 100644
--- a/test-chill/without/test_distribute.py.out
+++ b/verify-chill/without/test_distribute.py.out
diff --git a/test-chill/without/test_fuse.py.out b/verify-chill/without/test_fuse.py.out
index e8f0856..e8f0856 100644
--- a/test-chill/without/test_fuse.py.out
+++ b/verify-chill/without/test_fuse.py.out
diff --git a/test-chill/without/test_known.py.out b/verify-chill/without/test_known.py.out
index e8f0856..e8f0856 100644
--- a/test-chill/without/test_known.py.out
+++ b/verify-chill/without/test_known.py.out
diff --git a/test-chill/without/test_known_2.py.out b/verify-chill/without/test_known_2.py.out
index e8f0856..e8f0856 100644
--- a/test-chill/without/test_known_2.py.out
+++ b/verify-chill/without/test_known_2.py.out
diff --git a/test-chill/without/test_original.py.out b/verify-chill/without/test_original.py.out
index 15cfc7b..15cfc7b 100644
--- a/test-chill/without/test_original.py.out
+++ b/verify-chill/without/test_original.py.out
diff --git a/test-chill/without/test_peel.py.out b/verify-chill/without/test_peel.py.out
index 15cfc7b..15cfc7b 100644
--- a/test-chill/without/test_peel.py.out
+++ b/verify-chill/without/test_peel.py.out
diff --git a/test-chill/without/test_permute.py.out b/verify-chill/without/test_permute.py.out
index 5aeecfd..5aeecfd 100644
--- a/test-chill/without/test_permute.py.out
+++ b/verify-chill/without/test_permute.py.out
diff --git a/test-chill/without/test_print_code.py.out b/verify-chill/without/test_print_code.py.out
index f96eb53..f96eb53 100644
--- a/test-chill/without/test_print_code.py.out
+++ b/verify-chill/without/test_print_code.py.out
diff --git a/test-chill/without/test_print_dep.py.out b/verify-chill/without/test_print_dep.py.out
index f96eb53..f96eb53 100644
--- a/test-chill/without/test_print_dep.py.out
+++ b/verify-chill/without/test_print_dep.py.out
diff --git a/test-chill/without/test_print_space.py.out b/verify-chill/without/test_print_space.py.out
index f96eb53..f96eb53 100644
--- a/test-chill/without/test_print_space.py.out
+++ b/verify-chill/without/test_print_space.py.out
diff --git a/test-chill/without/test_reverse.py.out b/verify-chill/without/test_reverse.py.out
index 617d115..617d115 100644
--- a/test-chill/without/test_reverse.py.out
+++ b/verify-chill/without/test_reverse.py.out
diff --git a/test-chill/without/test_scale.py.out b/verify-chill/without/test_scale.py.out
index 0f7be2a..0f7be2a 100644
--- a/test-chill/without/test_scale.py.out
+++ b/verify-chill/without/test_scale.py.out
diff --git a/test-chill/without/test_shift.py.out b/verify-chill/without/test_shift.py.out
index a26758b..a26758b 100644
--- a/test-chill/without/test_shift.py.out
+++ b/verify-chill/without/test_shift.py.out
diff --git a/test-chill/without/test_shift_to.py.out b/verify-chill/without/test_shift_to.py.out
index b29724c..b29724c 100644
--- a/test-chill/without/test_shift_to.py.out
+++ b/verify-chill/without/test_shift_to.py.out
diff --git a/test-chill/without/test_skew.py.out b/verify-chill/without/test_skew.py.out
index 04c4208..04c4208 100644
--- a/test-chill/without/test_skew.py.out
+++ b/verify-chill/without/test_skew.py.out
diff --git a/test-chill/without/test_tile.py.out b/verify-chill/without/test_tile.py.out
index e59933e..e59933e 100644
--- a/test-chill/without/test_tile.py.out
+++ b/verify-chill/without/test_tile.py.out
diff --git a/test-chill/without/test_unroll.py.out b/verify-chill/without/test_unroll.py.out
index 91bfa4f..91bfa4f 100644
--- a/test-chill/without/test_unroll.py.out
+++ b/verify-chill/without/test_unroll.py.out
diff --git a/test-chill/without/test_unroll_extra.py.out b/verify-chill/without/test_unroll_extra.py.out
index 2126860..2126860 100644
--- a/test-chill/without/test_unroll_extra.py.out
+++ b/verify-chill/without/test_unroll_extra.py.out
author	Derick Huth <derickhuth@gmail.com>	2016-02-10 11:13:08 -0700
committer	Derick Huth <derickhuth@gmail.com>	2016-02-10 11:13:08 -0700
commit	1dd03ee01bff2a70e758ce984476527f3ff42c68 (patch)
tree	9731867c7019ec9b6ee111c8fa9f92a92119b5ec
parent	4631ad76927d433da5d55c3c373a1dfd0f74c9d4 (diff)
parent	d68532f2f3ba332199f84818cb047d69a3f33588 (diff)
download	chill-1dd03ee01bff2a70e758ce984476527f3ff42c68.tar.gz chill-1dd03ee01bff2a70e758ce984476527f3ff42c68.tar.bz2 chill-1dd03ee01bff2a70e758ce984476527f3ff42c68.zip