diff options
author | dhuth <derickhuth@gmail.com> | 2014-08-27 09:52:06 -0600 |
---|---|---|
committer | dhuth <derickhuth@gmail.com> | 2014-08-27 09:52:06 -0600 |
commit | bff810cc371a38f493d688c54f71013f5a7d53bf (patch) | |
tree | fbe86954bb3c01deb21da9e41ebff5baa2889a45 | |
download | chill-bff810cc371a38f493d688c54f71013f5a7d53bf.tar.gz chill-bff810cc371a38f493d688c54f71013f5a7d53bf.tar.bz2 chill-bff810cc371a38f493d688c54f71013f5a7d53bf.zip |
Initial commit
96 files changed, 41476 insertions, 0 deletions
@@ -0,0 +1,18 @@ +BUILD +===== + +1. Edit Makefile. Change SUIFHOME and OMEGAHOME to the correct paths. + +2. Do "make depend" in the chill directory. + +3. Optionally, do "make clean" or "make veryclean", which removes additional + target files and flex/bison-generated files. + +4. Do "make". + + +INSTALLATION +============ + +You can use CHiLL in the source directory, since all links are already +created in the bin/, lib/ and include/ directories. @@ -0,0 +1,678 @@ +CHiLL 0.1.0 - 0.1.4 Copyright (C) 2008 University of Southern California, +CHiLL 0.1.5 and up Copyright (C) 2009 University of Utah. +All rights reserved. + + GNU GENERAL PUBLIC LICENSE + Version 3, 29 June 2007 + + Copyright (C) 2007 Free Software Foundation, Inc. <http://fsf.org/> + Everyone is permitted to copy and distribute verbatim copies + of this license document, but changing it is not allowed. + + Preamble + + The GNU General Public License is a free, copyleft license for +software and other kinds of works. + + The licenses for most software and other practical works are designed +to take away your freedom to share and change the works. By contrast, +the GNU General Public License is intended to guarantee your freedom to +share and change all versions of a program--to make sure it remains free +software for all its users. We, the Free Software Foundation, use the +GNU General Public License for most of our software; it applies also to +any other work released this way by its authors. You can apply it to +your programs, too. + + When we speak of free software, we are referring to freedom, not +price. Our General Public Licenses are designed to make sure that you +have the freedom to distribute copies of free software (and charge for +them if you wish), that you receive source code or can get it if you +want it, that you can change the software or use pieces of it in new +free programs, and that you know you can do these things. 
+ + To protect your rights, we need to prevent others from denying you +these rights or asking you to surrender the rights. Therefore, you have +certain responsibilities if you distribute copies of the software, or if +you modify it: responsibilities to respect the freedom of others. + + For example, if you distribute copies of such a program, whether +gratis or for a fee, you must pass on to the recipients the same +freedoms that you received. You must make sure that they, too, receive +or can get the source code. And you must show them these terms so they +know their rights. + + Developers that use the GNU GPL protect your rights with two steps: +(1) assert copyright on the software, and (2) offer you this License +giving you legal permission to copy, distribute and/or modify it. + + For the developers' and authors' protection, the GPL clearly explains +that there is no warranty for this free software. For both users' and +authors' sake, the GPL requires that modified versions be marked as +changed, so that their problems will not be attributed erroneously to +authors of previous versions. + + Some devices are designed to deny users access to install or run +modified versions of the software inside them, although the manufacturer +can do so. This is fundamentally incompatible with the aim of +protecting users' freedom to change the software. The systematic +pattern of such abuse occurs in the area of products for individuals to +use, which is precisely where it is most unacceptable. Therefore, we +have designed this version of the GPL to prohibit the practice for those +products. If such problems arise substantially in other domains, we +stand ready to extend this provision to those domains in future versions +of the GPL, as needed to protect the freedom of users. + + Finally, every program is threatened constantly by software patents. 
+States should not allow patents to restrict development and use of +software on general-purpose computers, but in those that do, we wish to +avoid the special danger that patents applied to a free program could +make it effectively proprietary. To prevent this, the GPL assures that +patents cannot be used to render the program non-free. + + The precise terms and conditions for copying, distribution and +modification follow. + + TERMS AND CONDITIONS + + 0. Definitions. + + "This License" refers to version 3 of the GNU General Public License. + + "Copyright" also means copyright-like laws that apply to other kinds of +works, such as semiconductor masks. + + "The Program" refers to any copyrightable work licensed under this +License. Each licensee is addressed as "you". "Licensees" and +"recipients" may be individuals or organizations. + + To "modify" a work means to copy from or adapt all or part of the work +in a fashion requiring copyright permission, other than the making of an +exact copy. The resulting work is called a "modified version" of the +earlier work or a work "based on" the earlier work. + + A "covered work" means either the unmodified Program or a work based +on the Program. + + To "propagate" a work means to do anything with it that, without +permission, would make you directly or secondarily liable for +infringement under applicable copyright law, except executing it on a +computer or modifying a private copy. Propagation includes copying, +distribution (with or without modification), making available to the +public, and in some countries other activities as well. + + To "convey" a work means any kind of propagation that enables other +parties to make or receive copies. Mere interaction with a user through +a computer network, with no transfer of a copy, is not conveying. 
+ + An interactive user interface displays "Appropriate Legal Notices" +to the extent that it includes a convenient and prominently visible +feature that (1) displays an appropriate copyright notice, and (2) +tells the user that there is no warranty for the work (except to the +extent that warranties are provided), that licensees may convey the +work under this License, and how to view a copy of this License. If +the interface presents a list of user commands or options, such as a +menu, a prominent item in the list meets this criterion. + + 1. Source Code. + + The "source code" for a work means the preferred form of the work +for making modifications to it. "Object code" means any non-source +form of a work. + + A "Standard Interface" means an interface that either is an official +standard defined by a recognized standards body, or, in the case of +interfaces specified for a particular programming language, one that +is widely used among developers working in that language. + + The "System Libraries" of an executable work include anything, other +than the work as a whole, that (a) is included in the normal form of +packaging a Major Component, but which is not part of that Major +Component, and (b) serves only to enable use of the work with that +Major Component, or to implement a Standard Interface for which an +implementation is available to the public in source code form. A +"Major Component", in this context, means a major essential component +(kernel, window system, and so on) of the specific operating system +(if any) on which the executable work runs, or a compiler used to +produce the work, or an object code interpreter used to run it. + + The "Corresponding Source" for a work in object code form means all +the source code needed to generate, install, and (for an executable +work) run the object code and to modify the work, including scripts to +control those activities. 
However, it does not include the work's +System Libraries, or general-purpose tools or generally available free +programs which are used unmodified in performing those activities but +which are not part of the work. For example, Corresponding Source +includes interface definition files associated with source files for +the work, and the source code for shared libraries and dynamically +linked subprograms that the work is specifically designed to require, +such as by intimate data communication or control flow between those +subprograms and other parts of the work. + + The Corresponding Source need not include anything that users +can regenerate automatically from other parts of the Corresponding +Source. + + The Corresponding Source for a work in source code form is that +same work. + + 2. Basic Permissions. + + All rights granted under this License are granted for the term of +copyright on the Program, and are irrevocable provided the stated +conditions are met. This License explicitly affirms your unlimited +permission to run the unmodified Program. The output from running a +covered work is covered by this License only if the output, given its +content, constitutes a covered work. This License acknowledges your +rights of fair use or other equivalent, as provided by copyright law. + + You may make, run and propagate covered works that you do not +convey, without conditions so long as your license otherwise remains +in force. You may convey covered works to others for the sole purpose +of having them make modifications exclusively for you, or provide you +with facilities for running those works, provided that you comply with +the terms of this License in conveying all material for which you do +not control copyright. Those thus making or running the covered works +for you must do so exclusively on your behalf, under your direction +and control, on terms that prohibit them from making any copies of +your copyrighted material outside their relationship with you. 
+ + Conveying under any other circumstances is permitted solely under +the conditions stated below. Sublicensing is not allowed; section 10 +makes it unnecessary. + + 3. Protecting Users' Legal Rights From Anti-Circumvention Law. + + No covered work shall be deemed part of an effective technological +measure under any applicable law fulfilling obligations under article +11 of the WIPO copyright treaty adopted on 20 December 1996, or +similar laws prohibiting or restricting circumvention of such +measures. + + When you convey a covered work, you waive any legal power to forbid +circumvention of technological measures to the extent such circumvention +is effected by exercising rights under this License with respect to +the covered work, and you disclaim any intention to limit operation or +modification of the work as a means of enforcing, against the work's +users, your or third parties' legal rights to forbid circumvention of +technological measures. + + 4. Conveying Verbatim Copies. + + You may convey verbatim copies of the Program's source code as you +receive it, in any medium, provided that you conspicuously and +appropriately publish on each copy an appropriate copyright notice; +keep intact all notices stating that this License and any +non-permissive terms added in accord with section 7 apply to the code; +keep intact all notices of the absence of any warranty; and give all +recipients a copy of this License along with the Program. + + You may charge any price or no price for each copy that you convey, +and you may offer support or warranty protection for a fee. + + 5. Conveying Modified Source Versions. + + You may convey a work based on the Program, or the modifications to +produce it from the Program, in the form of source code under the +terms of section 4, provided that you also meet all of these conditions: + + a) The work must carry prominent notices stating that you modified + it, and giving a relevant date. 
+ + b) The work must carry prominent notices stating that it is + released under this License and any conditions added under section + 7. This requirement modifies the requirement in section 4 to + "keep intact all notices". + + c) You must license the entire work, as a whole, under this + License to anyone who comes into possession of a copy. This + License will therefore apply, along with any applicable section 7 + additional terms, to the whole of the work, and all its parts, + regardless of how they are packaged. This License gives no + permission to license the work in any other way, but it does not + invalidate such permission if you have separately received it. + + d) If the work has interactive user interfaces, each must display + Appropriate Legal Notices; however, if the Program has interactive + interfaces that do not display Appropriate Legal Notices, your + work need not make them do so. + + A compilation of a covered work with other separate and independent +works, which are not by their nature extensions of the covered work, +and which are not combined with it such as to form a larger program, +in or on a volume of a storage or distribution medium, is called an +"aggregate" if the compilation and its resulting copyright are not +used to limit the access or legal rights of the compilation's users +beyond what the individual works permit. Inclusion of a covered work +in an aggregate does not cause this License to apply to the other +parts of the aggregate. + + 6. Conveying Non-Source Forms. + + You may convey a covered work in object code form under the terms +of sections 4 and 5, provided that you also convey the +machine-readable Corresponding Source under the terms of this License, +in one of these ways: + + a) Convey the object code in, or embodied in, a physical product + (including a physical distribution medium), accompanied by the + Corresponding Source fixed on a durable physical medium + customarily used for software interchange. 
+ + b) Convey the object code in, or embodied in, a physical product + (including a physical distribution medium), accompanied by a + written offer, valid for at least three years and valid for as + long as you offer spare parts or customer support for that product + model, to give anyone who possesses the object code either (1) a + copy of the Corresponding Source for all the software in the + product that is covered by this License, on a durable physical + medium customarily used for software interchange, for a price no + more than your reasonable cost of physically performing this + conveying of source, or (2) access to copy the + Corresponding Source from a network server at no charge. + + c) Convey individual copies of the object code with a copy of the + written offer to provide the Corresponding Source. This + alternative is allowed only occasionally and noncommercially, and + only if you received the object code with such an offer, in accord + with subsection 6b. + + d) Convey the object code by offering access from a designated + place (gratis or for a charge), and offer equivalent access to the + Corresponding Source in the same way through the same place at no + further charge. You need not require recipients to copy the + Corresponding Source along with the object code. If the place to + copy the object code is a network server, the Corresponding Source + may be on a different server (operated by you or a third party) + that supports equivalent copying facilities, provided you maintain + clear directions next to the object code saying where to find the + Corresponding Source. Regardless of what server hosts the + Corresponding Source, you remain obligated to ensure that it is + available for as long as needed to satisfy these requirements. 
+ + e) Convey the object code using peer-to-peer transmission, provided + you inform other peers where the object code and Corresponding + Source of the work are being offered to the general public at no + charge under subsection 6d. + + A separable portion of the object code, whose source code is excluded +from the Corresponding Source as a System Library, need not be +included in conveying the object code work. + + A "User Product" is either (1) a "consumer product", which means any +tangible personal property which is normally used for personal, family, +or household purposes, or (2) anything designed or sold for incorporation +into a dwelling. In determining whether a product is a consumer product, +doubtful cases shall be resolved in favor of coverage. For a particular +product received by a particular user, "normally used" refers to a +typical or common use of that class of product, regardless of the status +of the particular user or of the way in which the particular user +actually uses, or expects or is expected to use, the product. A product +is a consumer product regardless of whether the product has substantial +commercial, industrial or non-consumer uses, unless such uses represent +the only significant mode of use of the product. + + "Installation Information" for a User Product means any methods, +procedures, authorization keys, or other information required to install +and execute modified versions of a covered work in that User Product from +a modified version of its Corresponding Source. The information must +suffice to ensure that the continued functioning of the modified object +code is in no case prevented or interfered with solely because +modification has been made. 
+ + If you convey an object code work under this section in, or with, or +specifically for use in, a User Product, and the conveying occurs as +part of a transaction in which the right of possession and use of the +User Product is transferred to the recipient in perpetuity or for a +fixed term (regardless of how the transaction is characterized), the +Corresponding Source conveyed under this section must be accompanied +by the Installation Information. But this requirement does not apply +if neither you nor any third party retains the ability to install +modified object code on the User Product (for example, the work has +been installed in ROM). + + The requirement to provide Installation Information does not include a +requirement to continue to provide support service, warranty, or updates +for a work that has been modified or installed by the recipient, or for +the User Product in which it has been modified or installed. Access to a +network may be denied when the modification itself materially and +adversely affects the operation of the network or violates the rules and +protocols for communication across the network. + + Corresponding Source conveyed, and Installation Information provided, +in accord with this section must be in a format that is publicly +documented (and with an implementation available to the public in +source code form), and must require no special password or key for +unpacking, reading or copying. + + 7. Additional Terms. + + "Additional permissions" are terms that supplement the terms of this +License by making exceptions from one or more of its conditions. +Additional permissions that are applicable to the entire Program shall +be treated as though they were included in this License, to the extent +that they are valid under applicable law. 
If additional permissions +apply only to part of the Program, that part may be used separately +under those permissions, but the entire Program remains governed by +this License without regard to the additional permissions. + + When you convey a copy of a covered work, you may at your option +remove any additional permissions from that copy, or from any part of +it. (Additional permissions may be written to require their own +removal in certain cases when you modify the work.) You may place +additional permissions on material, added by you to a covered work, +for which you have or can give appropriate copyright permission. + + Notwithstanding any other provision of this License, for material you +add to a covered work, you may (if authorized by the copyright holders of +that material) supplement the terms of this License with terms: + + a) Disclaiming warranty or limiting liability differently from the + terms of sections 15 and 16 of this License; or + + b) Requiring preservation of specified reasonable legal notices or + author attributions in that material or in the Appropriate Legal + Notices displayed by works containing it; or + + c) Prohibiting misrepresentation of the origin of that material, or + requiring that modified versions of such material be marked in + reasonable ways as different from the original version; or + + d) Limiting the use for publicity purposes of names of licensors or + authors of the material; or + + e) Declining to grant rights under trademark law for use of some + trade names, trademarks, or service marks; or + + f) Requiring indemnification of licensors and authors of that + material by anyone who conveys the material (or modified versions of + it) with contractual assumptions of liability to the recipient, for + any liability that these contractual assumptions directly impose on + those licensors and authors. + + All other non-permissive additional terms are considered "further +restrictions" within the meaning of section 10. 
If the Program as you +received it, or any part of it, contains a notice stating that it is +governed by this License along with a term that is a further +restriction, you may remove that term. If a license document contains +a further restriction but permits relicensing or conveying under this +License, you may add to a covered work material governed by the terms +of that license document, provided that the further restriction does +not survive such relicensing or conveying. + + If you add terms to a covered work in accord with this section, you +must place, in the relevant source files, a statement of the +additional terms that apply to those files, or a notice indicating +where to find the applicable terms. + + Additional terms, permissive or non-permissive, may be stated in the +form of a separately written license, or stated as exceptions; +the above requirements apply either way. + + 8. Termination. + + You may not propagate or modify a covered work except as expressly +provided under this License. Any attempt otherwise to propagate or +modify it is void, and will automatically terminate your rights under +this License (including any patent licenses granted under the third +paragraph of section 11). + + However, if you cease all violation of this License, then your +license from a particular copyright holder is reinstated (a) +provisionally, unless and until the copyright holder explicitly and +finally terminates your license, and (b) permanently, if the copyright +holder fails to notify you of the violation by some reasonable means +prior to 60 days after the cessation. + + Moreover, your license from a particular copyright holder is +reinstated permanently if the copyright holder notifies you of the +violation by some reasonable means, this is the first time you have +received notice of violation of this License (for any work) from that +copyright holder, and you cure the violation prior to 30 days after +your receipt of the notice. 
+ + Termination of your rights under this section does not terminate the +licenses of parties who have received copies or rights from you under +this License. If your rights have been terminated and not permanently +reinstated, you do not qualify to receive new licenses for the same +material under section 10. + + 9. Acceptance Not Required for Having Copies. + + You are not required to accept this License in order to receive or +run a copy of the Program. Ancillary propagation of a covered work +occurring solely as a consequence of using peer-to-peer transmission +to receive a copy likewise does not require acceptance. However, +nothing other than this License grants you permission to propagate or +modify any covered work. These actions infringe copyright if you do +not accept this License. Therefore, by modifying or propagating a +covered work, you indicate your acceptance of this License to do so. + + 10. Automatic Licensing of Downstream Recipients. + + Each time you convey a covered work, the recipient automatically +receives a license from the original licensors, to run, modify and +propagate that work, subject to this License. You are not responsible +for enforcing compliance by third parties with this License. + + An "entity transaction" is a transaction transferring control of an +organization, or substantially all assets of one, or subdividing an +organization, or merging organizations. If propagation of a covered +work results from an entity transaction, each party to that +transaction who receives a copy of the work also receives whatever +licenses to the work the party's predecessor in interest had or could +give under the previous paragraph, plus a right to possession of the +Corresponding Source of the work from the predecessor in interest, if +the predecessor has it or can get it with reasonable efforts. + + You may not impose any further restrictions on the exercise of the +rights granted or affirmed under this License. 
For example, you may +not impose a license fee, royalty, or other charge for exercise of +rights granted under this License, and you may not initiate litigation +(including a cross-claim or counterclaim in a lawsuit) alleging that +any patent claim is infringed by making, using, selling, offering for +sale, or importing the Program or any portion of it. + + 11. Patents. + + A "contributor" is a copyright holder who authorizes use under this +License of the Program or a work on which the Program is based. The +work thus licensed is called the contributor's "contributor version". + + A contributor's "essential patent claims" are all patent claims +owned or controlled by the contributor, whether already acquired or +hereafter acquired, that would be infringed by some manner, permitted +by this License, of making, using, or selling its contributor version, +but do not include claims that would be infringed only as a +consequence of further modification of the contributor version. For +purposes of this definition, "control" includes the right to grant +patent sublicenses in a manner consistent with the requirements of +this License. + + Each contributor grants you a non-exclusive, worldwide, royalty-free +patent license under the contributor's essential patent claims, to +make, use, sell, offer for sale, import and otherwise run, modify and +propagate the contents of its contributor version. + + In the following three paragraphs, a "patent license" is any express +agreement or commitment, however denominated, not to enforce a patent +(such as an express permission to practice a patent or covenant not to +sue for patent infringement). To "grant" such a patent license to a +party means to make such an agreement or commitment not to enforce a +patent against the party. 
+ + If you convey a covered work, knowingly relying on a patent license, +and the Corresponding Source of the work is not available for anyone +to copy, free of charge and under the terms of this License, through a +publicly available network server or other readily accessible means, +then you must either (1) cause the Corresponding Source to be so +available, or (2) arrange to deprive yourself of the benefit of the +patent license for this particular work, or (3) arrange, in a manner +consistent with the requirements of this License, to extend the patent +license to downstream recipients. "Knowingly relying" means you have +actual knowledge that, but for the patent license, your conveying the +covered work in a country, or your recipient's use of the covered work +in a country, would infringe one or more identifiable patents in that +country that you have reason to believe are valid. + + If, pursuant to or in connection with a single transaction or +arrangement, you convey, or propagate by procuring conveyance of, a +covered work, and grant a patent license to some of the parties +receiving the covered work authorizing them to use, propagate, modify +or convey a specific copy of the covered work, then the patent license +you grant is automatically extended to all recipients of the covered +work and works based on it. + + A patent license is "discriminatory" if it does not include within +the scope of its coverage, prohibits the exercise of, or is +conditioned on the non-exercise of one or more of the rights that are +specifically granted under this License. 
You may not convey a covered +work if you are a party to an arrangement with a third party that is +in the business of distributing software, under which you make payment +to the third party based on the extent of your activity of conveying +the work, and under which the third party grants, to any of the +parties who would receive the covered work from you, a discriminatory +patent license (a) in connection with copies of the covered work +conveyed by you (or copies made from those copies), or (b) primarily +for and in connection with specific products or compilations that +contain the covered work, unless you entered into that arrangement, +or that patent license was granted, prior to 28 March 2007. + + Nothing in this License shall be construed as excluding or limiting +any implied license or other defenses to infringement that may +otherwise be available to you under applicable patent law. + + 12. No Surrender of Others' Freedom. + + If conditions are imposed on you (whether by court order, agreement or +otherwise) that contradict the conditions of this License, they do not +excuse you from the conditions of this License. If you cannot convey a +covered work so as to satisfy simultaneously your obligations under this +License and any other pertinent obligations, then as a consequence you may +not convey it at all. For example, if you agree to terms that obligate you +to collect a royalty for further conveying from those to whom you convey +the Program, the only way you could satisfy both those terms and this +License would be to refrain entirely from conveying the Program. + + 13. Use with the GNU Affero General Public License. + + Notwithstanding any other provision of this License, you have +permission to link or combine any covered work with a work licensed +under version 3 of the GNU Affero General Public License into a single +combined work, and to convey the resulting work. 
The terms of this +License will continue to apply to the part which is the covered work, +but the special requirements of the GNU Affero General Public License, +section 13, concerning interaction through a network will apply to the +combination as such. + + 14. Revised Versions of this License. + + The Free Software Foundation may publish revised and/or new versions of +the GNU General Public License from time to time. Such new versions will +be similar in spirit to the present version, but may differ in detail to +address new problems or concerns. + + Each version is given a distinguishing version number. If the +Program specifies that a certain numbered version of the GNU General +Public License "or any later version" applies to it, you have the +option of following the terms and conditions either of that numbered +version or of any later version published by the Free Software +Foundation. If the Program does not specify a version number of the +GNU General Public License, you may choose any version ever published +by the Free Software Foundation. + + If the Program specifies that a proxy can decide which future +versions of the GNU General Public License can be used, that proxy's +public statement of acceptance of a version permanently authorizes you +to choose that version for the Program. + + Later license versions may give you additional or different +permissions. However, no additional obligations are imposed on any +author or copyright holder as a result of your choosing to follow a +later version. + + 15. Disclaimer of Warranty. + + THERE IS NO WARRANTY FOR THE PROGRAM, TO THE EXTENT PERMITTED BY +APPLICABLE LAW. EXCEPT WHEN OTHERWISE STATED IN WRITING THE COPYRIGHT +HOLDERS AND/OR OTHER PARTIES PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY +OF ANY KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, +THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +PURPOSE. 
THE ENTIRE RISK AS TO THE QUALITY AND PERFORMANCE OF THE PROGRAM +IS WITH YOU. SHOULD THE PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF +ALL NECESSARY SERVICING, REPAIR OR CORRECTION. + + 16. Limitation of Liability. + + IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING +WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MODIFIES AND/OR CONVEYS +THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES, INCLUDING ANY +GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING OUT OF THE +USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED TO LOSS OF +DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY YOU OR THIRD +PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER PROGRAMS), +EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE POSSIBILITY OF +SUCH DAMAGES. + + 17. Interpretation of Sections 15 and 16. + + If the disclaimer of warranty and limitation of liability provided +above cannot be given local legal effect according to their terms, +reviewing courts shall apply local law that most closely approximates +an absolute waiver of all civil liability in connection with the +Program, unless a warranty or assumption of liability accompanies a +copy of the Program in return for a fee. + + END OF TERMS AND CONDITIONS + + How to Apply These Terms to Your New Programs + + If you develop a new program, and you want it to be of the greatest +possible use to the public, the best way to achieve this is to make it +free software which everyone can redistribute and change under these terms. + + To do so, attach the following notices to the program. It is safest +to attach them to the start of each source file to most effectively +state the exclusion of warranty; and each file should have at least +the "copyright" line and a pointer to where the full notice is found. 
+ + <one line to give the program's name and a brief idea of what it does.> + Copyright (C) <year> <name of author> + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see <http://www.gnu.org/licenses/>. + +Also add information on how to contact you by electronic and paper mail. + + If the program does terminal interaction, make it output a short +notice like this when it starts in an interactive mode: + + <program> Copyright (C) <year> <name of author> + This program comes with ABSOLUTELY NO WARRANTY; for details type `show w'. + This is free software, and you are welcome to redistribute it + under certain conditions; type `show c' for details. + +The hypothetical commands `show w' and `show c' should show the appropriate +parts of the General Public License. Of course, your program's commands +might be different; for a GUI interface, you would use an "about box". + + You should also get your employer (if you work as a programmer) or school, +if any, to sign a "copyright disclaimer" for the program, if necessary. +For more information on this, and how to apply and follow the GNU GPL, see +<http://www.gnu.org/licenses/>. + + The GNU General Public License does not permit incorporating your program +into proprietary programs. If your program is a subroutine library, you +may consider it more useful to permit linking proprietary applications with +the library. 
If this is what you want to do, use the GNU Lesser General +Public License instead of this License. But first, please read +<http://www.gnu.org/philosophy/why-not-lgpl.html>. diff --git a/Makefile b/Makefile new file mode 100644 index 0000000..3285f8c --- /dev/null +++ b/Makefile @@ -0,0 +1,247 @@ + +.SUFFIXES: +.PHONY: all depend depend-cuda-chill clean veryclean cuda-chill +.PHONY: chill + +CC = g++ +CFLAGS = -g -Wno-write-strings +DEPENDENCE_CFLAGS = -M +#OMEGAHOME = $(HOME)/omega + +ifdef TEST_COVERAGE + CFLAGS := $(CFLAGS) -fprofile-arcs -ftest-coverage +endif + +# TODO auto-generate using config.h generated by autoconf? +CHILLVERSION = "\"0.2.0\"" +PYTHON=python #=$(shell `which python` ) +PYVERSION=$(shell $(PYTHON) -c "import sys; print(sys.version[:3])") # 2.6 +PYTHONVER = python$(PYVERSION) +PYTHONINCLUDE = $(shell $(PYTHON) -c "from distutils import sysconfig; print(sysconfig.get_python_inc())") +PYTHONLIBDIR = $(shell $(PYTHON) -c "from distutils import sysconfig; print(sysconfig.get_config_var('LIBDIR'))") +PYTHONCONFIG = $(shell $(PYTHON) -c "from distutils import sysconfig; print(sysconfig.get_config_var('LIBPL'))") +# SCRIPT_LANG = lua <-- supplied by the command line + + +# this creates a LUAHOME even if you don't have such a directory +ifeq ($(strip $(wildcard $(LUAHOME))),) +LUAHOME = $(HOME)/lua +endif +LUA_PATH = -L${LUAHOME}/lib + + +# where do include files live +INC_PATH = -I${PYTHONINCLUDE} -I${OMEGAHOME}/include -I${LUAHOME}/include + +# where do libraries live +LIB_PATH = -L${OMEGAHOME}/code_gen/obj -L${OMEGAHOME}/omega_lib/obj +# seemingly not needed -L${PYTHONCONFIG} + + + +CORE_LIBS = -lm -lcodegen -lomega +RUNNER_LIBS = -llua -ldl -lreadline -lhistory -lpthread -ldl -lutil -lm -l${PYTHONVER} + +TDLHOME = ${ROSEHOME}/libltdl + +BOOST_DATE_TIME_LIB = -lboost_date_time +BOOST_FILESYSTEM_LIB = -lboost_filesystem +BOOST_LDFLAGS = -L${BOOSTHOME}/lib +BOOST_PROGRAM_OPTIONS_LIB = -lboost_program_options +BOOST_REGEX_LIB = -lboost_regex 
+BOOST_SYSTEM_LIB = -lboost_system +BOOST_THREAD_LIB = -lboost_thread +BOOST_WAVE_LIB = -lboost_wave + +ROSE_LIBS = -lrose $(BOOST_LDFLAGS) $(BOOST_DATE_TIME_LIB)\ + $(BOOST_THREAD_LIB) $(BOOST_FILESYSTEM_LIB) $(BOOST_PROGRAM_OPTIONS_LIB)\ + $(BOOST_REGEX_LIB) $(BOOST_SYSTEM_LIB) $(BOOST_SERIALIZATION_LIB) \ + $(BOOST_WAVE_LIB) -lrt -ldl + + +# Source files common to both chill and cuda-chill +CORE_SRCS = dep.cc omegatools.cc irtools.cc loop.cc loop_basic.cc loop_datacopy.cc loop_unroll.cc loop_tile.cc loop_extra.cc +LIB_SRCS = $(CORE_SRCS) + +# files that will be generated by bison, flex, and make that need to be removed at clean. +GENERATED_SRCS = parser.tab.hh parser.tab.cc parse_expr.yy.cc parse_expr.ll.hh parse_expr.tab.cc parse_expr.tab.hh Makefile.deps +# object files that are specific to lua or python builds. -- This is used so that SCRIPT_LANG does not need to be specified during clean +ORPHAN_OBJS = chill_run_util.o chillmodule.o parse_expr.tab.o parse_expr.yy.o + +# files used in chill and cuda-chill interfaces +ifeq ($(SCRIPT_LANG),lua) + RUNNER_SRCS = chill_run.cc chill_env.cc +else + ifeq ($(SCRIPT_LANG),python) + RUNNER_SRCS = chill_run.cc chillmodule.cc + else + RUNNER_SRCS = chill_run.cc chill_env.cc + endif +endif + +# files used in chill but not cuda-chill +IR_CHILL_SRCS = ir_rose.cc ir_rose_utils.cc +ifeq ($(SCRIPT_LANG),lua) + YACC_SRCS = parse_expr.yy.cc parse_expr.tab.cc + CHILL_RUNNER_SRCS = chill_run_util.cc + CHILL_SRCS = $(CORE_SRCS) $(IR_CHILL_SRCS) $(CHILL_RUNNER_SRCS) $(RUNNER_SRCS) +else + ifeq ($(SCRIPT_LANG),python) + YACC_SRCS = parse_expr.yy.cc parse_expr.tab.cc + CHILL_RUNNER_SRCS = chill_run_util.cc + CHILL_SRCS = $(CORE_SRCS) $(IR_CHILL_SRCS) $(CHILL_RUNNER_SRCS) $(RUNNER_SRCS) + else + YACC_SRCS = lex.yy.cc parser.tab.cc + CHILL_RUNNER_SRCS = + CHILL_SRCS = $(CORE_SRCS) $(IR_CHILL_SRCS) $(YACC_SRCS) $(RUNNER_SRCS) + endif +endif + +# source files for cuda-chill but not chill +CUDACHILL_ONLY_SRCS = mem_mapping_utils.cc 
loop_cuda_rose.cc +IR_CUDACHILL_SRCS = ir_rose.cc ir_rose_utils.cc ir_cudarose.cc ir_cuda_rose_utils.cc +CUDACHILL_RUNNER_SRCS = +CUDACHILL_SRCS = $(CORE_SRCS) $(CUDACHILL_ONLY_SRCS) $(IR_CUDACHILL_SRCS) $(RUNNER_SRCS) $(CUDACHILL_RUNNER_SRCS) + +# set interface language flags +ifeq ($(SCRIPT_LANG),lua) + RUNNER_EXTRA_CFLAGS = -DLUA +else + ifeq ($(SCRIPT_LANG),python) + RUNNER_EXTRA_CFLAGS = -DPYTHON + endif +endif + +depend-cuda-chill: CFLAGS := $(CFLAGS) -DCUDACHILL +cuda-chill: CFLAGS := $(CFLAGS) -DCUDACHILL + +ALL_SRCS = $(CORE_SRCS) $(YACC_SRCS) $(IR_CHILL_SRCS) $(CUDACHILL_ONLY_SRCS) $(IR_CUDACHILL_SRCS) $(RUNNER_SRCS) $(CHILL_RUNNER_SRCS) $(CUDACHILL_RUNNER_SRCS) +ALL_OBJS = $(ALL_SRCS:.cc=.o) $(ORPHAN_OBJS) + +RUNNER_DEFINES = -DLUA_USE_LINUX -DCHILL_BUILD_VERSION=$(CHILLVERSION) -DCHILL_BUILD_DATE="\"$(CHILL_BUILD_DATE)\"" + + +YACC_EXTRA_CFLAGS = + +##################################################################### +# compiler intermediate code specific definitions +##################################################################### + + + +#LIBS := $(LIBS) $(ROSE_LIBS) +LIB_PATH := $(LIB_PATH) -L${ROSEHOME}/lib -L${TDLHOME} +#LIB_SRCS := $(LIB_SRCS) # $(IR_SRCS) +INC_PATH := $(INC_PATH) -I${ROSEHOME}/include -I${BOOSTHOME}/include +YACC_EXTRA_CFLAGS := -DBUILD_ROSE +RUNNER_EXTRA_CFLAGS := $(RUNNER_EXTRA_CFLAGS) -DBUILD_ROSE + + +##################################################################### +# build rules +##################################################################### + +YACC_OBJS = $(YACC_SRCS:.cc=.o) +RUNNER_OBJS = $(RUNNER_SRCS:.cc=.o) +CHILL_RUNNER_OBJS = $(CHILL_RUNNER_SRCS:.cc=.o) +CUDACHILL_RUNNER_OBJS = $(CUDACHILL_RUNNER_SRCS:.cc=.o) +LIB_OBJS = $(LIB_SRCS:.cc=.o) +IR_CHILL_OBJS = $(IR_CHILL_SRCS:.cc=.o) +IR_CUDACHILL_OBJS = $(IR_CUDACHILL_SRCS:.cc=.o) +CUDACHILL_ONLY_OBJS = $(CUDACHILL_ONLY_SRCS:.cc=.o) + +CHILL_OBJS = $(CHILL_SRCS:.cc=.o) +CUDACHILL_OBJS = $(CUDACHILL_SRCS:.cc=.o) + + +all: cuda-chill chill + + +# can't 
these be combined to a superset of all source files? +depend: depend-cuda-chill + +depend-chill: $(LIB_SRCS) $(RUNNER_SRCS) $(CHILL_RUNNER_SRCS) $(YACC_SRCS) + $(CC) $(DEPENDENCE_CFLAGS) $(INC_PATH) $(LIB_SRCS) $(RUNNER_SRCS) $(CHILL_RUNNER_SRCS) $(YACC_SRCS) > Makefile.deps + +depend-cuda-chill: $(LIB_SRCS) $(RUNNER_SRCS) $(CUDACHILL_RUNNER_SRCS) + $(CC) $(DEPENDENCE_CFLAGS) $(INC_PATH) $(LIB_SRCS) $(RUNNER_SRCS) $(CUDACHILL_RUNNER_SRCS) > Makefile.deps + +libchill_xform.a: $(LIB_OBJS) $(IR_CHILL_OBJS) + ar -rs $@ $(LIB_OBJS) $(IR_CHILL_OBJS) + +libcudachill_xform.a: $(LIB_OBJS) $(IR_CUDACHILL_OBJS) $(CUDACHILL_ONLY_OBJS) + ar -rs $@ $(LIB_OBJS) $(IR_CUDACHILL_OBJS) $(CUDACHILL_ONLY_OBJS) + +%.o: %.cc + $(CC) $(CFLAGS) $(INC_PATH) $< -c -o $@ + + +clean: + @rm -fr $(ALL_OBJS) $(YACC_SRCS) $(GENERATED_SRCS) + +veryclean: + @rm -fr $(ALL_OBJS) $(YACC_SRCS) libchill_xform.a libcudachill_xform.a chill cuda-chill + + +cuda-chill: libcudachill_xform.a $(CUDACHILL_RUNNER_OBJS) $(RUNNER_OBJS) + $(CC) $(CFLAGS) $(LIB_PATH) $(LUA_PATH) $(CUDACHILL_RUNNER_OBJS) $(RUNNER_OBJS) $< $(CORE_LIBS) $(ROSE_LIBS) $(RUNNER_LIBS) -o $@ + +ifeq ($(SCRIPT_LANG),lua) +chill: libchill_xform.a $(CHILL_RUNNER_OBJS) $(RUNNER_OBJS) $(YACC_OBJS) + $(CC) $(CFLAGS) $(LIB_PATH) $(LUA_PATH) $(YACC_OBJS) $(CHILL_RUNNER_OBJS) $(RUNNER_OBJS) $< $(CORE_LIBS) $(ROSE_LIBS) $(RUNNER_LIBS) -o $@ +else +ifeq ($(SCRIPT_LANG),python) +chill: libchill_xform.a $(CHILL_RUNNER_OBJS) $(RUNNER_OBJS) $(YACC_OBJS) + $(CC) $(CFLAGS) $(LIB_PATH) $(YACC_OBJS) $(CHILL_RUNNER_OBJS) $(RUNNER_OBJS) $< $(CORE_LIBS) $(ROSE_LIBS) $(RUNNER_LIBS) -o $@ + +else +chill: libchill_xform.a $(YACC_OBJS) + $(CC) $(CFLAGS) $(LIB_PATH) $(YACC_OBJS) $< $(CORE_LIBS) $(ROSE_LIBS) -o $@ +endif +endif + + +lex.yy.cc: parser.ll parser.tab.hh + flex++ parser.ll + +lex.yy.o: lex.yy.cc + $(CC) $(CFLAGS) -c $< -o $@ + +parser.tab.hh parser.tab.cc: parser.yy + bison -t -d $< + +parser.tab.o: parser.tab.cc + $(CC) $(CFLAGS) $(YACC_EXTRA_CFLAGS) 
$(INC_PATH) -DCHILL_BUILD_DATE="\"$(CHILL_BUILD_DATE)\"" -c $< -o $@ + + +parse_expr.tab.cc: parse_expr.yy + bison -t -d parse_expr.yy + +parse_expr.tab.o: parse_expr.tab.cc + $(CC) $(CFLAGS) $(YACC_CFLAGS) $(INC_PATH) -o $@ -c parse_expr.tab.cc + +parse_expr.yy.cc: parse_expr.tab.cc parse_expr.ll + flex -o parse_expr.yy.cc parse_expr.ll + +parse_expr.yy.o: parse_expr.yy.cc + $(CC) $(CFLAGS) $(YACC_CFLAGS) $(INC_PATH) -o $@ -c parse_expr.yy.cc + +$(RUNNER_SRCS:.cc=.o): %.o: %.cc + $(CC) $(CFLAGS) $(RUNNER_EXTRA_CFLAGS) $(INC_PATH) $(RUNNER_DEFINES) $< -c -o $@ + +$(CHILL_RUNNER_SRCS:.cc=.o): %.o: %.cc + $(CC) $(CFLAGS) $(RUNNER_EXTRA_CFLAGS) $(INC_PATH) $(RUNNER_DEFINES) $< -c -o $@ + +# static pattern rule (targets: target-pattern: prereq-pattern), same form as the two runner rules above +$(CUDACHILL_RUNNER_SRCS:.cc=.o): %.o: %.cc + $(CC) $(CFLAGS) $(RUNNER_EXTRA_CFLAGS) $(INC_PATH) $(RUNNER_DEFINES) $< -c -o $@ + + +$(IR_SRCS:.cc=.o): %.o: %.cc + $(CC) -Wno-write-strings $(CFLAGS) $(INC_PATH) $< -c -o $@ + +ifeq ($(shell test -f Makefile.deps && echo "true"), true) +include Makefile.deps +endif + +CHILL_BUILD_DATE = $(shell date +%m/%d/%Y) + @@ -0,0 +1,42 @@ +CHiLL 0.2 open source release +See LICENSE file for copyright information. + +CHiLL is a composable high-level loop transformation framework. It +supports a wide variety of loop transformations on a whole loop nest +or individual sub loop nests inside. Different transformations can be +applied in sequence. A scripting interface is also provided and +optimizing parameters used by the script can be modified on the fly. + + +What is new? +============ + +version 0.2: + * More robust transformation composition. + +version 0.1: + * Initial release.
+ + +DIRECTORIES +=========== + +chill/ source files + examples/ examples for using CHiLL scripts + dep_test/ more examples for calculating data dependence + bin/ links to executables: chill + lib/ links to libraries: libchill_xform.a + include/ links to header files + + +DOCUMENTATION AND QUESTIONS +=========================== + +Currently the best overview can be found here (not up-to-date): + "CHiLL: A Framework for Composing High-Level Loop Transformations", + by Chun Chen, Jacqueline Chame and Mary Hall, + USC CS TR 08-897 + +Software website: + +For questions, bug reports or suggestions, please contact: diff --git a/ROSE_INSTALL.txt b/ROSE_INSTALL.txt new file mode 100644 index 0000000..79e0c43 --- /dev/null +++ b/ROSE_INSTALL.txt @@ -0,0 +1,77 @@ +INSTALLATION STEPS: + +1) Please install Boost library version <= 1.45.0 using these instructions + +1. Download BOOST. +Download BOOST at www.boost.org/users/download. + +2. Untar BOOST. +Type tar -zxf BOOST-[VersionNumber].tar.gz to untar the BOOST distribution. + +3. Create a separate install tree. +Type mkdir installTree to create a location for the install. + +4. Run the bootstrap.sh script. +Type ./bootstrap.sh --prefix=[installTree] + +5. Run bjam. +Type ./bjam install --prefix=[installTree] + + +6) set your BOOSTHOME environment variable to where you've installed BOOST. + +7) Download the latest version of rose from the website. + https://outreach.scidac.gov/frs/?group_id=24 + +8) set the JAVA_HOME environment variable in your ${HOME}/.bashrc + eg. export JAVA_HOME=/usr/lib/jvm/java-1.6.0-openjdk + +9) add this to the LD_LIBRARY_PATH environment variable + + LD_LIBRARY_PATH=$JAVA_HOME/jre/lib/i386/server:$LD_LIBRARY_PATH + export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:${BOOSTHOME}/lib + +10) make a new empty directory separate from the downloaded source directory ($ROSE_SRC) for rose. + eg.
mkdir ${HOME}/compileTree + +11) set your ROSEHOME environment variable in ${HOME}/.bashrc to ${HOME}/compileTree + +12) run the following command from this ${ROSEHOME} + ${ROSE_SRC}/configure --prefix=${ROSEHOME} --with-boost=${BOOSTHOME} --with-boost-libdir=${BOOSTHOME}/lib --with-haskell=no + +13) run the following command to compile: + make install-core + + +14) Install lua version <= 5.1 (usually not necessary to set the LUAHOME environment variable unless + you installed it in a local directory, in which case set LUAHOME to that directory). Lua is only required for + cuda-chill and not plain chill. + +15) If you are installing for CUDA-CHILL set the CUDACHILL environment variable to true + else false + + +16) Install omega by doing the following commands + i) make clean + ii) make veryclean + iii)make depend + iv) make + +17) Set your OMEGAHOME environment variable to the appropriate directory in ${HOME}/.bashrc + +18) Install cuda-chill by doing the following commands + i) make clean + ii) make veryclean + iii)make depend-cuda-chill + iv) make cuda-chill + + else if you are installing just plain chill + export CUDACHILL=false; (remember to rebuild plain omega as well) + i) make clean + ii) make veryclean + iii)make depend + iv) make + +19) Go to examples/cuda-chill and run ../../cuda-chill mm.lua + +20) If running plain Chill go to examples/chill and run ../../chill gemm.script diff --git a/chill_env.cc b/chill_env.cc new file mode 100644 index 0000000..991f13c --- /dev/null +++ b/chill_env.cc @@ -0,0 +1,1442 @@ +/***************************************************************************** + Copyright (C) 2010 University of Utah. + All Rights Reserved. + + Purpose: + Register variables and functions into the global Lua address space to + provide an environment for CHiLL scripts + + Notes: + Contains Lua wrappers for the CHiLL Loop class and methods.
+ + History: + 01/2010 created by Gabe Rudy + 03/2014 added support for CHiLL without Cuda (Derick Huth) +*****************************************************************************/ + +#define lua_c +#include <lua.hpp> //All lua includes wrapped in extern "C" +#include "loop.hh" +#include "chill_env.hh" + +#ifdef CUDACHILL + +#ifdef BUILD_ROSE +#include "loop_cuda_rose.hh" +#include "ir_rose.hh" +#include "ir_cudarose.hh" +#elif BUILD_SUIF +#include "loop_cuda.hh" +#include "ir_suif.hh" +#include "ir_cudasuif.hh" +#endif + +#else + +#include "chill_run_util.hh" +#include <omega.h> +#include "ir_code.hh" +#ifdef BUILD_ROSE +#include "ir_rose.hh" +#elif BUILD_SUIF +#include "ir_suif.hh" +#endif + +#endif + +using namespace omega; + +#ifdef CUDACHILL +extern LoopCuda *myloop; +#else +extern Loop *myloop; +#endif +extern IR_Code *ir_code; +extern bool is_interactive; +extern bool repl_stop; + +std::string procedure_name; +std::string source_filename; + +extern std::vector<IR_Control *> ir_controls; +extern std::vector<int> loops; + +//Macros for wrapping code to myloop-> that translates C++ exceptions to +//Lua stracktraced errors +#define REQUIRE_LOOP try{ if (myloop == NULL){ throw std::runtime_error("loop not initialized"); } +#define END_REQUIRE_LOOP }catch (const std::exception &e) { return luaL_error(L, e.what()); } + +#ifdef CUDACHILL +void register_v1(lua_State *L); +void register_v2(lua_State *L); + +#endif +//Extra param checking +static bool luaL_checkboolean(lua_State *L, int narg) { + if (!lua_isboolean(L,narg)) + luaL_typerror(L, narg, "boolean"); + return lua_toboolean(L, narg); +} + +static bool luaL_optboolean(lua_State *L, int narg, bool def) { + return luaL_opt(L, luaL_checkboolean, narg, def); +} + +static bool tointvector(lua_State *L, int narg, std::vector<int>& v) { + if (!lua_istable(L, narg)) + return false; + + //Iterate through array (table) + lua_pushnil(L); // first key + while (lua_next(L, narg) != 0) { + // uses 'key' (at index -2) and 
'value' (at index -1) + v.push_back((int) luaL_checknumber(L, -1)); + //printf("added: %d", v[v.size()-1]); + // removes 'value'; keeps 'key' for next iteration + lua_pop(L, 1); + } + return true; +} + +static bool tointset(lua_State* L, int narg, std::set<int>& s) { + if(!lua_istable(L, narg)) + return false; + // iterate through array (lua table) + lua_pushnil(L); // first key + while (lua_next(L, narg) != 0) { + int val = (int)luaL_checknumber(L, -1); + //printf("added: %d\n", val); + s.insert(val); + lua_pop(L, 1); + } +} + +static bool tostringvector(lua_State *L, int narg, + std::vector<std::string>& v) { + if (!lua_istable(L, narg)) + return false; + + //Iterate through array (table) + lua_pushnil(L); // first key + while (lua_next(L, narg) != 0) { + // uses 'key' (at index -2) and 'value' (at index -1) + v.push_back(luaL_checkstring(L,-1)); + //printf("added: %d", v[v.size()-1]); + // removes 'value'; keeps 'key' for next iteration + lua_pop(L, 1); + } + return true; +} + +static bool tostringmap(lua_State *L, int narg, + std::map<std::string, std::string>& v) { + if (!lua_istable(L, narg)) + return false; + + //Iterate through array (table) + lua_pushnil(L); // first key + while (lua_next(L, narg) != 0) { + // uses 'key' (at index -2) and 'value' (at index -1) + v.insert( + std::make_pair(luaL_checkstring(L,-2), luaL_checkstring(L,-1))); + //printf("added: %d", v[v.size()-1]); + // removes 'value'; keeps 'key' for next iteration + lua_pop(L, 1); + } + return true; +} + +static bool tostringintmap(lua_State *L, int narg, + std::map<std::string, int>& v) { + if (!lua_istable(L, narg)) + return false; + + //Iterate through array (table) + lua_pushnil(L); // first key + while (lua_next(L, narg) != 0) { + // uses 'key' (at index -2) and 'value' (at index -1) + v.insert( + std::make_pair(luaL_checkstring(L,-2), + (int) luaL_checknumber(L, -1))); + //printf("added: %d", v[v.size()-1]); + // removes 'value'; keeps 'key' for next iteration + lua_pop(L, 1); + } + 
return true; +} + +static bool tostringintmapvector(lua_State *L, int narg, std::vector<std::map<std::string, int> >& v) { + if(!lua_istable(L, narg)) + return false; + lua_pushnil(L); + // Iterate over table + while(lua_next(L, narg) != 0) { + std::map<std::string, int> map; + // use 'value' (at index -1), discard key + // try to parse table as a 'string to int' map. + if(!tostringintmap(L, -1, map)) + return false; + v.push_back(map); + lua_pop(L, 1); + } + return true; +} + +static bool tointmatrix(lua_State *L, int narg, + std::vector<std::vector<int> >& m) { + if (!lua_istable(L, narg)) + return false; + + //Iterate through array (table) + lua_pushnil(L); // first key + while (lua_next(L, narg) != 0) { + // uses 'key' (at index -2) and 'value' (at index -1) + if (!lua_istable(L,-1)) { + lua_pop(L, 2); + return false; + } + m.push_back(std::vector<int>()); + int i = m.size() - 1; + //Now iterate over the keys of the second level table + int l2 = lua_gettop(L); //Index of second level table + lua_pushnil(L); // first key + while (lua_next(L, l2) != 0) { + int k = (int) luaL_checknumber(L, -1); + m[i].push_back(k); + //printf("m[%d][%d] = %d\n", i,m[i].size()-1,k); + lua_pop(L, 1); + } + lua_pop(L, 1); + // removes 'value'; keeps 'key' for next iteration + } + return true; +} + +static void strict_arg_num(lua_State *L, int num) { + int n = lua_gettop(L); //Number of arguments + if (n != num) + throw std::runtime_error("incorrect number of arguments"); +} + +// ------------------------------------------------------------------- +// Initialization and finalization functions +// ------------------------------------------------------------------- +#ifdef CUDACHILL +/* The function we'll call from the lua script */ +static int init(lua_State *L) { + int n = lua_gettop(L); //Number of arguments + if (n > 0) { + //Expet one of the following forms + //l1 = init("mm4.sp2",0,0) --input file, procedure 0, loop 0 + //or + //l1 = init("mm4.sp2","NameFromPragma") + + const 
char* source_filename = luaL_optstring(L,1,0); +#ifdef BUILD_ROSE + if(lua_isstring(L,2)) { + const char* procedure_name = luaL_optstring(L, 2, 0); +#elif BUILD_SUIF + if (lua_isnumber(L, 2)) { + int procedure_number = luaL_optint(L, 2, 0); +#endif + int loop_num = luaL_optint(L, 3, 0); + + lua_getglobal(L, "dest"); + const char* dest_lang = lua_tostring(L,-1); + lua_pop(L, 1); +#ifdef BUILD_ROSE + ir_code = new IR_cudaroseCode(source_filename, procedure_name); +#elif BUILD_SUIF + //ir_code = new IR_cudasuifCode(source_filename, procedure_number, dest_lang); + + ir_code = new IR_cudasuifCode(source_filename, procedure_number); + //myloop = new LoopCuda(ir_code->init_loop(loop_num), loop_num); //protonu--using the modified constructor + + //protonu--here goes my initializations + //A lot of this code was lifted from Chun's parser.yy + //the plan is now to create the LoopCuda object directly +#endif + IR_Block *block = ir_code->GetCode(); + ir_controls = ir_code->FindOneLevelControlStructure(block); + +#ifdef BUILD_ROSE + + int loop_count = 0; + for (int i = 0; i < ir_controls.size(); i++) { + if (ir_controls[i]->type() == IR_CONTROL_LOOP) { + loops.push_back(i); + loop_count++; + } + } + delete block; + + std::vector<IR_Control *> parm; + for(int j = 0; j < loop_count; j++) + parm.push_back(ir_controls[loops[j]]); + + block = ir_code->MergeNeighboringControlStructures(parm); +#elif BUILD_SUIF + for (int i = 0; i < ir_controls.size(); i++) + if (ir_controls[i]->type() == IR_CONTROL_LOOP) + loops.push_back(i); + delete block; + + std::vector<IR_Control *> parm; + parm.push_back(ir_controls[loop_num]); + + block = ir_code->MergeNeighboringControlStructures(parm); +#endif + myloop = new LoopCuda(block, loop_num); + delete block; + + //end-protonu + + } else { + //TODO: handle pragma lookup + } + //Also register a different set of global functions + myloop->original(); + myloop->useIdxNames = true; //Use idxName in code_gen + register_v2(L); + //TODO: return a reference 
to the intial array if that makes sense + //still + return 0; + } + lua_getglobal(L, "source"); + const char* source_filename = lua_tostring(L,-1); + lua_pop(L, 1); + + lua_getglobal(L, "dest"); + const char* dest_lang = lua_tostring(L,-1); + lua_pop(L, 1); + + lua_getglobal(L, "procedure"); +#ifdef BUILD_ROSE + const char* procedure_name = lua_tostring(L , -1); +#elif BUILD_SUIF + int procedure_number = lua_tointeger(L,-1); +#endif + lua_pop(L, 1); + + lua_getglobal(L, "loop"); + int loop_num = lua_tointeger(L, -1); + lua_pop(L, 1); + +//ir_code = new IR_cudasuifCode(source_filename, procedure_number, dest_lang); +#ifdef BUILD_ROSE + ir_code = new IR_cudaroseCode(source_filename, procedure_name); + +#elif BUILD_SUIF + ir_code = new IR_cudasuifCode(source_filename, procedure_number); +//myloop = new LoopCuda(ir_code->init_loop(loop_num), loop_num); //protonu--using the modified constructor +//protonu--here goes my initializations +//A lot of this code was lifted from Chun's parser.yy +//the plan is now to create the LoopCuda object directly +#endif + IR_Block *block = ir_code->GetCode(); + ir_controls = ir_code->FindOneLevelControlStructure(block); + +#ifdef BUILD_ROSE + + int loop_count = 0; + for (int i = 0; i < ir_controls.size(); i++) { + if (ir_controls[i]->type() == IR_CONTROL_LOOP) { + loops.push_back(i); + loop_count++; + } + } + delete block; + + std::vector<IR_Control *> parm; + for(int j = 0; j < loop_count; j++) + parm.push_back(ir_controls[loops[j]]); + + block = ir_code->MergeNeighboringControlStructures(parm); +#elif BUILD_SUIF + for (int i = 0; i < ir_controls.size(); i++) + if (ir_controls[i]->type() == IR_CONTROL_LOOP) + loops.push_back(i); + delete block; + + std::vector<IR_Control *> parm; + parm.push_back(ir_controls[loop_num]); + + block = ir_code->MergeNeighboringControlStructures(parm); +#endif + myloop = new LoopCuda(block, loop_num); + delete block; + +//register_v1(L); + register_v2 (L); + return 0; +} +#else +static void 
strict_arg_num(lua_State* L, int min, int max) { + int n = lua_gettop(L); + if(n < min || n > max) + throw std::runtime_error("incorrect number of arguments"); +} + +int get_loop_num_start(lua_State *L) { + lua_getglobal(L, "loop_start"); + int loop_num_start = lua_tointeger(L, -1); + lua_pop(L, 1); + return loop_num_start; +} +int get_loop_num_end(lua_State* L) { + lua_getglobal(L, "loop_end"); + int loop_num_end = lua_tointeger(L, -1); + lua_pop(L, 1); + return loop_num_end; +} + +// Stores start_num in the Lua global "loop_start" (read back by get_loop_num_start). +// Always returns 0; the function is declared int, so it must not fall off its end. +static int set_loop_num_start(lua_State *L, int start_num) { + lua_pushinteger(L, start_num); + lua_setglobal(L, "loop_start"); + return 0; +} +// Stores end_num in the Lua global "loop_end" (read back by get_loop_num_end). +// Always returns 0, for the same reason as set_loop_num_start. +static int set_loop_num_end(lua_State *L, int end_num) { + lua_pushinteger(L, end_num); + lua_setglobal(L, "loop_end"); + return 0; +} + +static int source(lua_State* L) { + if(!source_filename.empty()) { + fprintf(stderr, "only one file can be handled in a script"); + if(!is_interactive) + exit(2); + } + source_filename = luaL_checkstring(L, 1); + return 0; +} + + +static int procedure(lua_State* L) { + if(!procedure_name.empty()) { + fprintf(stderr, "only one procedure can be handled in a script"); + if(!is_interactive) + exit(2); + } + procedure_name = luaL_checkstring(L, 1); + return 0; +} + +void finalize_loop(int loop_num_start, int loop_num_end) { + if (loop_num_start == loop_num_end) { + ir_code->ReplaceCode(ir_controls[loops[loop_num_start]], myloop->getCode()); + ir_controls[loops[loop_num_start]] = NULL; + } + else { + std::vector<IR_Control *> parm; + for (int i = loops[loop_num_start]; i <= loops[loop_num_end]; i++) + parm.push_back(ir_controls[i]); + IR_Block *block = ir_code->MergeNeighboringControlStructures(parm); + ir_code->ReplaceCode(block, myloop->getCode()); + for (int i = loops[loop_num_start]; i <= loops[loop_num_end]; i++) { + delete ir_controls[i]; + ir_controls[i] = NULL; + } + } + delete myloop; +} +void finalize_loop(lua_State* L) { + int loop_num_start = get_loop_num_start(L); + int loop_num_end = get_loop_num_end(L); +
finalize_loop(loop_num_start, loop_num_end); +} +static void init_loop(lua_State* L, int loop_num_start, int loop_num_end) { + if (source_filename.empty()) { + fprintf(stderr, "source file not set when initializing the loop"); + if (!is_interactive) + exit(2); + } + else { + if (ir_code == NULL) { + #ifdef BUILD_ROSE + if (procedure_name.empty()) + procedure_name = "main"; + #elif BUILD_SUIF + if (procedure_number == -1) + procedure_number = 0; + #endif + + #ifdef BUILD_ROSE + ir_code = new IR_roseCode(source_filename.c_str(), procedure_name.c_str()); + #elif BUILD_SUIF + ir_code = new IR_suifCode(source_filename.c_str(), procedure_name.c_str()); + #endif + + IR_Block *block = ir_code->GetCode(); + ir_controls = ir_code->FindOneLevelControlStructure(block); + for (int i = 0; i < ir_controls.size(); i++) { + if (ir_controls[i]->type() == IR_CONTROL_LOOP) + loops.push_back(i); + } + delete block; + } + if (myloop != NULL && myloop->isInitialized()) { + finalize_loop(L); + } + } + set_loop_num_start(L, loop_num_start); + set_loop_num_end(L, loop_num_end); + if (loop_num_end < loop_num_start) { + fprintf(stderr, "the last loop must be after the start loop"); + if (!is_interactive) + exit(2); + } + if (loop_num_end >= loops.size()) { + fprintf(stderr, "loop %d does not exist", loop_num_end); + if (!is_interactive) + exit(2); + } + std::vector<IR_Control *> parm; + for (int i = loops[loop_num_start]; i <= loops[loop_num_end]; i++) { + if (ir_controls[i] == NULL) { + fprintf(stderr, "loop has already been processed"); + if (!is_interactive) + exit(2); + } + parm.push_back(ir_controls[i]); + } + IR_Block *block = ir_code->MergeNeighboringControlStructures(parm); + myloop = new Loop(block); + delete block; + //if (is_interactive) printf("%s ", PROMPT_STRING); +} + +static int loop(lua_State* L) { + // loop (n) + // loop (n:m) + int nargs = lua_gettop(L); + int start_num; + int end_num; + if(nargs == 1) { + start_num = luaL_optint(L, 1, 0); + end_num = start_num; + } + else 
if(nargs == 2) { + start_num = luaL_optint(L, 1, 0); + end_num = luaL_optint(L, 2, 0); + } + else { + fprintf(stderr, "loop takes one or two arguments"); + if(!is_interactive) + exit(2); + } + init_loop(L, start_num, end_num); + return 0; +} +#endif + +#ifdef CUDACHILL + + static int exit(lua_State *L) { + strict_arg_num(L, 0); + repl_stop = true; + return 0; + } + + static int print_code(lua_State *L) { + REQUIRE_LOOP; + strict_arg_num(L, 0); + myloop->printCode(); + printf("\n"); + END_REQUIRE_LOOP; + return 0; + } + + static int print_ri(lua_State *L) { + REQUIRE_LOOP; + strict_arg_num(L, 0); + myloop->printRuntimeInfo(); + printf("\n"); + END_REQUIRE_LOOP; + return 0; + } + + static int print_idx(lua_State *L) { + REQUIRE_LOOP; + strict_arg_num(L, 0); + myloop->printIndexes(); + printf("\n"); + END_REQUIRE_LOOP; + return 0; + } + + static int print_dep(lua_State *L) { + REQUIRE_LOOP; + strict_arg_num(L, 0); + std::cout << myloop->dep; + END_REQUIRE_LOOP; + return 0; + } + + static int print_space(lua_State *L) { + REQUIRE_LOOP; + strict_arg_num(L, 0); + for (int i = 0; i < myloop->stmt.size(); i++) { + printf("s%d: ", i + 1); + Relation r; + if (!myloop->stmt[i].xform.is_null()) + r = Composition(copy(myloop->stmt[i].xform), copy(myloop->stmt[i].IS)); + else + r = copy(myloop->stmt[i].IS); + r.simplify(2, 4); + r.print(); + }END_REQUIRE_LOOP; + return 0; + } + + static int num_statement(lua_State *L) { + REQUIRE_LOOP; + lua_pushinteger(L, myloop->stmt.size()); + END_REQUIRE_LOOP; + return 1; + } + + static int does_var_exists(lua_State *L) { + REQUIRE_LOOP; + strict_arg_num(L, 1); + std::string symName = luaL_optstring(L,1,""); + lua_pushboolean(L, myloop->symbolExists(symName)); + END_REQUIRE_LOOP; + return 1; + } + + static int add_sync(lua_State *L) { + REQUIRE_LOOP; + strict_arg_num(L, 2); + int stmt = luaL_optint(L,1,0); + std::string idxName = luaL_optstring(L,2,""); + myloop->addSync(stmt, idxName); + END_REQUIRE_LOOP; + return 0; + } + + static int 
rename_index(lua_State *L) { + REQUIRE_LOOP; + strict_arg_num(L, 3); + int stmt = luaL_optint(L,1,0); + std::string idxName = luaL_optstring(L,2,""); + std::string newName = luaL_optstring(L,3,""); + myloop->renameIndex(stmt, idxName, newName); + END_REQUIRE_LOOP; + return 0; + } + +//basic on index names + static int permute_v2(lua_State *L) { + REQUIRE_LOOP; + strict_arg_num(L, 2); + int stmt = luaL_optint(L,1,0); + std::vector<std::string> order; + if (!tostringvector(L, 2, order)) { + throw std::runtime_error("second arg must be a string vector"); + } + myloop->permute_cuda(stmt, order); + END_REQUIRE_LOOP; + return 0; + } + + static int tile_v2(lua_State *L) { + REQUIRE_LOOP; + int n = lua_gettop(L); //Number of arguments + if (n != 3 && n != 7) + throw std::runtime_error("incorrect number of arguments"); + int stmt_num = luaL_optint(L, 1, 0); + int level = luaL_optint(L, 2, 0); + if (n == 3) { + int outer_level = luaL_optint(L, 3, 1); + myloop->tile_cuda(stmt_num, level, outer_level); + } else { + int tile_size = luaL_optint(L, 3, 0); + int outer_level = luaL_optint(L, 4, 1); + std::string idxName = luaL_optstring(L,5,""); + std::string ctrlName = luaL_optstring(L,6,""); + TilingMethodType method = StridedTile; + if (n > 6) { + int imethod = luaL_optint(L, 7, 2); + if (imethod == 0) + method = StridedTile; + else if (imethod == 1) + method = CountedTile; + else { + throw std::runtime_error( + "7th argument must be either strided or counted"); + } + } + myloop->tile_cuda(stmt_num, level, tile_size, outer_level, idxName, + ctrlName, method); + }END_REQUIRE_LOOP; + return 0; + } + + static int cur_indices(lua_State *L) { + REQUIRE_LOOP; + strict_arg_num(L, 1); + int stmt_num = luaL_optint(L, 1, 0); +//TODO: needs to be per stmt + lua_createtable(L, myloop->idxNames[stmt_num].size(), 0); + for (int i = 0; i < myloop->idxNames[stmt_num].size(); i++) { + lua_pushinteger(L, i + 1); + lua_pushstring(L, myloop->idxNames[stmt_num][i].c_str()); + lua_settable(L, -3); + 
}END_REQUIRE_LOOP; + return 1; + } + + static int block_indices(lua_State *L) { + REQUIRE_LOOP; + strict_arg_num(L, 0); + lua_newtable(L); + if (myloop->cu_bx > 1) { + lua_pushinteger(L, 1); + lua_pushstring(L, "bx"); + lua_settable(L, -3); + } + if (myloop->cu_by > 1) { + lua_pushinteger(L, 2); + lua_pushstring(L, "by"); + lua_settable(L, -3); + }END_REQUIRE_LOOP; + return 1; + } + + static int thread_indices(lua_State *L) { + REQUIRE_LOOP; + strict_arg_num(L, 0); + lua_newtable(L); + if (myloop->cu_tx > 1) { + lua_pushinteger(L, 1); + lua_pushstring(L, "tx"); + lua_settable(L, -3); + } + if (myloop->cu_ty > 1) { + lua_pushinteger(L, 2); + lua_pushstring(L, "ty"); + lua_settable(L, -3); + } + if (myloop->cu_tz > 1) { + lua_pushinteger(L, 3); + lua_pushstring(L, "tz"); + lua_settable(L, -3); + }END_REQUIRE_LOOP; + return 1; + } + + static int block_dims(lua_State *L) { + REQUIRE_LOOP; + strict_arg_num(L, 0); + lua_pushinteger(L, myloop->cu_bx); + lua_pushinteger(L, myloop->cu_by); + END_REQUIRE_LOOP; + return 2; + } + + static int thread_dims(lua_State *L) { + REQUIRE_LOOP; + strict_arg_num(L, 0); + lua_pushinteger(L, myloop->cu_tx); + lua_pushinteger(L, myloop->cu_ty); + lua_pushinteger(L, myloop->cu_tz); + END_REQUIRE_LOOP; + return 3; + } + + static int hard_loop_bounds(lua_State *L) { + REQUIRE_LOOP; + strict_arg_num(L, 2); + int stmt = luaL_optint(L, 1, 0); + int level = luaL_optint(L, 2, 0); + int upper, lower; + myloop->extractCudaUB(stmt, level, upper, lower); + lua_pushinteger(L, lower); + lua_pushinteger(L, upper); + END_REQUIRE_LOOP; + return 2; + } + + static int datacopy_v2(lua_State *L) { + REQUIRE_LOOP; + int n = lua_gettop(L); //Number of arguments + +//overload 1 +//examples: +// datacopy(0,4,a,false,0,1,-16) +// datacopy(0,3,2,1) + if (n < 4 || n > 9) + throw std::runtime_error("incorrect number of arguments"); + int stmt_num = luaL_optint(L, 1, 0); + int level = luaL_optint(L, 2, 0); + const char* array_name = luaL_optstring(L, 3, 0); + 
std::vector<std::string> new_idxs; + if (!tostringvector(L, 4, new_idxs)) + throw std::runtime_error("fourth argument must be an array of strings"); + bool allow_extra_read = luaL_optboolean(L, 5, false); + int fastest_changing_dimension = luaL_optint(L, 6, -1); + int padding_stride = luaL_optint(L, 7, 1); + int padding_alignment = luaL_optint(L, 8, 1); + bool cuda_shared = luaL_optboolean(L, 9, false); + myloop->datacopy_cuda(stmt_num, level, array_name, new_idxs, allow_extra_read, + fastest_changing_dimension, padding_stride, padding_alignment, + cuda_shared); + END_REQUIRE_LOOP; + return 0; + } + + static int datacopy_privatized_v2(lua_State *L) { + REQUIRE_LOOP; + int n = lua_gettop(L); //Number of arguments + +//example: +//datacopy_privatized(0,3,"a",{4,5},false,-1,1,1) + if (n < 4 || n > 9) + throw std::runtime_error("incorrect number of arguments"); + int stmt_num = luaL_optint(L, 1, 0); + std::string level_idx = luaL_optstring(L,2,""); + int level = myloop->findCurLevel(stmt_num, level_idx); + const char* array_name = luaL_optstring(L, 3, 0); + + std::vector<std::string> privatized_idxs; + if (!tostringvector(L, 4, privatized_idxs)) + throw std::runtime_error("4th argument must be an array of index strings"); + std::vector<int> privatized_levels(privatized_idxs.size()); + for (int i = 0; i < privatized_idxs.size(); i++) + privatized_levels[i] = myloop->findCurLevel(stmt_num, privatized_idxs[i]); + + bool allow_extra_read = luaL_optboolean(L, 5, false); + int fastest_changing_dimension = luaL_optint(L, 6, -1); + int padding_stride = luaL_optint(L, 7, 1); + int padding_alignment = luaL_optint(L, 8, 1); + bool cuda_shared = luaL_optboolean(L, 9, false); + myloop->datacopy_privatized_cuda(stmt_num, level, array_name, privatized_levels, + allow_extra_read, fastest_changing_dimension, padding_stride, + padding_alignment, cuda_shared); + END_REQUIRE_LOOP; + return 0; + } + + static int unroll_v2(lua_State *L) { + REQUIRE_LOOP; +//int n = lua_gettop(L); //Number 
of arguments + strict_arg_num(L, 3); + int stmt_num = luaL_optint(L, 1, 0); + int level; + if (lua_isnumber(L, 2)) { + level = luaL_optint(L, 2, 0); + } else { + std::string level_idx = luaL_optstring(L,2,""); + level = myloop->findCurLevel(stmt_num, level_idx); + } + int unroll_amount = luaL_optint(L, 3, 0); + bool does_expand = myloop->unroll_cuda(stmt_num, level, unroll_amount); + lua_pushboolean(L, does_expand); + END_REQUIRE_LOOP; + return 1; + } + + static int cudaize_v2(lua_State *L) { + REQUIRE_LOOP; + //int n = lua_gettop(L); //Number of arguments + strict_arg_num(L, 3); + + std::string kernel_name = luaL_optstring(L, 1, 0); + + std::vector<std::string> blockIdxs; + std::vector<std::string> threadIdxs; + std::map<std::string, int> array_sizes; + if (!tostringintmap(L, 2, array_sizes)) + throw std::runtime_error("second argument must be a map[string->int]"); + + if (lua_istable(L, 3)) { + //Iterate through array (table) + lua_pushnil(L); // first key + while (lua_next(L, 3) != 0) { + // uses 'key' (at index -2) and 'value' (at index -1) + if (strcmp(luaL_checkstring(L,-2), "block") == 0) { + if (!tostringvector(L, lua_gettop(L), blockIdxs)) + throw std::runtime_error( + "third argument must have a string list for its 'block' key"); + } else if (strcmp(luaL_checkstring(L,-2), "thread") == 0) { + if (!tostringvector(L, lua_gettop(L), threadIdxs)) + throw std::runtime_error( + "third argument must have a string list for its 'thread' key"); + } else { + goto v2NotTable; + } + lua_pop(L, 1); + } + } else { + v2NotTable: throw std::runtime_error( + "third argument must be a table with 'block' and 'thread' as potential keys and list of indexes as values"); + } + myloop->cudaize_v2(kernel_name, array_sizes, blockIdxs, threadIdxs); + END_REQUIRE_LOOP; + return 0; + } + + int get_loop_num(lua_State *L) { + lua_getglobal(L, "loop"); + int loop_num = lua_tointeger(L, -1); + lua_pop(L, 1); + return loop_num; + } + + + static int copy_to_texture(lua_State *L) { + 
REQUIRE_LOOP; + strict_arg_num(L, 1); + std::string array_name = luaL_optstring(L,1,0); + myloop->copy_to_texture(array_name.c_str()); + END_REQUIRE_LOOP; + return 0; + } + + /*static int copy_to_texture_2d(lua_State *L) { + REQUIRE_LOOP; + strict_arg_num(L, 3); + std::string array_name = luaL_optstring(L, 1, 0); + int width = luaL_optint(L, 2, 0); + int height = luaL_optint(L, 3, 0); + myloop->copy_to_texture_2d(array_name.c_str(), width, height); + END_REQUIRE_LOOP; + return 0; + }*/ + +//protonu-constant memory--place holder for now + static int copy_to_constant(lua_State *L) { + REQUIRE_LOOP; + strict_arg_num(L, 1); + std::string array_name = luaL_optstring(L,1,0); +//call to loop->copy_to_texture goes here + myloop->copy_to_constant(array_name.c_str()); + END_REQUIRE_LOOP; + return 0; + + } +#else + + static int print_code(lua_State *L) { + REQUIRE_LOOP; + strict_arg_num(L, 0); + myloop->printCode(); + printf("\n"); + END_REQUIRE_LOOP; + return 0; + } + + static int print_dep(lua_State* L) { + REQUIRE_LOOP; + myloop->printDependenceGraph(); + END_REQUIRE_LOOP; + return 0; + } + + static int print_space(lua_State* L) { + REQUIRE_LOOP; + myloop->printIterationSpace(); + END_REQUIRE_LOOP; + return 0; + } + + static int exit(lua_State *L) { + strict_arg_num(L, 0); + repl_stop = true; + return 0; + } + +static int known(lua_State *L) { + REQUIRE_LOOP; + strict_arg_num(L, 1); + int num_dim = myloop->known.n_set(); + + // parse expression from string + std::vector<std::map<std::string, int> >* cond; + std::string cond_expr = luaL_optstring(L,1,0); + cond = parse_relation_vector(cond_expr.c_str()); + + Relation rel(num_dim); + F_And *f_root = rel.add_and(); + for (int j = 0; j < cond->size(); j++) { + GEQ_Handle h = f_root->add_GEQ(); + for (std::map<std::string, int>::iterator it = (*cond)[j].begin(); it != (*cond)[j].end(); it++) { + try { + int dim = from_string<int>(it->first); + if (dim == 0) + h.update_const(it->second); + else + throw 
std::invalid_argument("only symbolic variables are allowed in known condition"); + } + catch (std::ios::failure e) { + Free_Var_Decl *g = NULL; + for (unsigned i = 0; i < myloop->freevar.size(); i++) { + std::string name = myloop->freevar[i]->base_name(); + if (name == it->first) { + g = myloop->freevar[i]; + break; + } + } + if (g == NULL) + throw std::invalid_argument("symbolic variable " + it->first + " not found"); + else + h.update_coef(rel.get_local(g), it->second); + } + } + } + myloop->addKnown(rel); + END_REQUIRE_LOOP; + return 0; +} + + static int remove_dep(lua_State* L) { + REQUIRE_LOOP; + strict_arg_num(L, 0); + int from = luaL_optint(L, 1, 0); + int to = luaL_optint(L, 2, 0); + myloop->removeDependence(from, to); + END_REQUIRE_LOOP; + return 0; + } + + static int original(lua_State* L) { + REQUIRE_LOOP; + strict_arg_num(L, 0); + myloop->original(); + END_REQUIRE_LOOP; + return 0; + } + + static int permute(lua_State *L) { + REQUIRE_LOOP; + int nargs = lua_gettop(L); + if((nargs < 1) || (nargs > 3)) + throw std::runtime_error("incorrect number of arguments in permute"); + if(nargs == 1) { + // premute ( vector ) + std::vector<int> pi; + if(!tointvector(L, 1, pi)) + throw std::runtime_error("first arg in permute(pi) must be an int vector"); + myloop->permute(pi); + } + else if (nargs == 2) { + // permute ( set, vector ) + std::set<int> active; + std::vector<int> pi; + if(!tointset(L, 1, active)) + throw std::runtime_error("the first argument in permute(active, pi) must be an int set"); + if(!tointvector(L, 2, pi)) + throw std::runtime_error("the second argument in permute(active, pi) must be an int vector"); + myloop->permute(active, pi); + } + else if (nargs == 3) { + int stmt_num = luaL_optint(L, 1, 0); + int level = luaL_optint(L, 2, 0); + std::vector<int> pi; + if(!tointvector(L, 3, pi)) + throw std::runtime_error("the third argument in permute(stmt_num, level, pi) must be an int vector"); + myloop->permute(stmt_num, level, pi); + } + 
END_REQUIRE_LOOP; + return 0; + } + + static int pragma(lua_State *L) { + REQUIRE_LOOP; + strict_arg_num(L, 3); + int stmt_num = luaL_optint(L, 1, 0); + int level = luaL_optint(L, 2, 0); + std::string pragmaText = luaL_optstring(L, 3, ""); + myloop->pragma(stmt_num, level, pragmaText); + END_REQUIRE_LOOP; + return 0; + } + + static int prefetch(lua_State *L) { + REQUIRE_LOOP; + strict_arg_num(L, 3); + int stmt_num = luaL_optint(L, 1, 0); + int level = luaL_optint(L, 2, 0); + std::string prefetchText = luaL_optstring(L, 3, ""); + int hint = luaL_optint(L, 4, 0); + myloop->prefetch(stmt_num, level, prefetchText, hint); + END_REQUIRE_LOOP; + return 0; + } + + static int tile(lua_State* L) { + REQUIRE_LOOP; + int nargs = lua_gettop(L); + if((nargs < 3) || (nargs > 7)) + throw std::runtime_error("incorrect number of arguments for tile"); + int stmt_num = luaL_optint(L, 1, 0); + int level = luaL_optint(L, 2, 0); + int tile_size = luaL_optint(L, 3, 0); + if(nargs == 3) { + myloop->tile(stmt_num, level, tile_size); + } + else if(nargs >= 4) { + int outer_level = luaL_optint(L, 4, 0); + if(nargs >= 5) { + TilingMethodType method = StridedTile; + int imethod = luaL_optint(L, 5, 2); + // check method input against expected values + if (imethod == 0) + method = StridedTile; + else if (imethod == 1) + method = CountedTile; + else + throw std::runtime_error("5th argument must be either strided or counted"); + if(nargs >= 6) { + int alignment_offset = luaL_optint(L, 6, 0); + if(nargs == 7) { + int alignment_multiple = luaL_optint(L, 7, 1); + myloop->tile(stmt_num, level, tile_size, outer_level, method, alignment_offset, alignment_multiple); + } + if(nargs == 6) + myloop->tile(stmt_num, level, tile_size, outer_level, method, alignment_offset); + } + if(nargs == 5) + myloop->tile(stmt_num, level, tile_size, outer_level, method); + } + if(nargs == 4) + myloop->tile(stmt_num, level, tile_size, outer_level); + } + END_REQUIRE_LOOP; + return 0; + } + + static int datacopy(lua_State* L) 
{ + REQUIRE_LOOP; + int nargs = lua_gettop(L); + if((nargs < 3) || (nargs > 7)) + throw std::runtime_error("incorrect number of arguments for datacopy"); + // Overload 1: bool datacopy(const std::vector<std::pair<int, std::vector<int> > > &array_ref_nums, int level, bool allow_extra_read = false, int fastest_changing_dimension = -1, int padding_stride = 1, int padding_alignment = 4, int memory_type = 0); + // Overload 2: bool datacopy(int stmt_num, int level, const std::string &array_name, bool allow_extra_read = false, int fastest_changing_dimension = -1, int padding_stride = 1, int padding_alignment = 4, int memory_type = 0); + int stmt_num = luaL_optint(L, 1, 0); + int level = luaL_optint(L, 2, 0); + std::string array_name = std::string(luaL_optstring(L,3,0)); + bool allow_extra_read = luaL_optboolean(L, 4, false); + int fastest_changing_dimension = luaL_optint(L, 5, -1); + int padding_stride = luaL_optint(L, 6, 1); + int padding_alignment = luaL_optint(L, 7, 4); + int memory_type = luaL_optint(L, 8, 0); + myloop->datacopy(stmt_num, level, array_name, allow_extra_read, fastest_changing_dimension, padding_stride, padding_alignment, memory_type); + END_REQUIRE_LOOP; + return 0; + } + + static int datacopy_privatized(lua_State* L) { + // bool datacopy_privatized(int stmt_num, int level, const std::string &array_name, const std::vector<int> &privatized_levels, bool allow_extra_read = false, int fastest_changing_dimension = -1, int padding_stride = 1, int padding_alignment = 1, int memory_type = 0); + REQUIRE_LOOP; + int nargs = lua_gettop(L); + if((nargs < 4) || (nargs > 9)) + throw std::runtime_error("incorrect number of arguments for datacopy_privatized"); + int stmt_num = luaL_optint(L, 1, 0); + int level = luaL_optint(L, 2, 0); + std::string array_name = std::string(luaL_optstring(L, 3, 0)); + std::vector<int> privatized_levels; + tointvector(L, 4, privatized_levels); + bool allow_extra_read = luaL_optboolean(L, 5, false); + int fastest_changing_dimension = 
luaL_optint(L, 6, -1); + int padding_stride = luaL_optint(L, 7, 1); + int padding_alignment = luaL_optint(L, 8, 1); + int memory_type = luaL_optint(L, 9, 0); + myloop->datacopy_privatized(stmt_num, level, array_name, privatized_levels, allow_extra_read, fastest_changing_dimension, padding_stride, padding_alignment, memory_type); + END_REQUIRE_LOOP; + return 0; + } + + static int unroll(lua_State* L) { + REQUIRE_LOOP; + int nargs = lua_gettop(L); + if((nargs < 3) || (nargs > 4)) + throw std::runtime_error("incorrect number of arguments for unroll"); + //std::set<int> unroll(int stmt_num, int level, int unroll_amount, std::vector< std::vector<std::string> >idxNames= std::vector< std::vector<std::string> >(), int cleanup_split_level = 0); + int stmt_num = luaL_optint(L, 1, 0); + int level = luaL_optint(L, 2, 0); + int unroll_amount = luaL_optint(L, 3, 0); + std::vector< std::vector<std::string> > idxNames = std::vector< std::vector<std::string> >(); + int cleanup_split_level = luaL_optint(L, 4, 0); + myloop->unroll(stmt_num, level, unroll_amount, idxNames, cleanup_split_level); + END_REQUIRE_LOOP; + return 0; + } + + static int unroll_extra(lua_State* L) { + REQUIRE_LOOP; + int nargs = lua_gettop(L); + if((nargs < 3) || (nargs < 4)) + throw std::runtime_error("incorrect number of arguments for unroll_extra"); + int stmt_num = luaL_optint(L, 1, 0); + int level = luaL_optint(L, 2, 0); + int unroll_amount = luaL_optint(L, 3, 0); + int cleanup_split_level = luaL_optint(L, 4, 0); + myloop->unroll_extra(stmt_num, level, unroll_amount, cleanup_split_level); + END_REQUIRE_LOOP; + return 0; + } + + static int split(lua_State* L) { + REQUIRE_LOOP; + strict_arg_num(L, 3); + int stmt_num = luaL_optint(L, 1, 0); + int level = luaL_optint(L, 2, 0); + int num_dim = myloop->stmt[stmt_num].xform.n_out(); + + // parse expression from string + std::vector<std::map<std::string, int> >* cond; + std::string cond_expr = luaL_optstring(L,3,0); + cond = 
parse_relation_vector(cond_expr.c_str()); + + Relation rel((num_dim-1)/2); + F_And *f_root = rel.add_and(); + for (int j = 0; j < cond->size(); j++) { + GEQ_Handle h = f_root->add_GEQ(); + for (std::map<std::string, int>::iterator it = (*cond)[j].begin(); it != (*cond)[j].end(); it++) { + try { + int dim = from_string<int>(it->first); + if (dim == 0) + h.update_const(it->second); + else { + if (dim > (num_dim-1)/2) + throw std::invalid_argument("invalid loop level " + to_string(dim) + " in split condition"); + h.update_coef(rel.set_var(dim), it->second); + } + } + catch (std::ios::failure e) { + Free_Var_Decl *g = NULL; + for (unsigned i = 0; i < myloop->freevar.size(); i++) { + std::string name = myloop->freevar[i]->base_name(); + if (name == it->first) { + g = myloop->freevar[i]; + break; + } + } + if (g == NULL) + throw std::invalid_argument("unrecognized variable " + to_string(it->first.c_str())); + h.update_coef(rel.get_local(g), it->second); + } + } + } + myloop->split(stmt_num,level,rel); + END_REQUIRE_LOOP; + return 0; + } + +static int nonsingular(lua_State* L) { + REQUIRE_LOOP; + std::vector< std::vector<int> > mat; + tointmatrix(L, 1, mat); + myloop->nonsingular(mat); + END_REQUIRE_LOOP; + return 0; +} + +static int skew(lua_State* L) { + REQUIRE_LOOP; + std::set<int> stmt_nums; + std::vector<int> skew_amounts; + int level = luaL_optint(L, 2, 0); + tointset(L, 1, stmt_nums); + tointvector(L, 3, skew_amounts); + myloop->skew(stmt_nums, level, skew_amounts); + END_REQUIRE_LOOP; + return 0; +} + +static int scale(lua_State* L) { + REQUIRE_LOOP; + strict_arg_num(L, 3); + std::set<int> stmt_nums; + int level = luaL_optint(L, 2, 0); + int scale_amount = luaL_optint(L, 3, 0); + tointset(L, 1, stmt_nums); + myloop->scale(stmt_nums, level, scale_amount); + END_REQUIRE_LOOP; + return 0; +} + +static int reverse(lua_State* L) { + REQUIRE_LOOP; + strict_arg_num(L, 2); + std::set<int> stmt_nums; + int level = luaL_optint(L, 2, 0); + tointset(L, 1, stmt_nums); + 
myloop->reverse(stmt_nums, level); + END_REQUIRE_LOOP; + return 0; +} + +static int shift(lua_State* L) { + REQUIRE_LOOP; + strict_arg_num(L, 3); + std::set<int> stmt_nums; + int level = luaL_optint(L, 2, 0); + int shift_amount = luaL_optint(L, 3, 0); + tointset(L, 1, stmt_nums); + myloop->shift(stmt_nums, level, shift_amount); + END_REQUIRE_LOOP; + return 0; +} + +static int shift_to(lua_State* L) { + REQUIRE_LOOP; + strict_arg_num(L, 3); + int stmt_num = luaL_optint(L, 1, 0); + int level = luaL_optint(L, 2, 0); + int absolute_pos = luaL_optint(L, 3, 0); + myloop->shift_to(stmt_num, level, absolute_pos); + END_REQUIRE_LOOP; + return 0; +} + +static int peel(lua_State* L) { + REQUIRE_LOOP; + strict_arg_num(L, 2, 3); + int stmt_num = luaL_optint(L, 1, 0); + int level = luaL_optint(L, 2, 0); + int amount = luaL_optint(L, 3, 1); + myloop->peel(stmt_num, level, amount); + END_REQUIRE_LOOP; + return 0; +} + +static int fuse(lua_State* L) { + REQUIRE_LOOP; + strict_arg_num(L, 2); + std::set<int> stmt_nums; + int level = luaL_optint(L, 2, 0); + tointset(L, 1, stmt_nums); + myloop->fuse(stmt_nums, level); + END_REQUIRE_LOOP; + return 0; +} + +static int distribute(lua_State* L) { + REQUIRE_LOOP; + strict_arg_num(L, 2); + std::set<int> stmts; + int level = luaL_optint(L, 1, 0); + tointset(L, 2, stmts); + myloop->distribute(stmts, level); + END_REQUIRE_LOOP; + return 0; +} + +static int num_statements(lua_State *L) { + REQUIRE_LOOP; + lua_pushinteger(L, myloop->stmt.size()); + END_REQUIRE_LOOP; + return 1; +} +#endif + +/** + * Register global methods available to chill scripts + */ + void register_globals(lua_State *L) { +//--- +//Preset globals +//--- + lua_pushstring(L, CHILL_BUILD_VERSION); + lua_setglobal(L, "VERSION"); + + lua_pushstring(L, "C"); + lua_setglobal(L, "dest"); + lua_pushstring(L, "C"); + lua_setglobal(L, "C"); + +//--- +//Enums for functions +//--- + +//TileMethod + lua_pushinteger(L, 0); + lua_setglobal(L, "strided"); + lua_pushinteger(L, 1); + 
lua_setglobal(L, "counted"); + +//MemoryMode + lua_pushinteger(L, 0); + lua_setglobal(L, "global"); + lua_pushinteger(L, 1); + lua_setglobal(L, "shared"); + lua_pushinteger(L, 2); + lua_setglobal(L, "texture"); + +//Bool flags + lua_pushboolean(L, 1); + lua_setglobal(L, "sync"); +//... + } + +#ifdef CUDACHILL + void register_functions(lua_State *L) { + lua_register(L, "init", init); + lua_register(L, "exit", exit); + lua_register(L, "print_code", print_code); + lua_register(L, "print_ri", print_ri); + lua_register(L, "print_idx", print_idx); + lua_register(L, "print_dep", print_dep); + lua_register(L, "print_space", print_space); + lua_register(L, "num_statement", num_statement); + } + + void register_v2(lua_State *L) { + lua_register(L, "cudaize", cudaize_v2); + lua_register(L, "tile", tile_v2); + lua_register(L, "permute", permute_v2); + lua_register(L, "datacopy_privatized", datacopy_privatized_v2); + lua_register(L, "datacopy", datacopy_v2); + lua_register(L, "unroll", unroll_v2); + + lua_register(L, "cur_indices", cur_indices); + lua_register(L, "block_indices", block_indices); + lua_register(L, "thread_indices", thread_indices); + lua_register(L, "block_dims", block_dims); + + lua_register(L, "thread_dims", thread_dims); + lua_register(L, "hard_loop_bounds", hard_loop_bounds); + lua_register(L, "num_statements", num_statement); + + lua_register(L, "does_exists", does_var_exists); + lua_register(L, "add_sync", add_sync); + lua_register(L, "rename_index", rename_index); + + lua_register(L, "copy_to_texture", copy_to_texture); + lua_register(L, "copy_to_constant", copy_to_constant); + } + +#else // CHiLL + void register_functions(lua_State* L) { + lua_register(L, "source", source); + lua_register(L, "procedure", procedure); + lua_register(L, "loop", loop); + lua_register(L, "print_code", print_code); + lua_register(L, "print_dep", print_dep); + lua_register(L, "print_space", print_space); + lua_register(L, "exit", exit); + lua_register(L, "known", known); + 
lua_register(L, "remove_dep", remove_dep); + lua_register(L, "original", original); + lua_register(L, "permute", permute); + lua_register(L, "pragma", pragma); + lua_register(L, "prefetch", prefetch); + lua_register(L, "tile", tile); + lua_register(L, "datacopy", datacopy); + lua_register(L, "datacopy_privatised", datacopy_privatized); + lua_register(L, "unroll", unroll); + lua_register(L, "unroll_extra", unroll_extra); + lua_register(L, "split", split); + lua_register(L, "nonsingular", nonsingular); + lua_register(L, "skew", skew); + lua_register(L, "scale", scale); + lua_register(L, "reverse", reverse); + lua_register(L, "shift", shift); + lua_register(L, "shift_to", shift_to); + lua_register(L, "peel", peel); + lua_register(L, "fuse", fuse); + lua_register(L, "distribute", distribute); + lua_register(L, "num_statements", num_statements); + } +#endif diff --git a/chill_env.hh b/chill_env.hh new file mode 100644 index 0000000..28e8fcf --- /dev/null +++ b/chill_env.hh @@ -0,0 +1,15 @@ +#ifndef CHILL_ENV_H +#define CHILL_ENV_H + +typedef struct lua_State lua_State; + +void register_globals(lua_State *L); +void register_functions(lua_State *L); +#ifdef CUDACHILL +int get_loop_num(lua_State *L); +#else +void finalize_loop(int loop_num_start, int loop_num_end); +int get_loop_num_start(lua_State *L); +int get_loop_num_end(lua_State *L); +#endif +#endif diff --git a/chill_error.hh b/chill_error.hh new file mode 100644 index 0000000..dc7432f --- /dev/null +++ b/chill_error.hh @@ -0,0 +1,19 @@ +#ifndef CHILL_ERROR_HH +#define CHILL_ERROR_HH + +// for loop transformation problem +struct loop_error: public std::runtime_error { + loop_error(const std::string &msg): std::runtime_error(msg){} +}; + +// for generic compiler intermediate code handling problem +struct ir_error: public std::runtime_error { + ir_error(const std::string &msg): std::runtime_error(msg){} +}; + +// specific for expression to preburger math translation problem +struct ir_exp_error: public ir_error { + 
ir_exp_error(const std::string &msg): ir_error(msg){} +}; + +#endif diff --git a/chill_run.cc b/chill_run.cc new file mode 100644 index 0000000..ba4de9d --- /dev/null +++ b/chill_run.cc @@ -0,0 +1,394 @@ +#include "chilldebug.h" + +// this is a little messy. the Makefile should be able to define one or the other +#ifndef PYTHON +#ifndef LUA +#define LUA +#endif +#endif + +#include <signal.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> + +#include "chill_env.hh" + +#include "loop.hh" +#include <omega.h> +#include "ir_code.hh" + +#ifdef CUDACHILL + +#ifdef BUILD_ROSE +#include "loop_cuda_rose.hh" +#include "ir_cudarose.hh" +#elif BUILD_SUIF +#include "loop_cuda.hh" +#include "ir_cudasuif.hh" +#endif + +#else + +#ifdef BUILD_ROSE +#include "ir_rose.hh" +#elif BUILD_SUIF +#include "ir_suif.hh" +#endif + +#endif + +#ifdef LUA +#define lua_c //Get the configuration defines for doing an interactive shell +#include <lua.hpp> //All lua includes wrapped in extern "C" +#include "chill_env.hh" // Lua wrapper functions for CHiLL +#elif PYTHON +#include "chillmodule.hh" // Python wrapper functions for CHiLL +#endif + +//--- +// CHiLL globals +//--- +Loop *myloop = NULL; +IR_Code *ir_code = NULL; +bool repl_stop = false; +bool is_interactive = false; + +std::vector<IR_Control *> ir_controls; +std::vector<int> loops; + +// this whole section belongs somewhere else +#ifdef LUA +//--- +// Interactive mode functions, directly copied out of lua.c +//--- +// The Lua interpreter state +static lua_State *globalL = NULL; +static const char *progname = "CHiLL"; + +static void lstop (lua_State *L, lua_Debug *ar) { + (void)ar; /* unused arg. 
*/ + lua_sethook(L, NULL, 0, 0); + luaL_error(L, "interrupted!"); +} + + +static void laction (int i) { + signal(i, SIG_DFL); /* if another SIGINT happens before lstop, + terminate process (default action) */ + lua_sethook(globalL, lstop, LUA_MASKCALL | LUA_MASKRET | LUA_MASKCOUNT, 1); +} + + +static void l_message (const char *pname, const char *msg) { + if (pname) fprintf(stderr, "%s: ", pname); + fprintf(stderr, "%s\n", msg); + fflush(stderr); // ? does this do anything ? +} + + +static int report (lua_State *L, int status) { + if (status && !lua_isnil(L, -1)) { + const char *msg = lua_tostring(L, -1); + if (msg == NULL) msg = "(error object is not a string)"; + l_message(progname, msg); + lua_pop(L, 1); + } + return status; +} + + +static int traceback (lua_State *L) { + if (!lua_isstring(L, 1)) /* 'message' not a string? */ + return 1; /* keep it intact */ + lua_getfield(L, LUA_GLOBALSINDEX, "debug"); + if (!lua_istable(L, -1)) { + lua_pop(L, 1); + return 1; + } + lua_getfield(L, -1, "traceback"); + if (!lua_isfunction(L, -1)) { + lua_pop(L, 2); + return 1; + } + lua_pushvalue(L, 1); /* pass error message */ + lua_pushinteger(L, 2); /* skip this function and traceback */ + lua_call(L, 2, 1); /* call debug.traceback */ + return 1; +} + + +static int docall (lua_State *L, int narg, int clear) { + DEBUG_PRINT("\ndocall()\n"); + int status; + int base = lua_gettop(L) - narg; /* function index */ + lua_pushcfunction(L, traceback); /* push traceback function */ + lua_insert(L, base); /* put it under chunk and args */ + signal(SIGINT, laction); + + DEBUG_PRINT("status = lua_pcall(L, narg, (clear ? 0 : LUA_MULTRET), base);\n"); + + status = lua_pcall(L, narg, (clear ? 
0 : LUA_MULTRET), base); + signal(SIGINT, SIG_DFL); + lua_remove(L, base); /* remove traceback function */ + /* force a complete garbage collection in case of errors */ + if (status != 0) lua_gc(L, LUA_GCCOLLECT, 0); + return status; +} + +static int dofile (lua_State *L, const char *name) { + int status = luaL_loadfile(L, name) || docall(L, 0, 1); + return report(L, status); +} + +static const char *get_prompt (lua_State *L, int firstline) { + const char *p; + lua_getfield(L, LUA_GLOBALSINDEX, firstline ? "_PROMPT" : "_PROMPT2"); + p = lua_tostring(L, -1); + if (p == NULL) p = (firstline ? LUA_PROMPT : LUA_PROMPT2); + lua_pop(L, 1); /* remove global */ + return p; +} + + +static int incomplete (lua_State *L, int status) { + if (status == LUA_ERRSYNTAX) { + size_t lmsg; + const char *msg = lua_tolstring(L, -1, &lmsg); + const char *tp = msg + lmsg - (sizeof(LUA_QL("<eof>")) - 1); + if (strstr(msg, LUA_QL("<eof>")) == tp) { + lua_pop(L, 1); + return 1; + } + } + return 0; /* else... */ +} + + +static int pushline (lua_State *L, int firstline) { + char buffer[LUA_MAXINPUT]; + char *b = buffer; + size_t l; + const char *prmt = get_prompt(L, firstline); + if (lua_readline(L, b, prmt) == 0) + return 0; /* no input */ + l = strlen(b); + if (l > 0 && b[l-1] == '\n') /* line ends with newline? */ + b[l-1] = '\0'; /* remove it */ + if (firstline && b[0] == '=') /* first line starts with `=' ? */ + lua_pushfstring(L, "return %s", b+1); /* change it to `return' */ + else + lua_pushstring(L, b); + lua_freeline(L, b); + return 1; +} + + +static int loadline (lua_State *L) { + int status; + lua_settop(L, 0); + if (!pushline(L, 1)) + return -1; /* no input */ + for (;;) { /* repeat until gets a complete line */ + status = luaL_loadbuffer(L, lua_tostring(L, 1), lua_strlen(L, 1), "=stdin"); + if (!incomplete(L, status)) break; /* cannot try to add lines? */ + if (!pushline(L, 0)) /* no more input? */ + return -1; + lua_pushliteral(L, "\n"); /* add a new line... 
*/ + lua_insert(L, -2); /* ...between the two lines */ + lua_concat(L, 3); /* join them */ + } + lua_saveline(L, 1); + lua_remove(L, 1); /* remove line */ + return status; +} + + +static void dotty (lua_State *L) { + int status; + const char *oldprogname = progname; + progname = NULL; + while ((status = loadline(L)) != -1) { + if (status == 0) status = docall(L, 0, 0); + report(L, status); + if(repl_stop) + break; + if (status == 0 && lua_gettop(L) > 0) { /* any result to print? */ + lua_getglobal(L, "print"); + lua_insert(L, 1); + if (lua_pcall(L, lua_gettop(L)-1, 0, 0) != 0) + l_message(progname, lua_pushfstring(L, + "error calling " LUA_QL("print") " (%s)", + lua_tostring(L, -1))); + } + } + lua_settop(L, 0); /* clear stack */ + fputs("\n", stdout); + fflush(stdout); + progname = oldprogname; +} +#endif + +//--- +//--- + +//--- +// CHiLL program main +// Initialize state and run script or interactive mode +//--- +int main( int argc, char* argv[] ) +{ + DEBUG_PRINT("%s main()\n", argv[0]); + if (argc > 2) { + fprintf(stderr, "Usage: %s [script_file]\n", argv[0]); + exit(-1); + } + + int fail = 0; + +#ifdef PYTHON + // Create PYTHON interpreter + /* Pass argv[0] to the Python interpreter */ + Py_SetProgramName(argv[0]); + + /* Initialize the Python interpreter. Required. */ + Py_Initialize(); + + /* Add a static module */ + initchill(); + + if (argc == 2) { +/* #ifdef CUDACHILL --- This code is for translating lua to python before interprating. 
--- + //DEBUG_PRINT("\ncalling python\n"); + // file interpretlua.py has routines to read the lua transformation file + PyRun_SimpleString("from interpretlua import *"); + //DEBUG_PRINT("DONE calling python import of functions\n\n"); + char pythoncommand[800]; + sprintf(pythoncommand, "\n\ndopytransform(\"%s\")\0", argv[1]); + //DEBUG_PRINT("in C, running python command '%s'\n", pythoncommand); + + PyRun_SimpleString( pythoncommand ); + #else*/ + FILE* f = fopen(argv[1], "r"); + if(!f){ + printf("can't open script file \"%s\"\n", argv[1]); + exit(-1); + } + PyRun_SimpleFile(f, argv[1]); + fclose(f); + } + if (argc == 1) { + //--- + // Run a CHiLL interpreter + //--- + printf("CUDA-CHiLL v0.2.0 (built on %s)\n", CHILL_BUILD_DATE); + printf("Copyright (C) 2008 University of Southern California\n"); + printf("Copyright (C) 2009-2012 University of Utah\n"); + //is_interactive = true; // let the lua interpreter know. + fflush(stdout); + // TODO: read lines of python code. + //Not sure if we should set fail from interactive mode + printf("CUDA-CHiLL ending...\n"); + fflush(stdout); + } + + //printf("DONE with PyRun_SimpleString()\n"); +// #endif --- endif for CUDACHILL --- +#endif + //END python setup +#ifdef LUA + + //Create interpreter + lua_State* L = lua_open(); + globalL = L; + + //Initialize the std libs + luaL_openlibs(L); + + //Initialize globals + register_globals(L); + + //Register CHiLL functions + register_functions(L); + + if (argc == 2) { + //--- + // Run a CHiLL script from a file + //--- + + //Check that the file can be opened + FILE* f = fopen(argv[1],"r"); + if(!f){ + printf("can't open script file \"%s\"\n", argv[1]); + exit(-1); + } + fclose(f); + + DEBUG_PRINT("\n*********************evaluating file '%s'\n", argv[1]); + + //Evaluate the file + fail = dofile(L, argv[1]); + if(!fail){ + fprintf(stderr, "script success!\n"); + } + } + if (argc == 1 && isatty((int)fileno(stdin))) { + //--- + // Run a CHiLL interpreter + //--- + printf("CUDA-CHiLL v0.2.0 
(built on %s)\n", CHILL_BUILD_DATE); + printf("Copyright (C) 2008 University of Southern California\n"); + printf("Copyright (C) 2009-2012 University of Utah\n"); + is_interactive = true; // let the lua interpreter know. + fflush(stdout); + dotty(L); + //Not sure if we should set fail from interactive mode + printf("CUDA-CHiLL ending...\n"); + fflush(stdout); + } +#endif + + + if (!fail && ir_code != NULL && myloop != NULL && myloop->stmt.size() != 0 && !myloop->stmt[0].xform.is_null()) { +#ifdef CUDACHILL + int lnum; + #ifdef PYTHON + lnum = 0; + #else + lnum = get_loop_num( L ); + #endif + #ifdef BUILD_ROSE + ((IR_cudaroseCode *)(ir_code))->commit_loop(myloop, lnum); + #elif BUILD_SUIF + ((IR_cudasuifCode *)(ir_code))->commit_loop(myloop, lnum); + #endif +#else + int lnum_start; + int lnum_end; + #ifdef PYTHON + lnum_start = get_loop_num_start(); + lnum_end = get_loop_num_end(); + DEBUG_PRINT("calling ROSE code gen? loop num %d\n", lnum); + #else + lnum_start = get_loop_num_start(L); + lnum_end = get_loop_num_end(L); + DEBUG_PRINT("calling ROSE code gen? 
loop num %d - %d\n", lnum_start, lnum_end); + #endif +#endif + #ifdef BUILD_ROSE + //finalize_loop(lnum_start, lnum_end); + //((IR_roseCode*)(ir_cide))->commit_loop(myloop, lnum); + ((IR_roseCode*)(ir_code))->finalizeRose(); + //#elif BUILD_SUIF + //((IR_suifCode*)(ir_code))->commit_loop(myloop, lnum); + #endif + delete ir_code; + } +#ifdef PYTHON + Py_Finalize(); +#endif +#ifdef LUA + lua_close(L); +#endif + return 0; +} diff --git a/chill_run_util.cc b/chill_run_util.cc new file mode 100644 index 0000000..566bc61 --- /dev/null +++ b/chill_run_util.cc @@ -0,0 +1,129 @@ +#include <stdio.h> +#include <string.h> +#include "chill_run_util.hh" + +static std::string to_string(int ival) { + char buffer[4]; + sprintf(buffer, "%d", ival); + return std::string(buffer); +} + +simap_vec_t* make_prog(simap_vec_t* cond) { + return cond; +} + +simap_vec_t* make_cond_gt(simap_t* lhs, simap_t* rhs) { + simap_vec_t* nvec = new simap_vec_t(); + for(simap_t::iterator it = rhs->begin(); it != rhs->end(); it++) + (*lhs)[it->first] -= it->second; + (*lhs)[to_string(0)] -= 1; + nvec->push_back(*lhs); + delete rhs; + delete lhs; + return nvec; +} + +simap_vec_t* make_cond_lt(simap_t* lhs, simap_t* rhs) { + return make_cond_gt(rhs, lhs); +} + +simap_vec_t* make_cond_ge(simap_t* lhs, simap_t* rhs) { + simap_vec_t* nvec = new simap_vec_t(); + for(simap_t::iterator it = rhs->begin(); it != rhs->end(); it++) + (*lhs)[it->first] -= it->second; + nvec->push_back(*lhs); + delete rhs; + delete lhs; + return nvec; +} + +simap_vec_t* make_cond_le(simap_t* lhs, simap_t* rhs) { + return make_cond_ge(rhs, lhs); +} + +simap_vec_t* make_cond_eq(simap_t* lhs, simap_t* rhs) { + simap_vec_t* nvec = new simap_vec_t(); + for(simap_t::iterator it = lhs->begin(); it != lhs->end(); it++) + (*rhs)[it->first] -= it->second; + nvec->push_back(*rhs); + for(simap_t::iterator it = rhs->begin(); it != rhs->end(); it++) + it->second = -it->second; + nvec->push_back(*rhs); + delete rhs; + delete lhs; + return nvec; +} + 
+simap_t* make_cond_item_add(simap_t* lhs, simap_t* rhs) { + for(simap_t::iterator it = lhs->begin(); it != lhs->end(); it++) + (*rhs)[it->first] += it->second; + delete lhs; + return rhs; +} + +simap_t* make_cond_item_sub(simap_t* lhs, simap_t* rhs) { + for(simap_t::iterator it = lhs->begin(); it != lhs->end(); it++) + (*rhs)[it->first] -= it->second; + delete lhs; + return rhs; +} + +simap_t* make_cond_item_mul(simap_t* lhs, simap_t* rhs) { + (*lhs)[to_string(0)] += 0; + (*rhs)[to_string(0)] += 0; + if(rhs->size() == 1) { + int t = (*rhs)[to_string(0)]; + for(simap_t::iterator it = lhs->begin(); it != lhs->end(); it++) + it->second *= t; + delete rhs; + return lhs; + } + else if(rhs->size() == 1) { + int t = (*lhs)[to_string(0)]; + for(simap_t::iterator it = rhs->begin(); it != rhs->end(); it++) + it->second *= t; + delete lhs; + return rhs; + } + else { + fprintf(stderr, "require Presburger formula"); + delete lhs; + delete rhs; + // exit(2); <-- this may be a boost feature + } +} + +simap_t* make_cond_item_neg(simap_t* expr) { + for (simap_t::iterator it = expr->begin(); it != expr->end(); it++) { + it->second = -(it->second); + } + return expr; +} + +simap_t* make_cond_item_number(int n) { + simap_t* nmap = new simap_t(); + (*nmap)[to_string(0)] = n; + return nmap; +} + +simap_t* make_cond_item_variable(const char* var) { + simap_t* nmap = new simap_t(); + (*nmap)[std::string(var)] = 1; + return nmap; +} + +simap_t* make_cond_item_level(int n) { + simap_t* nmap = new simap_t(); + (*nmap)[to_string(n)] = 1; + return nmap; +} + +/*simap_t* make_cond_item_variable(const char* varname) { + simap_t* nmap = new simap_t(); +#ifdef PYTHON + PyObject* globals = PyEval_GetGlobals(); + PyObject* itemval = PyDict_GetItemString(globals, varname); + +#elif LUA +#endif +}*/ diff --git a/chill_run_util.hh b/chill_run_util.hh new file mode 100644 index 0000000..0b716be --- /dev/null +++ b/chill_run_util.hh @@ -0,0 +1,26 @@ +#ifndef CHILL_RUN_UTIL_HH +#define CHILL_RUN_UTIL_HH 
+ +#include <vector> +#include <map> +#include <string> + +typedef std::map<std::string, int> simap_t; +typedef std::vector<std::map<std::string, int> > simap_vec_t; + +simap_vec_t* make_prog(simap_vec_t* cond); +simap_vec_t* make_cond_gt(simap_t* lhs, simap_t* rhs); +simap_vec_t* make_cond_lt(simap_t* lhs, simap_t* rhs); +simap_vec_t* make_cond_ge(simap_t* lhs, simap_t* rhs); +simap_vec_t* make_cond_le(simap_t* lhs, simap_t* rhs); +simap_vec_t* make_cond_eq(simap_t* lhs, simap_t* rhs); +simap_t* make_cond_item_add(simap_t* lhs, simap_t* rhs); +simap_t* make_cond_item_sub(simap_t* lhs, simap_t* rhs); +simap_t* make_cond_item_mul(simap_t* lhs, simap_t* rhs); +simap_t* make_cond_item_neg(simap_t* expr); +simap_t* make_cond_item_number(int n); +simap_t* make_cond_item_variable(const char* var); +simap_t* make_cond_item_level(int n); +simap_vec_t* parse_relation_vector(const char* expr); + +#endif diff --git a/chilldebug.h b/chilldebug.h new file mode 100644 index 0000000..4abbb82 --- /dev/null +++ b/chilldebug.h @@ -0,0 +1,11 @@ + +// a central place to turn on debugging messages + +// enable the next line to get lots of output +//#define DEBUGCHILL + +#ifdef DEBUGCHILL +#define DEBUG_PRINT(args...) fprintf(stderr, args ) +#else +#define DEBUG_PRINT(args...) /* Don't do anything */ +#endif diff --git a/chillmodule.cc b/chillmodule.cc new file mode 100644 index 0000000..fa55199 --- /dev/null +++ b/chillmodule.cc @@ -0,0 +1,1834 @@ + +// chill interface to python + +#include "chilldebug.h" + +#ifdef CUDACHILL + +#include "rose.h" // ?? 
+#include "loop_cuda_rose.hh" +#include "ir_rose.hh" +#include "ir_cudarose.hh" + +#include <vector> + +#else + +#include "chill_run_util.hh" + +#include <signal.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> + +#include <omega.h> +#include "loop.hh" +#include "ir_code.hh" +#ifdef BUILD_ROSE +#include "ir_rose.hh" +#elif BUILD_SUIF +#include "ir_suif.hh" +#endif + +#endif + +#include "chillmodule.hh" + +// TODO +#undef _POSIX_C_SOURCE +#undef _XOPEN_SOURCE +#include <Python.h> + +using namespace omega; + +// -- Cuda CHiLL global variables -- +#ifdef CUDACHILL + +extern LoopCuda *myloop; +extern IR_Code *ir_code; +extern std::vector<IR_Control *> ir_controls; +extern std::vector<int> loops; + +#else + +extern Loop *myloop; +extern IR_Code *ir_code; +extern bool is_interactive; +extern bool repl_stop; + +std::string procedure_name; +std::string source_filename; + +int loop_start_num; +int loop_end_num; + +extern std::vector<IR_Control *> ir_controls; +extern std::vector<int> loops; + +#endif + +// ----------------------- // +// CHiLL support functions // +// ----------------------- // +#ifndef CUDACHILL +// not sure yet if this actually needs to be exposed to the python interface +// these four functions are here to maintain similarity to the Lua interface +int get_loop_num_start() { + return loop_start_num; +} + +int get_loop_num_end() { + return loop_end_num; +} + +static void set_loop_num_start(int start_num) { + loop_start_num = start_num; +} + +static void set_loop_num_end(int end_num) { + loop_end_num = end_num; +} + +// TODO: finalize_loop(int,int) and init_loop(int,int) are identical to thier Lua counterparts. 
+// consider integrating them + +void finalize_loop(int loop_num_start, int loop_num_end) { + if (loop_num_start == loop_num_end) { + ir_code->ReplaceCode(ir_controls[loops[loop_num_start]], myloop->getCode()); + ir_controls[loops[loop_num_start]] = NULL; + } + else { + std::vector<IR_Control *> parm; + for (int i = loops[loop_num_start]; i <= loops[loop_num_end]; i++) + parm.push_back(ir_controls[i]); + IR_Block *block = ir_code->MergeNeighboringControlStructures(parm); + ir_code->ReplaceCode(block, myloop->getCode()); + for (int i = loops[loop_num_start]; i <= loops[loop_num_end]; i++) { + delete ir_controls[i]; + ir_controls[i] = NULL; + } + } + delete myloop; +} +void finalize_loop() { + int loop_num_start = get_loop_num_start(); + int loop_num_end = get_loop_num_end(); + finalize_loop(loop_num_start, loop_num_end); +} +static void init_loop(int loop_num_start, int loop_num_end) { + if (source_filename.empty()) { + fprintf(stderr, "source file not set when initializing the loop"); + if (!is_interactive) + exit(2); + } + else { + if (ir_code == NULL) { + #ifdef BUILD_ROSE + if (procedure_name.empty()) + procedure_name = "main"; + #elif BUILD_SUIF + if (procedure_number == -1) + procedure_number = 0; + #endif + + #ifdef BUILD_ROSE + ir_code = new IR_roseCode(source_filename.c_str(), procedure_name.c_str()); + #elif BUILD_SUIF + ir_code = new IR_suifCode(source_filename.c_str(), procedure_name.c_str()); + #endif + + IR_Block *block = ir_code->GetCode(); + ir_controls = ir_code->FindOneLevelControlStructure(block); + for (int i = 0; i < ir_controls.size(); i++) { + if (ir_controls[i]->type() == IR_CONTROL_LOOP) + loops.push_back(i); + } + delete block; + } + if (myloop != NULL && myloop->isInitialized()) { + finalize_loop(); + } + } + set_loop_num_start(loop_num_start); + set_loop_num_end(loop_num_end); + if (loop_num_end < loop_num_start) { + fprintf(stderr, "the last loop must be after the start loop"); + if (!is_interactive) + exit(2); + } + if (loop_num_end >= 
loops.size()) { + fprintf(stderr, "loop %d does not exist", loop_num_end); + if (!is_interactive) + exit(2); + } + std::vector<IR_Control *> parm; + for (int i = loops[loop_num_start]; i <= loops[loop_num_end]; i++) { + if (ir_controls[i] == NULL) { + fprintf(stderr, "loop has already been processed"); + if (!is_interactive) + exit(2); + } + parm.push_back(ir_controls[i]); + } + IR_Block *block = ir_code->MergeNeighboringControlStructures(parm); + myloop = new Loop(block); + delete block; + //if (is_interactive) printf("%s ", PROMPT_STRING); +} +#endif + +// ----------------------- // +// Python support funcions // +// ----------------------- // + +// -- CHiLL support -- // +static void strict_arg_num(PyObject* args, int arg_num, const char* fname = NULL) { + int arg_given = PyTuple_Size(args); + char msg[128]; + if(arg_num != arg_given) { + if(fname) + sprintf(msg, "%s: expected %i arguments, was given %i.", fname, arg_num, arg_given); + else + sprintf(msg, "Expected %i argumets, was given %i.", arg_num, arg_given); + throw std::runtime_error(msg); + } +} + +static int strict_arg_range(PyObject* args, int arg_min, int arg_max, const char* fname = NULL) { + int arg_given = PyTuple_Size(args); + char msg[128]; + if(arg_given < arg_min || arg_given > arg_max) { + if(fname) + sprintf(msg, "%s: expected %i to %i arguments, was given %i.", fname, arg_min, arg_max, arg_given); + else + sprintf(msg, "Expected %i to %i, argumets, was given %i.", arg_min, arg_max, arg_given); + throw std::runtime_error(msg); + } + return arg_given; +} + +static int intArg(PyObject* args, int index, int dval = 0) { + if(PyTuple_Size(args) <= index) + return dval; + int ival; + PyObject *item = PyTuple_GetItem(args, index); + Py_INCREF(item); + if (PyInt_Check(item)) ival = PyInt_AsLong(item); + else { + fprintf(stderr, "argument at index %i is not an int\n", index); + exit(-1); + } + return ival; +} + +static std::string strArg(PyObject* args, int index, const char* dval = NULL) { + 
if(PyTuple_Size(args) <= index) + return dval; + std::string strval; + PyObject *item = PyTuple_GetItem(args, index); + Py_INCREF(item); + if (PyString_Check(item)) strval = strdup(PyString_AsString(item)); + else { + fprintf(stderr, "argument at index %i is not an string\n", index); + exit(-1); + } + return strval; +} + +static bool boolArg(PyObject* args, int index, bool dval = false) { + if(PyTuple_Size(args) <= index) + return dval; + bool bval; + PyObject* item = PyTuple_GetItem(args, index); + Py_INCREF(item); + return (bool)PyObject_IsTrue(item); +} + +static bool tostringintmapvector(PyObject* args, int index, std::vector<std::map<std::string,int> >& vec) { + if(PyTuple_Size(args) <= index) + return false; + PyObject* seq = PyTuple_GetItem(args, index); + //TODO: Typecheck + int seq_len = PyList_Size(seq); + for(int i = 0; i < seq_len; i++) { + std::map<std::string,int> map; + PyObject* dict = PyList_GetItem(seq, i); + PyObject* keys = PyDict_Keys(dict); + //TODO: Typecheck + int dict_len = PyList_Size(keys); + for(int j = 0; j < dict_len; j++) { + PyObject* key = PyList_GetItem(keys, j); + PyObject* value = PyDict_GetItem(dict, key); + std::string str_key = strdup(PyString_AsString(key)); + int int_value = PyInt_AsLong(value); + map[str_key] = int_value; + } + vec.push_back(map); + } + return true; +} + +static bool tointvector(PyObject* seq, std::vector<int>& vec) { + //TODO: Typecheck + int seq_len = PyList_Size(seq); + for(int i = 0; i < seq_len; i++) { + PyObject* item = PyList_GetItem(seq, i); + vec.push_back(PyInt_AsLong(item)); + } + return true; +} + +static bool tointvector(PyObject* args, int index, std::vector<int>& vec) { + if(PyTuple_Size(args) <= index) + return false; + PyObject* seq = PyTuple_GetItem(args, index); + return tointvector(seq, vec); +} + +static bool tointset(PyObject* args, int index, std::set<int>& set) { + if(PyTuple_Size(args) <= index) + return false; + PyObject* seq = PyTuple_GetItem(args, index); + //TODO: Typecheck + 
int seq_len = PyList_Size(seq); + for(int i = 0; i < seq_len; i++) { + PyObject* item = PyList_GetItem(seq, i); + set.insert(PyInt_AsLong(item)); + } + return true; +} +static bool tointmatrix(PyObject* args, int index, std::vector<std::vector<int> >& mat) { + if(PyTuple_Size(args) <= index) + return false; + PyObject* seq_one = PyTuple_GetItem(args, index); + int seq_one_len = PyList_Size(seq_one); + for(int i = 0; i < seq_one_len; i++) { + std::vector<int> vec; + PyObject* seq_two = PyList_GetItem(seq_one, i); + int seq_two_len = PyList_Size(seq_two); + for(int j = 0; j < seq_two_len; j++) { + PyObject* item = PyList_GetItem(seq_two, j); + vec.push_back(PyInt_AsLong(item)); + } + mat.push_back(vec); + } + return true; +} + +#ifdef CUDACHILL +// ------------------------------ // +// Cuda CHiLL interface functions // +// ------------------------------ // + +static PyObject * +chill_print_code(PyObject *self, PyObject *args) +{ + //DEBUG_PRINT("\nC print_code() PY\n"); + + myloop->printCode(); + + Py_RETURN_NONE; // return Py_BuildValue( "" ); + +} + +static PyObject * +chill_print_ri(PyObject *self, PyObject *args) +{ + //DEBUG_PRINT("\nC chill_print_ri() called from python\n"); + myloop->printRuntimeInfo(); + DEBUG_PRINT("\n"); + Py_RETURN_NONE; // return Py_BuildValue( "" ); +} + +static PyObject * +chill_print_idx(PyObject *self, PyObject *args) +{ + //DEBUG_PRINT("\nC chill_print_idx() called from python\n"); + myloop->printIndexes(); + DEBUG_PRINT("\n"); + Py_RETURN_NONE; // return Py_BuildValue( "" ); +} + +static PyObject * +chill_print_dep(PyObject *self, PyObject *args) +{ + DEBUG_PRINT("\nC chill_print_dep()\n"); + std::cout << myloop->dep; + Py_RETURN_NONE; // return Py_BuildValue( "" ); +} + +static PyObject * +chill_print_space(PyObject *self, PyObject *args) +{ + DEBUG_PRINT("\nC chill_print_space()\n"); + for (int i = 0; i < myloop->stmt.size(); i++) { + DEBUG_PRINT("s%d: ", i+1); + Relation r; + if (!myloop->stmt[i].xform.is_null()) + r = 
Composition(copy(myloop->stmt[i].xform), copy(myloop->stmt[i].IS)); + else + r = copy(myloop->stmt[i].IS); + r.simplify(2, 4); + r.print(); + } + Py_RETURN_NONE; // return Py_BuildValue( "" ); +} + +static PyObject * +chill_num_statements(PyObject *self, PyObject *args) +{ + //DEBUG_PRINT("\nC chill_num_statements() called from python\n"); + int num = myloop->stmt.size(); + //DEBUG_PRINT("C num_statement() = %d\n", num); + return Py_BuildValue( "i", num ); // BEWARE "d" is DOUBLE, not int +} + +static PyObject * +chill_does_var_exist( PyObject *self, PyObject *args) +{ + DEBUG_PRINT("\nC chill_does_var_exist()\n"); + int yesno = 0; + // TODO if (myloop->symbolExists(symName)) yesno = 1; + DEBUG_PRINT("*** chill_does_var_exist *** UNIMPLEMENTED\n"); + return Py_BuildValue( "i", yesno); // there seems to be no boolean type +} + + +static PyObject * +chill_add_sync(PyObject *self, PyObject *args) +{ + //DEBUG_PRINT("\nC chill_add_sync() *UNTESTED*\n"); + int sstmt = -123; + // char index_name[180]; + static char Buffer[1024]; + static char *index_name = &Buffer[0]; + + if (!PyArg_ParseTuple(args, "is", &sstmt, &index_name)){ + fprintf(stderr, "chill_add_sync, can't parse statement number and name passed from python\n"); + exit(-1); + } + + DEBUG_PRINT("chill_add_sync, statement %d index_name '%s'\n", + sstmt, index_name); + std::string idxName( index_name); // ?? 
+ myloop->addSync(sstmt, idxName); + + Py_RETURN_NONE; // return Py_BuildValue( "" ); +} + +static PyObject * +chill_rename_index(PyObject *self, PyObject *args) +{ + DEBUG_PRINT("\nC chill_rename_index() called from python\n"); + int sstmt; + //char oldname[80], newname[80]; + static char old[1024], newn[1024]; + + static char *oldname = &old[0], *newname=&newn[0]; + + if (!PyArg_ParseTuple(args, "iss", &sstmt, &oldname, &newname)){ + fprintf(stderr, "chill_rename_index, can't parse statement number and names passed from python\n"); + exit(-1); + } + + //DEBUG_PRINT("chill_rename_index, statement %d oldname '%s' newname '%s'\n", + //sstmt, oldname, newname); + + std::string idxName(oldname); + std::string newName(newname); + + //DEBUG_PRINT("calling myloop->renameIndex( %d, %s, %s )\n", + //sstmt, idxName.c_str(), newName.c_str()); + + myloop->renameIndex(sstmt, idxName, newName); + + //DEBUG_PRINT("after myloop->renameIndex()\n"); + + Py_RETURN_NONE; // return Py_BuildValue( "" ); +} + + + +//THIS NEEDS TO MOVE + + + +static PyObject * +chill_permute_v2(PyObject *self, PyObject *args) +{ + //DEBUG_PRINT("C permute_v2()\n"); + //int tot = sizeof(args); + //int things = tot / sizeof(PyObject *); + //DEBUG_PRINT("tot %d bytes, %d things\n", tot, things); + + int sstmt = -123; + PyObject *pyObj; + + //if (!PyArg_ParseTuple( args, "iO", &sstmt, &pyObj)) { + //if (!PyArg_ParseTuple( args, "i", &sstmt)) { + if (!PyArg_ParseTuple( args, "O", &pyObj)) { // everything on a single tuple + fprintf(stderr, "failed to parse tuple\n"); + exit(-1); + } + Py_XINCREF(pyObj); + + // the ONLY arg is a tuple. 
figure out how big it is + int tupleSize = PyTuple_Size(pyObj); + //DEBUG_PRINT("%d things in order tuple\n", tupleSize); + + // first has to be the statement number + PyObject *tupleItem = PyTuple_GetItem(pyObj, 0); + Py_XINCREF(tupleItem); + if (PyInt_Check( tupleItem )) sstmt = PyInt_AsLong( tupleItem ); + else { + fflush(stdout); + fprintf(stderr, "first tuple item in chill_permute_v2 is not an int?\n"); + exit(-1); + } + + //DEBUG_PRINT("stmt %d\n", sstmt); + + char **strings; + std::vector<std::string> order; + std::string *cppstrptr; + std::string cppstr; + + strings = (char **) malloc( sizeof(char *) * tupleSize ) ; // too big + for (int i=1; i<tupleSize; i++) { + tupleItem = PyTuple_GetItem(pyObj, i); + Py_XINCREF(tupleItem); + int im1 = i-1; // offset needed for the actual string vector + if (PyString_Check( tupleItem)) { + strings[im1] = strdup(PyString_AsString(tupleItem)); + //DEBUG_PRINT("item %d = '%s'\n", i, strings[im1]); + //cppstrptr = new std::string( strings[im1] ); + //order.push_back( &(new std::string( strings[im1] ))); + //order.push_back( &cppstrptr ); + + cppstr = strings[im1]; + order.push_back( cppstr ); + } + else { + fprintf(stderr, "later parameter was not a string?\n"); + exit(-1); + } + + } + + myloop->permute_cuda(sstmt,order); + //DEBUG_PRINT("returned from permute_cuda()\n"); + Py_RETURN_NONE; // return Py_BuildValue( "" ); +} + + +static PyObject * +chill_tile_v2_3arg( PyObject *self, PyObject *args) +{ + //DEBUG_PRINT("in chillmodule.cc, chill_tile_v2_3arg()\n"); + + int sstmt, level, tile_size, outer_level; + //char index_name[80], control_name[80]; + static char *index_name, *control_name; + int tiling_method; + + if (!PyArg_ParseTuple(args, "iii", &sstmt, &level, &outer_level)) { + fprintf(stderr,"chill_tile_v2, can't parse parameters passed from python\n"); + exit(-1); + } + + // 3 parameter version + //DEBUG_PRINT("chill_tile_v2( %d %d %d) (3 parameter version) \n", + //sstmt,level,outer_level); + 
myloop->tile_cuda(sstmt,level,outer_level); + //DEBUG_PRINT("chill_tile_v2 3 parameter version returning normally\n"); + Py_RETURN_NONE; +} + + +static PyObject * +chill_tile_v2_7arg( PyObject *self, PyObject *args) +{ + //DEBUG_PRINT("in chillmodule.cc, chill_tile_v2_7arg()\n"); + + int sstmt, level, tile_size, outer_level; + //char index_name[80], control_name[80]; + static char iname[1024], cname[1024]; + static char *index_name = &iname[0], *control_name=&cname[0]; + int tiling_method; + + if (!PyArg_ParseTuple(args, "iiiissi", + &sstmt, &level, &tile_size, &outer_level, + &index_name, &control_name, &tiling_method)){ + fprintf(stderr, "chill_tile_v2_7arg, can't parse parameters passed from python\n"); + exit(-1); + } + + //DEBUG_PRINT("7 parameter version was called?\n"); + + // 7 parameter version was called + //DEBUG_PRINT("tile_v2( %d, %d, %d, %d ... )\n", + // sstmt, level, tile_size, outer_level); + + //DEBUG_PRINT("tile_v2( %d, %d, %d, %d, %s, %s, %d)\n", + //sstmt,level,tile_size,outer_level,index_name, control_name, tiling_method); + + TilingMethodType method = StridedTile; + if (tiling_method == 0) method = StridedTile; + else if (tiling_method == 1) method = CountedTile; + else fprintf(stderr, "ERROR: tile_v2 illegal tiling method, using StridedTile\n"); + + //DEBUG_PRINT("outer level %d\n", outer_level); + //DEBUG_PRINT("calling myloop->tile_cuda( %d, %d, %d, %d, %s, %s, method)\n", + // sstmt, level, tile_size, outer_level, index_name, control_name); + + // BUH level+1? 
+ myloop->tile_cuda(sstmt, level, tile_size, outer_level, index_name, control_name, method); + Py_RETURN_NONE; +} + + +static PyObject * +chill_cur_indices(PyObject *self, PyObject *args) +{ + int stmt_num = -123; + if (!PyArg_ParseTuple(args, "i", &stmt_num)){ + fprintf(stderr, "chill_cur_indides, can't parse statement number passed from python\n"); + exit(-1); + } + //DEBUG_PRINT("cur_indices( %d )\n", stmt_num); + + char formatstring[1024]; + for (int i=0; i<1024; i++) formatstring[i] = '\0'; + + int num = myloop->idxNames[stmt_num].size(); + for(int i=0; i<num; i++){ + //DEBUG_PRINT("myloop->idxNames[%d] index %d = '%s'\n", + //stmt_num, i, myloop->idxNames[stmt_num][i].c_str()); + + // backwards, works because all entries are the same + //sprintf(formatstring, "i %s", formatstring); + strcat( formatstring, "s "); + // put this in a list or something to pass back to python + } + + int l = strlen(formatstring); + if (l > 0) formatstring[l-1] = '\0'; + + //DEBUG_PRINT("%d current indices, format string '%s'\n\n",num,formatstring); + //DEBUG_PRINT("%d current indices\n\n", num); + + //return Py_BuildValue(formatstring, myloop->idxNames[stmt_num][0].c_str(),myloop->idxNames[stmt_num][1].c_str() ); + + // I don't know a clean way to do this. 
+ if (num == 2) return Py_BuildValue(formatstring, myloop->idxNames[stmt_num][0].c_str(), + myloop->idxNames[stmt_num][1].c_str()); + if (num == 3) return Py_BuildValue(formatstring, myloop->idxNames[stmt_num][0].c_str(), + myloop->idxNames[stmt_num][1].c_str(), + myloop->idxNames[stmt_num][2].c_str()); + if (num == 4) return Py_BuildValue(formatstring, myloop->idxNames[stmt_num][0].c_str(), + myloop->idxNames[stmt_num][1].c_str(), + myloop->idxNames[stmt_num][2].c_str(), + myloop->idxNames[stmt_num][3].c_str()); + if (num == 5) return Py_BuildValue(formatstring, myloop->idxNames[stmt_num][0].c_str(), + myloop->idxNames[stmt_num][1].c_str(), + myloop->idxNames[stmt_num][2].c_str(), + myloop->idxNames[stmt_num][3].c_str(), + myloop->idxNames[stmt_num][4].c_str()); + if (num == 6) return Py_BuildValue(formatstring, myloop->idxNames[stmt_num][0].c_str(), + myloop->idxNames[stmt_num][1].c_str(), + myloop->idxNames[stmt_num][2].c_str(), + myloop->idxNames[stmt_num][3].c_str(), + myloop->idxNames[stmt_num][4].c_str(), + myloop->idxNames[stmt_num][5].c_str()); + if (num == 7) return Py_BuildValue(formatstring, myloop->idxNames[stmt_num][0].c_str(), + myloop->idxNames[stmt_num][1].c_str(), + myloop->idxNames[stmt_num][2].c_str(), + myloop->idxNames[stmt_num][3].c_str(), + myloop->idxNames[stmt_num][4].c_str(), + myloop->idxNames[stmt_num][5].c_str(), + myloop->idxNames[stmt_num][6].c_str()); + if (num == 8) return Py_BuildValue(formatstring, myloop->idxNames[stmt_num][0].c_str(), + myloop->idxNames[stmt_num][1].c_str(), + myloop->idxNames[stmt_num][2].c_str(), + myloop->idxNames[stmt_num][3].c_str(), + myloop->idxNames[stmt_num][4].c_str(), + myloop->idxNames[stmt_num][5].c_str(), + myloop->idxNames[stmt_num][6].c_str(), + myloop->idxNames[stmt_num][7].c_str()); + if (num == 9) return Py_BuildValue(formatstring, myloop->idxNames[stmt_num][0].c_str(), + myloop->idxNames[stmt_num][1].c_str(), + myloop->idxNames[stmt_num][2].c_str(), + myloop->idxNames[stmt_num][3].c_str(), + 
myloop->idxNames[stmt_num][4].c_str(), + myloop->idxNames[stmt_num][5].c_str(), + myloop->idxNames[stmt_num][6].c_str(), + myloop->idxNames[stmt_num][7].c_str(), + myloop->idxNames[stmt_num][8].c_str()); + if (num == 10) return Py_BuildValue(formatstring, myloop->idxNames[stmt_num][0].c_str(), + myloop->idxNames[stmt_num][1].c_str(), + myloop->idxNames[stmt_num][2].c_str(), + myloop->idxNames[stmt_num][3].c_str(), + myloop->idxNames[stmt_num][4].c_str(), + myloop->idxNames[stmt_num][5].c_str(), + myloop->idxNames[stmt_num][6].c_str(), + myloop->idxNames[stmt_num][7].c_str(), + myloop->idxNames[stmt_num][8].c_str(), + myloop->idxNames[stmt_num][9].c_str()); + if (num == 11) return Py_BuildValue(formatstring, myloop->idxNames[stmt_num][0].c_str(), + myloop->idxNames[stmt_num][1].c_str(), + myloop->idxNames[stmt_num][2].c_str(), + myloop->idxNames[stmt_num][3].c_str(), + myloop->idxNames[stmt_num][4].c_str(), + myloop->idxNames[stmt_num][5].c_str(), + myloop->idxNames[stmt_num][6].c_str(), + myloop->idxNames[stmt_num][7].c_str(), + myloop->idxNames[stmt_num][8].c_str(), + myloop->idxNames[stmt_num][9].c_str(), + myloop->idxNames[stmt_num][10].c_str()); + if (num == 12) return Py_BuildValue(formatstring, myloop->idxNames[stmt_num][0].c_str(), + myloop->idxNames[stmt_num][1].c_str(), + myloop->idxNames[stmt_num][2].c_str(), + myloop->idxNames[stmt_num][3].c_str(), + myloop->idxNames[stmt_num][4].c_str(), + myloop->idxNames[stmt_num][5].c_str(), + myloop->idxNames[stmt_num][6].c_str(), + myloop->idxNames[stmt_num][7].c_str(), + myloop->idxNames[stmt_num][8].c_str(), + myloop->idxNames[stmt_num][9].c_str(), + myloop->idxNames[stmt_num][10].c_str(), + myloop->idxNames[stmt_num][11].c_str()); + if (num == 13) return Py_BuildValue(formatstring, myloop->idxNames[stmt_num][0].c_str(), + myloop->idxNames[stmt_num][1].c_str(), + myloop->idxNames[stmt_num][2].c_str(), + myloop->idxNames[stmt_num][3].c_str(), + myloop->idxNames[stmt_num][4].c_str(), + 
myloop->idxNames[stmt_num][5].c_str(), + myloop->idxNames[stmt_num][6].c_str(), + myloop->idxNames[stmt_num][7].c_str(), + myloop->idxNames[stmt_num][8].c_str(), + myloop->idxNames[stmt_num][9].c_str(), + myloop->idxNames[stmt_num][10].c_str(), + myloop->idxNames[stmt_num][11].c_str(), + myloop->idxNames[stmt_num][12].c_str()); + if (num == 14) return Py_BuildValue(formatstring, myloop->idxNames[stmt_num][0].c_str(), + myloop->idxNames[stmt_num][1].c_str(), + myloop->idxNames[stmt_num][2].c_str(), + myloop->idxNames[stmt_num][3].c_str(), + myloop->idxNames[stmt_num][4].c_str(), + myloop->idxNames[stmt_num][5].c_str(), + myloop->idxNames[stmt_num][6].c_str(), + myloop->idxNames[stmt_num][7].c_str(), + myloop->idxNames[stmt_num][8].c_str(), + myloop->idxNames[stmt_num][9].c_str(), + myloop->idxNames[stmt_num][10].c_str(), + myloop->idxNames[stmt_num][11].c_str(), + myloop->idxNames[stmt_num][12].c_str(), + myloop->idxNames[stmt_num][13].c_str()); + if (num == 15) return Py_BuildValue(formatstring, myloop->idxNames[stmt_num][0].c_str(), + myloop->idxNames[stmt_num][1].c_str(), + myloop->idxNames[stmt_num][2].c_str(), + myloop->idxNames[stmt_num][3].c_str(), + myloop->idxNames[stmt_num][4].c_str(), + myloop->idxNames[stmt_num][5].c_str(), + myloop->idxNames[stmt_num][6].c_str(), + myloop->idxNames[stmt_num][7].c_str(), + myloop->idxNames[stmt_num][8].c_str(), + myloop->idxNames[stmt_num][9].c_str(), + myloop->idxNames[stmt_num][10].c_str(), + myloop->idxNames[stmt_num][11].c_str(), + myloop->idxNames[stmt_num][12].c_str(), + myloop->idxNames[stmt_num][13].c_str(), + myloop->idxNames[stmt_num][14].c_str()); + if (num == 16) return Py_BuildValue(formatstring, myloop->idxNames[stmt_num][0].c_str(), + myloop->idxNames[stmt_num][1].c_str(), + myloop->idxNames[stmt_num][2].c_str(), + myloop->idxNames[stmt_num][3].c_str(), + myloop->idxNames[stmt_num][4].c_str(), + myloop->idxNames[stmt_num][5].c_str(), + myloop->idxNames[stmt_num][6].c_str(), + 
myloop->idxNames[stmt_num][7].c_str(), + myloop->idxNames[stmt_num][8].c_str(), + myloop->idxNames[stmt_num][9].c_str(), + myloop->idxNames[stmt_num][10].c_str(), + myloop->idxNames[stmt_num][11].c_str(), + myloop->idxNames[stmt_num][12].c_str(), + myloop->idxNames[stmt_num][13].c_str(), + myloop->idxNames[stmt_num][14].c_str(), + myloop->idxNames[stmt_num][15].c_str()); + if (num == 17) return Py_BuildValue(formatstring, myloop->idxNames[stmt_num][0].c_str(), + myloop->idxNames[stmt_num][1].c_str(), + myloop->idxNames[stmt_num][2].c_str(), + myloop->idxNames[stmt_num][3].c_str(), + myloop->idxNames[stmt_num][4].c_str(), + myloop->idxNames[stmt_num][5].c_str(), + myloop->idxNames[stmt_num][6].c_str(), + myloop->idxNames[stmt_num][7].c_str(), + myloop->idxNames[stmt_num][8].c_str(), + myloop->idxNames[stmt_num][9].c_str(), + myloop->idxNames[stmt_num][10].c_str(), + myloop->idxNames[stmt_num][11].c_str(), + myloop->idxNames[stmt_num][12].c_str(), + myloop->idxNames[stmt_num][13].c_str(), + myloop->idxNames[stmt_num][14].c_str(), + myloop->idxNames[stmt_num][15].c_str(), + myloop->idxNames[stmt_num][16].c_str()); + if (num == 18) return Py_BuildValue(formatstring, myloop->idxNames[stmt_num][0].c_str(), + myloop->idxNames[stmt_num][1].c_str(), + myloop->idxNames[stmt_num][2].c_str(), + myloop->idxNames[stmt_num][3].c_str(), + myloop->idxNames[stmt_num][4].c_str(), + myloop->idxNames[stmt_num][5].c_str(), + myloop->idxNames[stmt_num][6].c_str(), + myloop->idxNames[stmt_num][7].c_str(), + myloop->idxNames[stmt_num][8].c_str(), + myloop->idxNames[stmt_num][9].c_str(), + myloop->idxNames[stmt_num][10].c_str(), + myloop->idxNames[stmt_num][11].c_str(), + myloop->idxNames[stmt_num][12].c_str(), + myloop->idxNames[stmt_num][13].c_str(), + myloop->idxNames[stmt_num][14].c_str(), + myloop->idxNames[stmt_num][15].c_str(), + myloop->idxNames[stmt_num][16].c_str(), + myloop->idxNames[stmt_num][17].c_str()); + + fprintf(stderr, "going to die horribly, num=%d\n", num); +} + + 
+static PyObject * +chill_block_indices(PyObject *self, PyObject *args) { + + // I'm unsure what the legal states are here + // is it always "bx", or ("bx" and "by") ? + int howmany = 0; + char *loopnames[2]; + if (myloop->cu_bx > 1) { + loopnames[howmany] = strdup("bx"); + howmany++; + } + if (myloop->cu_by > 1) { + loopnames[howmany] = strdup("by"); + howmany++; + } + + if (howmany == 0) return Py_BuildValue("()"); + if (howmany == 1) return Py_BuildValue("(s)", loopnames[0]); + if (howmany == 2) return Py_BuildValue("(ss)", loopnames[0], loopnames[1]); + fprintf(stderr, "chill_block_indices(), gonna die, howmany == %d", howmany); + exit(666); + + Py_RETURN_NONE; +} + +static PyObject * +chill_thread_indices(PyObject *self, PyObject *args) { + + // I'm unsure what the legal states are here + // is it always "tx", or ("tx" and "ty") or ("tx" and "ty" and "tz") ? + int howmany = 0; + char *loopnames[3]; + if (myloop->cu_tx > 1) { + loopnames[howmany++] = strdup("tx"); + } + if (myloop->cu_ty > 1) { + loopnames[howmany++] = strdup("ty"); + } + if (myloop->cu_tz > 1) { + loopnames[howmany++] = strdup("tz"); + } + + if (howmany == 0) return Py_BuildValue("()"); + if (howmany == 1) return Py_BuildValue("(s)", + loopnames[0]); + if (howmany == 2) return Py_BuildValue("(ss)", + loopnames[0], + loopnames[1]); + if (howmany == 3) return Py_BuildValue("(sss)", + loopnames[0], + loopnames[1], + loopnames[2]); + + fprintf(stderr, "chill_thread_indices(), gonna die, howmany == %d", howmany); + exit(999); +} + + + + + +static PyObject * +block_dims(PyObject *self, PyObject *args) +{ + //DEBUG_PRINT("block_dims() returning %d %d\n", myloop->cu_bx, myloop->cu_by); + Py_BuildValue( "i i", myloop->cu_bx, myloop->cu_by); +} + + +static PyObject * +thread_dims(PyObject *self, PyObject *args) +{ + //DEBUG_PRINT("thread_dims() returning %d %d %d\n", + //myloop->cu_tx, myloop->cu_ty, myloop->cu_tz); + + Py_BuildValue( "i i i", myloop->cu_tx, myloop->cu_ty, myloop->cu_tz); +} + + +static 
PyObject * +chill_hard_loop_bounds(PyObject *self, PyObject *args) +{ + //DEBUG_PRINT("hard_loop_bounds("); + int sstmt, level; // input parameters + int upper, lower; // output + + if (!PyArg_ParseTuple(args, "ii", &sstmt, &level)){ + fprintf(stderr, "hard_loop_bounds, "); + fprintf(stderr, "can't parse statement numbers passed from python\n"); + exit(-1); + } + //DEBUG_PRINT(" %d, %d )\n", sstmt, level); + + myloop->extractCudaUB(sstmt, level, upper, lower); + + //DEBUG_PRINT("lower %d upper %d\n", lower, upper); + + Py_BuildValue( "i i", lower, upper); +} + + +static PyObject * +chill_datacopy9(PyObject *self, PyObject *args) +{ + //DEBUG_PRINT("\n\n\n***** datacopy_v2() 9ARGS\n"); + + int sstmt; + int level; + std::string cppstr; + std::string array_name; + std::vector<std::string> new_idxs; + bool allow_extra_read; + int fastest_changing_dimension; + int padding_stride; + int padding_alignment; + bool cuda_shared; + + PyObject *pyObj; + + if (!PyArg_ParseTuple( args, "O", &pyObj)) { // everything on a single tuple + + fprintf(stderr, "failed to parse tuple\n"); + exit(-1); + } + Py_XINCREF( pyObj ); + + //if (PyList_Check(pyObj)) fprintf(stderr, "it's a list\n"); + //if (PyTuple_Check(pyObj)) fprintf(stderr, "it's a tuple\n"); + + + + // the ONLY arg is a tuple. 
figure out how big it is + int tupleSize = PyTuple_Size(pyObj); + //DEBUG_PRINT("%d things in object tuple\n", tupleSize); + + // first has to be the statement number + PyObject *tupleItem1 = PyTuple_GetItem(pyObj, 0); + Py_INCREF(tupleItem1); + if (PyInt_Check( tupleItem1)) sstmt = PyInt_AsLong( tupleItem1 ); + else { + fprintf(stderr, "second tuple item in chill_datacopy9 is not an int?\n"); + exit(-1); + } + //DEBUG_PRINT("stmt %d\n", sstmt); + + PyObject *tupleItem2 = PyTuple_GetItem(pyObj, 1); // second item is level + Py_INCREF(tupleItem2); + if (PyInt_Check( tupleItem2 )) level = PyInt_AsLong( tupleItem2); + else { + fprintf(stderr, "second tuple item in chill_datacopy9 is not an int?\n"); + exit(-1); + } + //DEBUG_PRINT("level %d\n", level ); + + // third item is array name + PyObject *tupleItem3 = PyTuple_GetItem(pyObj, 2); + Py_INCREF(tupleItem3); + array_name = strdup(PyString_AsString(tupleItem3)); + //DEBUG_PRINT("array name '%s'\n", array_name.c_str()); + + + // integer number of indices + PyObject *tupleItem4 = PyTuple_GetItem(pyObj, 3); + Py_INCREF(tupleItem4); + int numindex= PyInt_AsLong( tupleItem4 ); + //DEBUG_PRINT("%d indices\n", numindex); + + + PyObject *tupleItemTEMP; + for (int i=0; i<numindex; i++) { + tupleItemTEMP = PyTuple_GetItem(pyObj, 4+i); + Py_INCREF(tupleItemTEMP); + cppstr = strdup(PyString_AsString(tupleItemTEMP)); + new_idxs.push_back( cppstr ); + //DEBUG_PRINT("%s\n", cppstr.c_str()); + } + + PyObject *tupleItem5 = PyTuple_GetItem(pyObj, 4+numindex); + Py_INCREF(tupleItem5); + allow_extra_read = PyInt_AsLong( tupleItem5 ); + + PyObject *tupleItem6 = PyTuple_GetItem(pyObj, 5+numindex); + Py_INCREF(tupleItem6); + fastest_changing_dimension = PyInt_AsLong( tupleItem6 ); + + PyObject *tupleItem7 = PyTuple_GetItem(pyObj, 6+numindex); + Py_INCREF(tupleItem7); + padding_stride = PyInt_AsLong( tupleItem7 ); + + PyObject *tupleItem8 = PyTuple_GetItem(pyObj, 7+numindex); + Py_INCREF(tupleItem8); + padding_alignment = PyInt_AsLong( 
tupleItem8 ); + + PyObject *tupleItem9 = PyTuple_GetItem(pyObj, 8+numindex); + Py_INCREF(tupleItem9); + cuda_shared = PyInt_AsLong( tupleItem9 ); + + + //DEBUG_PRINT("calling myloop->datacopy_cuda()\n"); + + // corruption happenes in here??? + myloop->datacopy_cuda(sstmt, level, array_name, new_idxs, + allow_extra_read, fastest_changing_dimension, + padding_stride, padding_alignment, cuda_shared); + + DEBUG_PRINT("before attempt (after actual datacopy)\n"); + //myloop->printCode(); // attempt to debug + DEBUG_PRINT("back from attempt\n"); + + //DEBUG_PRINT("datacopy_9args returning\n"); + + Py_RETURN_NONE; +} + + + + + +static PyObject * +chill_datacopy_privatized(PyObject *self, PyObject *args) +{ + //DEBUG_PRINT("C datacopy_privatized\n"); + PyObject *pyObj; + if (!PyArg_ParseTuple( args, "O", &pyObj)) { // everything on a single tuple + fprintf(stderr, "failed to parse tuple\n"); + exit(-1); + } + + PyObject *tupleItem = PyTuple_GetItem(pyObj, 0); // statement number + Py_XINCREF(tupleItem); + int sstmt = PyInt_AsLong( tupleItem ); + + tupleItem = PyTuple_GetItem(pyObj, 1); // start_loop + Py_XINCREF(tupleItem); + std::string start_loop = strdup(PyString_AsString(tupleItem)); + int level = myloop->findCurLevel(sstmt, start_loop); + + + tupleItem = PyTuple_GetItem(pyObj, 2); // array_name + Py_XINCREF(tupleItem); + std::string array_name = strdup(PyString_AsString(tupleItem)); + + // things to hold constant - first a count, then the things + tupleItem = PyTuple_GetItem(pyObj, 3); // how many things in the array + Py_XINCREF(tupleItem); + int howmany = PyInt_AsLong( tupleItem ); + + //DEBUG_PRINT("%d things to hold constant: ", howmany); + std::vector<std::string> holdconstant; + std::string cppstr; + + for (int i=0; i<howmany; i++) { + tupleItem = PyTuple_GetItem(pyObj, 4+i); + Py_XINCREF(tupleItem); + cppstr = strdup(PyString_AsString(tupleItem)); + holdconstant.push_back( cppstr ); // add at end + } + + std::vector<int> privatized_levels(howmany); + for(int 
i=0; i<howmany; i++) { + privatized_levels[i] = myloop->findCurLevel(sstmt, holdconstant[i]); + //DEBUG_PRINT("privatized_levels[ %d ] = %d\n", i, privatized_levels[i] ); + } + + bool allow_extra_read = false; + int fastest_changing_dimension = -1; + int padding_stride = 1; + int padding_alignment = 1; + bool cuda_shared = false; + + + myloop->datacopy_privatized_cuda(sstmt, level, array_name, privatized_levels, + allow_extra_read, fastest_changing_dimension, + padding_stride, padding_alignment, + cuda_shared); + + + Py_RETURN_NONE; +} + + + + + + +static PyObject * +chill_unroll(PyObject *self, PyObject *args) +{ + int sstmt, level, unroll_amount; + + if (!PyArg_ParseTuple(args, "iii", &sstmt, &level, &unroll_amount)) { + fprintf(stderr, "chill_unroll, can't parse parameters passed from python\n"); + exit(-1); + } + + //DEBUG_PRINT("chill_unroll( %d, %d, %d)\n", sstmt, level, unroll_amount ); + bool does_expand = myloop->unroll_cuda(sstmt,level,unroll_amount); + + // TODO return the boolean? + Py_RETURN_NONE; +} + + + + +static PyObject * +chill_cudaize_v2(PyObject *self, PyObject *args) +{ + //DEBUG_PRINT("cudaize_v2\n"); + PyObject *pyObj; + if (!PyArg_ParseTuple( args, "O", &pyObj)) { // everything on a single tuple + fprintf(stderr, "failed to parse tuple\n"); + exit(-1); + } + + // the ONLY arg is a tuple. 
figure out how big it is + int tupleSize = PyTuple_Size(pyObj); + //DEBUG_PRINT("%d things in tuple\n", tupleSize); + + PyObject *tupleItem = PyTuple_GetItem(pyObj, 0); //the kernel name + Py_XINCREF(tupleItem); + std::string kernel_name = strdup(PyString_AsString(tupleItem)); + + std::map<std::string, int> array_sizes; + tupleItem = PyTuple_GetItem(pyObj, 1); // number of array sizes + Py_XINCREF(tupleItem); + int numarraysizes = PyInt_AsLong( tupleItem ); + + std::string cppstr; + int offset = 2; + for (int i=0; i<numarraysizes; i++) { + tupleItem = PyTuple_GetItem(pyObj, offset++); + Py_XINCREF(tupleItem); + cppstr = strdup(PyString_AsString(tupleItem)); + tupleItem = PyTuple_GetItem(pyObj, offset++); // integer size + int siz = PyInt_AsLong( tupleItem ); + + //DEBUG_PRINT("arraysize for %s = %d\n", cppstr.c_str(), siz); + array_sizes.insert( std::make_pair( cppstr, siz )); + } + + + std::vector<std::string> blockIdxs; + tupleItem = PyTuple_GetItem(pyObj, offset++); // integer number of blocks + Py_XINCREF(tupleItem); + int numblocks = PyInt_AsLong( tupleItem ); + //DEBUG_PRINT("%d blocks\n", numblocks); + for (int i=0; i<numblocks; i++) { + tupleItem = PyTuple_GetItem(pyObj, offset++); + cppstr = strdup(PyString_AsString(tupleItem)); + blockIdxs.push_back( cppstr ); + //DEBUG_PRINT("%s\n", cppstr.c_str()); + } + + std::vector<std::string> threadIdxs; + tupleItem = PyTuple_GetItem(pyObj, offset++); // integer number of threads + Py_XINCREF(tupleItem); + int numthreads= PyInt_AsLong( tupleItem ); + //DEBUG_PRINT("%d threads\n", numthreads); + for (int i=0; i<numthreads; i++) { + tupleItem = PyTuple_GetItem(pyObj, offset++); + Py_XINCREF(tupleItem); + cppstr = strdup(PyString_AsString(tupleItem)); + threadIdxs.push_back( cppstr ); + //DEBUG_PRINT("%s\n", cppstr.c_str()); + } + + + myloop->cudaize_v2(kernel_name, array_sizes, blockIdxs, threadIdxs); + + Py_RETURN_NONE; // return Py_BuildValue( "" ); +} + + + +static PyObject *get_loop_num() { + // TODO 
get_loop_num() it's a global value? + fprintf(stderr, "get_loop_num() UNIMPLEMENTED\n"); + exit(-1); +} + + + + +static PyObject * +chill_copy_to_texture(PyObject *self, PyObject *args) +{ + //DEBUG_PRINT("C copy_to_texture() called from python \n"); + const char *array_name; + if (!PyArg_ParseTuple(args, "s", &array_name)){ + fprintf(stderr, "chill_copy_to_texture can't parse array name\n"); + exit(-1); + } + //DEBUG_PRINT("array name = %s\n", array_name); + myloop->copy_to_texture(array_name); + + Py_RETURN_NONE; +} + + + + + + + +static PyObject * +chill_init(PyObject *self, PyObject *args) +{ + DEBUG_PRINT("C chill_init() called from python as read_IR()\n"); + DEBUG_PRINT("C init( "); + const char *filename; + const char *procname; + if (!PyArg_ParseTuple(args, "ss", &filename, &procname)){ + fprintf(stderr, "umwut? can't parse file name and procedure name?\n"); + exit(-1); + } + + int loop_num = 0; + + DEBUG_PRINT("%s, 0, 0 )\n", filename); + + DEBUG_PRINT("GETTING IR CODE in chill_init() in chillmodule.cc\n"); + DEBUG_PRINT("ir_code = new IR_cudaroseCode(%s, %s);\n",filename, procname); + ir_code = new IR_cudaroseCode(filename, procname); //this produces 15000 lines of output + fflush(stdout); + + + + + //protonu--here goes my initializations + //A lot of this code was lifted from Chun's parser.yy + //the plan is now to create the LoopCuda object directly + IR_Block *block = ir_code->GetCode(); + DEBUG_PRINT("ir_code->FindOneLevelControlStructure(block); chillmodule.cc\n"); + ir_controls = ir_code->FindOneLevelControlStructure(block); + + int loop_count = 0; + for (int i = 0; i < ir_controls.size(); i++) { + if (ir_controls[i]->type() == IR_CONTROL_LOOP) { + loops.push_back(i); + loop_count++; + } + } + delete block; + + + std::vector<IR_Control *> parm; + for(int j = 0; j < loop_count; j++) + parm.push_back(ir_controls[loops[j]]); + + + DEBUG_PRINT("block = ir_code->MergeNeighboringControlStructures(parm);\n"); + block = 
ir_code->MergeNeighboringControlStructures(parm); + + //DEBUG_PRINT("myloop = new LoopCuda(block, loop_num); in chillmodule.cc\n"); + myloop = new LoopCuda(block, loop_num); + fflush(stdout); DEBUG_PRINT("back\n"); + delete block; + + //end-protonu + + fflush(stdout); + DEBUG_PRINT("myloop->original();\n"); + myloop->original(); + fflush(stdout); + DEBUG_PRINT("myloop->useIdxNames=true;\n"); + myloop->useIdxNames=true;//Use idxName in code_gen + //register_v2(L); + + fflush(stdout); + DEBUG_PRINT("chill_init DONE\n"); + Py_RETURN_NONE; // return Py_BuildValue( "" ); + +} + +#else +// ------------------------- // +// CHiLL interface functions // +// ------------------------- // + +static PyObject* chill_source(PyObject* self, PyObject* args) { + strict_arg_num(args, 1, "source"); + source_filename = strArg(args, 0); + Py_RETURN_NONE; +} + +static PyObject* chill_procedure(PyObject* self, PyObject* args) { + if(!procedure_name.empty()) { + fprintf(stderr, "only one procedure can be handled in a script"); + if(!is_interactive) + exit(2); + } + procedure_name = strArg(args, 0); + Py_RETURN_NONE; +} + +static PyObject* chill_loop(PyObject* self, PyObject* args) { + // loop (n) + // loop (n:m) + + int nargs = PyTuple_Size(args); + int start_num; + int end_num; + if(nargs == 1) { + start_num = intArg(args, 0); + end_num = start_num; + } + else if(nargs == 2) { + start_num = intArg(args, 0); + end_num = intArg(args, 1); + } + else { + fprintf(stderr, "loop takes one or two arguments"); + if(!is_interactive) + exit(2); + } + set_loop_num_start(start_num); + set_loop_num_end(end_num); + init_loop(start_num, end_num); + Py_RETURN_NONE; +} + +static PyObject* chill_print_code(PyObject* self, PyObject* args) { + strict_arg_num(args, 0, "print_code"); + myloop->printCode(); + printf("\n"); + Py_RETURN_NONE; +} + +static PyObject* chill_print_dep(PyObject* self, PyObject* args) { + strict_arg_num(args, 0, "print_dep"); + myloop->printDependenceGraph(); + Py_RETURN_NONE; +} + 
+static PyObject* chill_print_space(PyObject* self, PyObject* args) { + strict_arg_num(args, 0, "print_space"); + myloop->printIterationSpace(); + Py_RETURN_NONE; +} + +static PyObject* chill_exit(PyObject* self, PyObject* args) { + strict_arg_num(args, 0, "exit"); + repl_stop = true; + Py_RETURN_NONE; +} + +static void add_known(std::string cond_expr) { + int num_dim = myloop->known.n_set(); + std::vector<std::map<std::string, int> >* cond; + cond = parse_relation_vector(cond_expr.c_str()); + + Relation rel(num_dim); + F_And *f_root = rel.add_and(); + for (int j = 0; j < cond->size(); j++) { + GEQ_Handle h = f_root->add_GEQ(); + for (std::map<std::string, int>::iterator it = (*cond)[j].begin(); it != (*cond)[j].end(); it++) { + try { + int dim = from_string<int>(it->first); + if (dim == 0) + h.update_const(it->second); + else + throw std::invalid_argument("only symbolic variables are allowed in known condition"); + } + catch (std::ios::failure e) { + Free_Var_Decl *g = NULL; + for (unsigned i = 0; i < myloop->freevar.size(); i++) { + std::string name = myloop->freevar[i]->base_name(); + if (name == it->first) { + g = myloop->freevar[i]; + break; + } + } + if (g == NULL) + throw std::invalid_argument("symbolic variable " + it->first + " not found"); + else + h.update_coef(rel.get_local(g), it->second); + } + } + } + myloop->addKnown(rel); +} + +static PyObject* chill_known(PyObject* self, PyObject* args) { + strict_arg_num(args, 1, "known"); + if (PyList_Check(PyTuple_GetItem(args, 0))) { + PyObject* list = PyTuple_GetItem(args, 0); + for (int i = 0; i < PyList_Size(list); i++) { + add_known(std::string(PyString_AsString(PyList_GetItem(list, i)))); + } + } + else { + add_known(strArg(args, 0)); + } + Py_RETURN_NONE; +} + +static PyObject* chill_remove_dep(PyObject* self, PyObject* args) { + strict_arg_num(args, 0, "remove_dep"); + int from = intArg(args, 0); + int to = intArg(args, 1); + myloop->removeDependence(from, to); + Py_RETURN_NONE; +} + +static PyObject* 
chill_original(PyObject* self, PyObject* args) { + strict_arg_num(args, 0, "original"); + myloop->original(); + Py_RETURN_NONE; +} + +static PyObject* chill_permute(PyObject* self, PyObject* args) { + int nargs = strict_arg_range(args, 1, 3, "permute"); + if((nargs < 1) || (nargs > 3)) + throw std::runtime_error("incorrect number of arguments in permute"); + if(nargs == 1) { + // premute ( vector ) + std::vector<int> pi; + if(!tointvector(args, 0, pi)) + throw std::runtime_error("first arg in permute(pi) must be an int vector"); + myloop->permute(pi); + } + else if (nargs == 2) { + // permute ( set, vector ) + std::set<int> active; + std::vector<int> pi; + if(!tointset(args, 0, active)) + throw std::runtime_error("the first argument in permute(active, pi) must be an int set"); + if(!tointvector(args, 1, pi)) + throw std::runtime_error("the second argument in permute(active, pi) must be an int vector"); + myloop->permute(active, pi); + } + else if (nargs == 3) { + int stmt_num = intArg(args, 1); + int level = intArg(args, 2); + std::vector<int> pi; + if(!tointvector(args, 2, pi)) + throw std::runtime_error("the third argument in permute(stmt_num, level, pi) must be an int vector"); + myloop->permute(stmt_num, level, pi); + } + Py_RETURN_NONE; +} + +static PyObject* chill_pragma(PyObject* self, PyObject* args) { + strict_arg_num(args, 3, "pragma"); + int stmt_num = intArg(args, 1); + int level = intArg(args, 1); + std::string pragmaText = strArg(args, 2); + myloop->pragma(stmt_num, level, pragmaText); + Py_RETURN_NONE; +} + +static PyObject* chill_prefetch(PyObject* self, PyObject* args) { + strict_arg_num(args, 3, "prefetch"); + int stmt_num = intArg(args, 0); + int level = intArg(args, 1); + std::string prefetchText = strArg(args, 2); + int hint = intArg(args, 3); + myloop->prefetch(stmt_num, level, prefetchText, hint); + Py_RETURN_NONE; +} + +static PyObject* chill_tile(PyObject* self, PyObject* args) { + int nargs = strict_arg_range(args, 3, 7, "tile"); + int 
stmt_num = intArg(args, 0); + int level = intArg(args, 1); + int tile_size = intArg(args, 2); + if(nargs == 3) { + myloop->tile(stmt_num, level, tile_size); + } + else if(nargs >= 4) { + int outer_level = intArg(args, 3); + if(nargs >= 5) { + TilingMethodType method = StridedTile; + int imethod = intArg(args, 4, 2); //< don't know if a default value is needed + // check method input against expected values + if (imethod == 0) + method = StridedTile; + else if (imethod == 1) + method = CountedTile; + else + throw std::runtime_error("5th argument must be either strided or counted"); + if(nargs >= 6) { + int alignment_offset = intArg(args, 5); + if(nargs == 7) { + int alignment_multiple = intArg(args, 6, 1); + myloop->tile(stmt_num, level, tile_size, outer_level, method, alignment_offset, alignment_multiple); + } + if(nargs == 6) + myloop->tile(stmt_num, level, tile_size, outer_level, method, alignment_offset); + } + if(nargs == 5) + myloop->tile(stmt_num, level, tile_size, outer_level, method); + } + if(nargs == 4) + myloop->tile(stmt_num, level, tile_size, outer_level); + } + Py_RETURN_NONE; +} + +static void chill_datacopy_vec(PyObject* args) { + // Overload 1: bool datacopy( + // const std::vector<std::pair<int, std::vector<int> > > &array_ref_nums, + // int level, + // bool allow_extra_read = false, + // int fastest_changing_dimension = -1, + // int padding_stride = 1, + // int padding_alignment = 4, + // int memory_type = 0); + std::vector<std::pair<int, std::vector<int> > > array_ref_nums; + // expect list(tuple(int,list(int))) + // or dict(int,list(int)) + if(PyList_CheckExact(PyTuple_GetItem(args, 0))) { + PyObject* list = PyTuple_GetItem(args, 0); + for(int i = 0; i < PyList_Size(list); i ++) { + PyObject* tup = PyList_GetItem(list, i); + int index = PyLong_AsLong(PyTuple_GetItem(tup, 0)); + std::vector<int> vec; + tointvector(PyTuple_GetItem(tup, 1), vec); + array_ref_nums.push_back(std::pair<int, std::vector<int> >(index, vec)); + } + } + else 
if(PyList_CheckExact(PyTuple_GetItem(args, 0))) { + PyObject* dict = PyTuple_GetItem(args, 0); + PyObject* klist = PyDict_Keys(dict); + for(int ki = 0; ki < PyList_Size(klist); ki++) { + PyObject* index = PyList_GetItem(klist, ki); + std::vector<int> vec; + tointvector(PyDict_GetItem(dict,index), vec); + array_ref_nums.push_back(std::pair<int, std::vector<int> >(PyLong_AsLong(index), vec)); + } + Py_DECREF(klist); + } + else { + //TODO: this should never happen + } + int level = intArg(args, 1); + bool allow_extra_read = boolArg(args, 2, false); + int fastest_changing_dimension = intArg(args, 3, -1); + int padding_stride = intArg(args, 4, 1); + int padding_alignment = intArg(args, 5, 4); + int memory_type = intArg(args, 6, 0); + myloop->datacopy(array_ref_nums, level, allow_extra_read, fastest_changing_dimension, padding_stride, padding_alignment, memory_type); +} + +static void chill_datacopy_int(PyObject* args) { + int stmt_num = intArg(args, 0); + int level = intArg(args, 1); + std::string array_name = strArg(args,2,0); + bool allow_extra_read = boolArg(args,3,false); + int fastest_changing_dimension = intArg(args, 4, -1); + int padding_stride = intArg(args, 5, 1); + int padding_alignment = intArg(args, 6, 4); + int memory_type = intArg(args, 7, 0); + myloop->datacopy(stmt_num, level, array_name, allow_extra_read, fastest_changing_dimension, padding_stride, padding_alignment, memory_type); +} + +static PyObject* chill_datacopy(PyObject* self, PyObject* args) { + // Overload 2: bool datacopy(int stmt_num, int level, const std::string &array_name, bool allow_extra_read = false, int fastest_changing_dimension = -1, int padding_stride = 1, int padding_alignment = 4, int memory_type = 0); + int nargs = strict_arg_range(args, 3, 7, "datacopy"); + if(PyList_CheckExact(PyTuple_GetItem(args,0)) || PyDict_CheckExact(PyTuple_GetItem(args, 0))) { + chill_datacopy_vec(args); + } + else { + chill_datacopy_int(args); + } + Py_RETURN_NONE; +} + +static PyObject* 
chill_datacopy_privatized(PyObject* self, PyObject* args) { + // bool datacopy_privatized(int stmt_num, int level, const std::string &array_name, const std::vector<int> &privatized_levels, bool allow_extra_read = false, int fastest_changing_dimension = -1, int padding_stride = 1, int padding_alignment = 1, int memory_type = 0); + int nargs = strict_arg_range(args, 4, 9, "datacopy_privatized"); + int stmt_num = intArg(args, 0); + int level = intArg(args, 1); + std::string array_name = strArg(args, 2); + std::vector<int> privatized_levels; + tointvector(args, 3, privatized_levels); + bool allow_extra_read = boolArg(args, 4, false); + int fastest_changing_dimension = intArg(args, 5, -1); + int padding_stride = intArg(args, 6, 1); + int padding_alignment = intArg(args, 7, 1); + int memory_type = intArg(args, 8); + myloop->datacopy_privatized(stmt_num, level, array_name, privatized_levels, allow_extra_read, fastest_changing_dimension, padding_stride, padding_alignment, memory_type); + Py_RETURN_NONE; +} + +static PyObject* chill_unroll(PyObject* self, PyObject* args) { + int nargs = strict_arg_range(args, 3, 4, "unroll"); + //std::set<int> unroll(int stmt_num, int level, int unroll_amount, std::vector< std::vector<std::string> >idxNames= std::vector< std::vector<std::string> >(), int cleanup_split_level = 0); + int stmt_num = intArg(args, 0); + int level = intArg(args, 1); + int unroll_amount = intArg(args, 2); + std::vector< std::vector<std::string> > idxNames = std::vector< std::vector<std::string> >(); + int cleanup_split_level = intArg(args, 3); + myloop->unroll(stmt_num, level, unroll_amount, idxNames, cleanup_split_level); + Py_RETURN_NONE; +} + +static PyObject* chill_unroll_extra(PyObject* self, PyObject* args) { + int nargs = strict_arg_range(args, 3, 4, "unroll_extra"); + int stmt_num = intArg(args, 0); + int level = intArg(args, 1); + int unroll_amount = intArg(args, 2); + int cleanup_split_level = intArg(args, 3, 0); + myloop->unroll_extra(stmt_num, level, 
unroll_amount, cleanup_split_level); + Py_RETURN_NONE; +} + +static PyObject* chill_split(PyObject* self, PyObject* args) { + strict_arg_num(args, 3, "split"); + int stmt_num = intArg(args, 0); + int level = intArg(args, 1); + int num_dim = myloop->stmt[stmt_num].xform.n_out(); + + std::vector<std::map<std::string, int> >* cond; + std::string cond_expr = strArg(args, 2); + cond = parse_relation_vector(cond_expr.c_str()); + + Relation rel((num_dim-1)/2); + F_And *f_root = rel.add_and(); + for (int j = 0; j < cond->size(); j++) { + GEQ_Handle h = f_root->add_GEQ(); + for (std::map<std::string, int>::iterator it = (*cond)[j].begin(); it != (*cond)[j].end(); it++) { + try { + int dim = from_string<int>(it->first); + if (dim == 0) + h.update_const(it->second); + else { + if (dim > (num_dim-1)/2) + throw std::invalid_argument("invalid loop level " + to_string(dim) + " in split condition"); + h.update_coef(rel.set_var(dim), it->second); + } + } + catch (std::ios::failure e) { + Free_Var_Decl *g = NULL; + for (unsigned i = 0; i < myloop->freevar.size(); i++) { + std::string name = myloop->freevar[i]->base_name(); + if (name == it->first) { + g = myloop->freevar[i]; + break; + } + } + if (g == NULL) + throw std::invalid_argument("unrecognized variable " + to_string(it->first.c_str())); + h.update_coef(rel.get_local(g), it->second); + } + } + } + myloop->split(stmt_num,level,rel); + Py_RETURN_NONE; +} + +static PyObject* chill_nonsingular(PyObject* self, PyObject* args) { + std::vector< std::vector<int> > mat; + tointmatrix(args, 0, mat); + myloop->nonsingular(mat); + Py_RETURN_NONE; +} + +static PyObject* chill_skew(PyObject* self, PyObject* args) { + std::set<int> stmt_nums; + std::vector<int> skew_amounts; + int level = intArg(args, 1); + tointset(args, 0, stmt_nums); + tointvector(args, 2, skew_amounts); + myloop->skew(stmt_nums, level, skew_amounts); + Py_RETURN_NONE; +} + +static PyObject* chill_scale(PyObject* self, PyObject* args) { + strict_arg_num(args, 3); + 
std::set<int> stmt_nums; + int level = intArg(args, 1); + int scale_amount = intArg(args, 2); + tointset(args, 0, stmt_nums); + myloop->scale(stmt_nums, level, scale_amount); + Py_RETURN_NONE; +} + +static PyObject* chill_reverse(PyObject* self, PyObject* args) { + strict_arg_num(args, 2); + std::set<int> stmt_nums; + int level = intArg(args, 1); + tointset(args, 0, stmt_nums); + myloop->reverse(stmt_nums, level); + Py_RETURN_NONE; +} + +static PyObject* chill_shift(PyObject* self, PyObject* args) { + strict_arg_num(args, 3); + std::set<int> stmt_nums; + int level = intArg(args, 1); + int shift_amount = intArg(args, 2); + tointset(args, 0, stmt_nums); + myloop->shift(stmt_nums, level, shift_amount); + Py_RETURN_NONE; +} + +static PyObject* chill_shift_to(PyObject* self, PyObject* args) { + strict_arg_num(args, 3); + int stmt_num = intArg(args, 0); + int level = intArg(args, 1); + int absolute_pos = intArg(args, 2); + myloop->shift_to(stmt_num, level, absolute_pos); + Py_RETURN_NONE; +} + +static PyObject* chill_peel(PyObject* self, PyObject* args) { + strict_arg_range(args, 2, 3); + int stmt_num = intArg(args, 0); + int level = intArg(args, 1); + int amount = intArg(args, 2); + myloop->peel(stmt_num, level, amount); + Py_RETURN_NONE; +} + +static PyObject* chill_fuse(PyObject* self, PyObject* args) { + strict_arg_num(args, 2); + std::set<int> stmt_nums; + int level = intArg(args, 1); + tointset(args, 0, stmt_nums); + myloop->fuse(stmt_nums, level); + Py_RETURN_NONE; +} + +static PyObject* chill_distribute(PyObject* self, PyObject* args) { + strict_arg_num(args, 2); + std::set<int> stmts; + int level = intArg(args, 1); + tointset(args, 0, stmts); + myloop->distribute(stmts, level); + Py_RETURN_NONE; +} + +static PyObject * +chill_num_statements(PyObject *self, PyObject *args) +{ + //DEBUG_PRINT("\nC chill_num_statements() called from python\n"); + int num = myloop->stmt.size(); + //DEBUG_PRINT("C num_statement() = %d\n", num); + return Py_BuildValue( "i", num ); // 
BEWARE "d" is DOUBLE, not int
+}
+#endif
+
+#ifdef CUDACHILL
+// Method table registered with Python as module "chill" (CUDA-CHiLL build).
+// Each row is {python name, C routine, calling convention, help string};
+// the all-NULL row terminates the table.
+static PyMethodDef ChillMethods[] = {
+
+  // python name            C routine                   parameter passing  comment
+  {"print_code",          chill_print_code,          METH_VARARGS, "print the code at this point"},
+  {"print_ri",            chill_print_ri ,           METH_VARARGS, "print Runtime Info "},
+  {"print_idx",           chill_print_idx ,          METH_VARARGS, "print indices "},
+  {"print_dep",           chill_print_dep ,          METH_VARARGS, "print dep, dependencies?"},
+  {"print_space",         chill_print_space,         METH_VARARGS, "print something or other "},
+  {"add_sync",            chill_add_sync,            METH_VARARGS, "add sync, whatever that is"},
+  {"rename_index",        chill_rename_index,        METH_VARARGS, "rename a loop index"},
+  {"permute",             chill_permute_v2,          METH_VARARGS, "change the order of loops?"},
+  {"tile3",               chill_tile_v2_3arg,        METH_VARARGS, "something to do with tile"},
+  {"tile7",               chill_tile_v2_7arg,        METH_VARARGS, "something to do with tile"},
+  {"thread_dims",         thread_dims,               METH_VARARGS, "tx, ty, tz "},
+  {"block_dims",          block_dims,                METH_VARARGS, "bx, by"},
+  {"thread_indices",      chill_thread_indices,      METH_VARARGS, "bx, by"},
+  {"block_indices",       chill_block_indices,       METH_VARARGS, "bx, by"},
+  {"hard_loop_bounds",    chill_hard_loop_bounds,    METH_VARARGS, "lower, upper"},
+  {"unroll",              chill_unroll,              METH_VARARGS, "unroll a loop"},
+  {"cudaize",             chill_cudaize_v2,          METH_VARARGS, "dunno"},
+  {"datacopy_privatized", chill_datacopy_privatized, METH_VARARGS, "dunno"},
+
+  {"datacopy_9arg",       chill_datacopy9,           METH_VARARGS, "datacopy with 9 arguments"},
+  {"copy_to_texture",     chill_copy_to_texture,     METH_VARARGS, "copy to texture mem"},
+  {"read_IR",             chill_init,                METH_VARARGS, "read an Intermediate Representation file"},
+  {"cur_indices",         chill_cur_indices,         METH_VARARGS, "currently active indices"},
+  {"num_statements",      chill_num_statements,      METH_VARARGS, "number of statements in ... something"},
+  {NULL, NULL, 0, NULL}        /* Sentinel */
+
+  //{"copy_to_constant",    chill_copy_to_constant,    METH_VARARGS,  "copy to constant mem"},
+
+};
+#else
+// Method table registered with Python as module "chill" (plain CHiLL build).
+// Same row layout as the CUDA table above; the all-NULL row terminates it.
+static PyMethodDef ChillMethods[] = {
+
+  //python name           C routine                  parameter passing comment
+  {"source",              chill_source,              METH_VARARGS, "set source file for chill script"},
+  {"procedure",           chill_procedure,           METH_VARARGS, "set the name of the procedure"},
+  {"loop",                chill_loop,                METH_VARARGS, "indicate which loop to optimize"},
+  {"print_code",          chill_print_code,          METH_VARARGS, "print generated code"},
+  {"print_dep",           chill_print_dep,           METH_VARARGS, "print the dependencies graph"},
+  {"print_space",         chill_print_space,         METH_VARARGS, "print space"},
+  {"exit",                chill_exit,                METH_VARARGS, "exit the interactive console"},
+  {"known",               chill_known,               METH_VARARGS, "known"},
+  {"remove_dep",          chill_remove_dep,          METH_VARARGS, "remove dependency i suppose"},
+  {"original",            chill_original,            METH_VARARGS, "original"},
+  {"permute",             chill_permute,             METH_VARARGS, "permute"},
+  {"pragma",              chill_pragma,              METH_VARARGS, "pragma"},
+  {"prefetch",            chill_prefetch,            METH_VARARGS, "prefetch"},
+  {"tile",                chill_tile,                METH_VARARGS, "tile"},
+  {"datacopy",            chill_datacopy,            METH_VARARGS, "datacopy"},
+  // "datacopy_privitized" is a historical misspelling; it stays registered so
+  // existing scripts keep working, and the correctly spelled name (the one the
+  // CUDA table uses) is exported next to it for the same C routine.
+  {"datacopy_privitized", chill_datacopy_privatized, METH_VARARGS, "datacopy_privatized"},
+  {"datacopy_privatized", chill_datacopy_privatized, METH_VARARGS, "datacopy_privatized"},
+  {"unroll",              chill_unroll,              METH_VARARGS, "unroll"},
+  {"unroll_extra",        chill_unroll_extra,        METH_VARARGS, "unroll_extra"},
+  {"split",               chill_split,               METH_VARARGS, "split"},
+  {"nonsingular",         chill_nonsingular,         METH_VARARGS, "nonsingular"},
+  {"skew",                chill_skew,                METH_VARARGS, "skew"},
+  {"scale",               chill_scale,               METH_VARARGS, "scale"},
+  {"reverse",             chill_reverse,             METH_VARARGS, "reverse"},
+  {"shift",               chill_shift,               METH_VARARGS, "shift"},
+  {"shift_to",            chill_shift_to,            METH_VARARGS, "shift_to"},
+  {"peel",                chill_peel,                METH_VARARGS, "peel"},
+  {"fuse",                chill_fuse,                METH_VARARGS, "fuse"},
+  {"distribute",          chill_distribute,          METH_VARARGS, "distribute"},
+  {"num_statements",      chill_num_statements,      METH_VARARGS, "number of statements in the current loop"},
+  {NULL, NULL, 0, NULL}
+};
+#endif
+
+// Publishes the constants that scripts refer to by bare name as attributes of
+// module m. The return codes of the PyModule_Add* calls are not checked.
+static void register_globals(PyObject* m) {
+  // Preset globals
+  PyModule_AddStringConstant(m, "VERSION", CHILL_BUILD_VERSION);
+  PyModule_AddStringConstant(m, "dest", "C");
+  PyModule_AddStringConstant(m, "C", "C");
+  // Tile method
+  PyModule_AddIntConstant(m, "strided", 0);
+  PyModule_AddIntConstant(m, "counted", 1);
+  // Memory mode
+  PyModule_AddIntConstant(m, "global", 0);
+  PyModule_AddIntConstant(m, "shared", 1);
+  PyModule_AddIntConstant(m, "textured", 2);
+  // Bool flags
+  PyModule_AddIntConstant(m, "sync", 1);
+}
+
+// Module initialisation entry point: creates the "chill" module from
+// ChillMethods and then registers the preset globals on it.
+PyMODINIT_FUNC
+initchill(void)    // pass C methods to python
+{
+  DEBUG_PRINT("in C, initchill() to set up C methods to be called from python\n");
+  PyObject* m = Py_InitModule("chill", ChillMethods);
+  if (m == NULL)
+    return;  // module creation failed; do not hand a NULL module to PyModule_Add*
+  register_globals(m);
+}
diff --git a/chillmodule.hh b/chillmodule.hh
new file mode 100644
index 0000000..a64ad1b
--- /dev/null
+++ b/chillmodule.hh
@@ -0,0 +1,21 @@
+#ifndef BASIC_CHILLMODULE_HH
+#define BASIC_CHILLMODULE_HH
+// TODO Python.h defines these and something else does too
+#undef _POSIX_C_SOURCE
+#undef _XOPEN_SOURCE
+
+#include <Python.h>
+
+// a C routine that will be called from python
+//static PyObject * chill_print_code(PyObject *self, PyObject *args);
+
+//static PyMethodDef ChillMethods[] ;
+
+#ifndef CUDACHILL
+void finalize_loop(int loop_num_start, int loop_num_end);
+int get_loop_num_start();
+int get_loop_num_end();
+#endif
+
+PyMODINIT_FUNC initchill() ;   // pass C methods to python
+#endif
@@ -0,0 +1,567 @@
+/*****************************************************************************
+ Copyright (C) 2008 University of Southern California
+ Copyright (C) 2009-2010 University of Utah
+ All Rights Reserved.
+
+ Purpose:
+   Data dependence vector and graph.
+
+ Notes:
+   All dependence vectors are normalized, i.e., the first non-zero distance
+   must be positive.
Thus the correct dependence meaning can be given based on
+   source/destination pair's read/write type. Suppose for a dependence vector
+   (1, 0~5, -3), we want to permute the first and the second dimension,
+   the result would be two dependence vectors (0, 1, -3) and (1~5, 1, -3).
+   All operations on dependence vectors are non-destructive, i.e., new
+   dependence vectors are returned.
+
+ History:
+   01/2006 Created by Chun Chen.
+   03/2009 Use IR_Ref interface in source and destination arrays -chun
+*****************************************************************************/
+
+#include "dep.hh"
+
+#include <stdexcept>  // std::invalid_argument
+
+//-----------------------------------------------------------------------------
+// Class: DependenceVector
+//-----------------------------------------------------------------------------
+
+// Prints "<sym>:[_quasi]<kind>(<d0>, <d1>, ...)". Each distance is printed as
+// a single number when lbound == ubound, '*' when unbounded on both sides,
+// "<ub>-" / "-" when only bounded above, "<lb>+" / "+" when only bounded
+// below, and "<lb>~<ub>" otherwise.
+std::ostream& operator<<(std::ostream &os, const DependenceVector &d) {
+  if (d.sym != NULL) {
+    os << d.sym->name();
+    os << ':';
+    if (d.quasi)
+      os << "_quasi";
+  }
+
+  switch (d.type) {
+  case DEP_W2R:
+    os << "flow";
+    if (d.is_reduction)
+      os << "_reduction";
+    break;
+  case DEP_R2W:
+    os << "anti";
+    break;
+  case DEP_W2W:
+    os << "output";
+    break;
+  case DEP_R2R:
+    os << "input";
+    break;
+  case DEP_CONTROL:
+    os << "control";
+    break;
+  default:
+    os << "unknown";
+    break;
+  }
+
+  os << '(';
+
+  const int n = static_cast<int>(d.lbounds.size());
+  for (int i = 0; i < n; i++) {
+    omega::coef_t lbound = d.lbounds[i];
+    omega::coef_t ubound = d.ubounds[i];
+
+    if (lbound == ubound)
+      os << lbound;
+    else if (lbound == -posInfinity) {
+      if (ubound == posInfinity)
+        os << '*';
+      else if (ubound == -1)
+        os << '-';
+      else
+        os << ubound << '-';
+    } else if (ubound == posInfinity) {
+      if (lbound == 1)
+        os << '+';
+      else
+        os << lbound << '+';
+    } else
+      os << lbound << '~' << ubound;
+
+    if (i < n - 1)
+      os << ", ";
+  }
+
+  os << ')';
+
+  return os;
+}
+
+// DependenceVector::DependenceVector(int size):
+//   lbounds(std::vector<coef_t>(size, 0)),
+//   ubounds(std::vector<coef_t>(size, 0)) {
+//   src = NULL;
+//   dst = NULL;
+// }
+
+// Deep copy: the symbol is cloned so both vectors own their own IR_Symbol.
+DependenceVector::DependenceVector(const DependenceVector &that) {
+  this->sym = (that.sym != NULL) ? that.sym->clone() : NULL;
+  this->type = that.type;
+  this->lbounds = that.lbounds;
+  this->ubounds = that.ubounds;
+  quasi = that.quasi;
+  is_scalar_dependence = that.is_scalar_dependence;
+  is_reduction = that.is_reduction;
+}
+
+// Deep-copy assignment; self-assignment is a no-op.
+DependenceVector &DependenceVector::operator=(const DependenceVector &that) {
+  if (this != &that) {
+    // Clone first and release the old symbol afterwards: if clone() throws,
+    // this->sym still points at a live object instead of a deleted one that
+    // the destructor would delete a second time.
+    IR_Symbol *cloned = (that.sym != NULL) ? that.sym->clone() : NULL;
+    delete this->sym;
+    this->sym = cloned;
+    this->type = that.type;
+    this->lbounds = that.lbounds;
+    this->ubounds = that.ubounds;
+    quasi = that.quasi;
+    is_scalar_dependence = that.is_scalar_dependence;
+    is_reduction = that.is_reduction;
+  }
+  return *this;
+}
+
+DependenceType DependenceVector::getType() const {
+  return type;
+}
+
+// True for flow, anti, output and input dependences.
+bool DependenceVector::is_data_dependence() const {
+  return type == DEP_W2R || type == DEP_R2W || type == DEP_W2W
+         || type == DEP_R2R;
+}
+
+bool DependenceVector::is_control_dependence() const {
+  return type == DEP_CONTROL;
+}
+
+// True when no dimension before dim has a strictly positive lower bound or a
+// strictly negative upper bound, and the lower bound at dim is negative.
+// Returns false for an out-of-range dim; throws std::invalid_argument for
+// non-data dependences.
+bool DependenceVector::has_negative_been_carried_at(int dim) const {
+  if (!is_data_dependence())
+    throw std::invalid_argument("only works for data dependences");
+
+  if (dim < 0 || dim >= static_cast<int>(lbounds.size()))
+    return false;
+
+  for (int i = 0; i < dim; i++)
+    if (lbounds[i] > 0 || ubounds[i] < 0)
+      return false;
+
+  return lbounds[dim] < 0;
+}
+
+
+// Same prefix test as has_negative_been_carried_at, but true when either
+// bound at dim is non-zero.
+bool DependenceVector::has_been_carried_at(int dim) const {
+  if (!is_data_dependence())
+    throw std::invalid_argument("only works for data dependences");
+
+  if (dim < 0 || dim >= static_cast<int>(lbounds.size()))
+    return false;
+
+  for (int i = 0; i < dim; i++)
+    if (lbounds[i] > 0 || ubounds[i] < 0)
+      return false;
+
+  return (lbounds[dim] != 0) || (ubounds[dim] != 0);
+}
+
+// True when some dimension before dim (clamped to the vector length) has a
+// strictly positive lower bound or a strictly negative upper bound.
+bool DependenceVector::has_been_carried_before(int dim) const {
+  if (!is_data_dependence())
+    throw std::invalid_argument("only works for data dependences");
+
+  if (dim < 0)
+    return false;
+  const int n = static_cast<int>(lbounds.size());
+  if (dim > n)
+    dim = n;
+
+  for (int i = 0; i < dim; i++) {
+    if (lbounds[i] > 0)
+      return true;
+    if (ubounds[i] < 0)
+      return true;
+  }
+
+  return false;
+}
+
+// True when every distance is exactly zero. A vector with no dimensions is
+// reported as zero; previously size() - 1 wrapped to -1 and the call threw
+// "invalid dependence dimension".
+bool DependenceVector::isZero() const {
+  if (lbounds.empty())
+    return true;
+  return isZero(static_cast<int>(lbounds.size()) - 1);
+}
+
+// True when dimensions 0..dim are all exactly zero. Throws
+// std::invalid_argument when dim is not a valid dimension.
+bool DependenceVector::isZero(int dim) const {
+  if (dim < 0 || dim >= static_cast<int>(lbounds.size()))
+    throw std::invalid_argument("invalid dependence dimension");
+
+  for (int i = 0; i <= dim; i++)
+    if (lbounds[i] != 0 || ubounds[i] != 0)
+      return false;
+
+  return true;
+}
+
+// Looks at the first dimension whose lower bound is non-zero: true when that
+// lower bound is positive, false when it is negative or no such dimension
+// exists.
+bool DependenceVector::isPositive() const {
+  const int n = static_cast<int>(lbounds.size());
+  for (int i = 0; i < n; i++)
+    if (lbounds[i] != 0 || ubounds[i] != 0) {
+      if (lbounds[i] < 0)
+        return false;
+      else if (lbounds[i] > 0)
+        return true;
+    }
+
+  return false;
+}
+
+// Mirror image of isPositive(), decided on the upper bounds.
+bool DependenceVector::isNegative() const {
+  const int n = static_cast<int>(lbounds.size());
+  for (int i = 0; i < n; i++)
+    if (lbounds[i] != 0 || ubounds[i] != 0) {
+      if (ubounds[i] > 0)
+        return false;
+      else if (ubounds[i] < 0)
+        return true;
+    }
+
+  return false;
+}
+
+// True when no lower bound is negative.
+bool DependenceVector::isAllPositive() const {
+  const int n = static_cast<int>(lbounds.size());
+  for (int i = 0; i < n; i++)
+    if (lbounds[i] < 0)
+      return false;
+
+  return true;
+}
+
+// True when no upper bound is positive.
+bool DependenceVector::isAllNegative() const {
+  const int n = static_cast<int>(ubounds.size());
+  for (int i = 0; i < n; i++)
+    if (ubounds[i] > 0)
+      return false;
+
+  return true;
+}
+
+// True when the lower bound at dim is strictly positive. Throws
+// std::invalid_argument when dim is not a valid dimension.
+bool DependenceVector::hasPositive(int dim) const {
+  if (dim < 0 || dim >= static_cast<int>(lbounds.size()))
+    throw std::invalid_argument("invalid dependence dimension");
+
+  //av: changed from ubounds to lbounds may have side effects
+  return lbounds[dim] > 0;
+}
+
+// True when the upper bound at dim is strictly negative. Throws
+// std::invalid_argument when dim is not a valid dimension.
+bool DependenceVector::hasNegative(int dim) const {
+  if (dim < 0 || dim >= static_cast<int>(lbounds.size()))
+    throw std::invalid_argument("invalid dependence dimension");
+
+  //av: changed from lbounds to ubounds may have side effects
+  return ubounds[dim] < 0;
+}
+
+// False when any dimension before dim excludes zero (lower bound > 0 or
+// upper bound < 0), or when the range at dim lies entirely outside
+// [-distance, distance]; true otherwise. A dim outside [0, size] is treated
+// as size, so only the prefix test applies. distance must be positive.
+bool DependenceVector::isCarried(int dim, omega::coef_t distance) const {
+  if (distance <= 0)
+    throw std::invalid_argument("invalid dependence distance size");
+
+  const int n = static_cast<int>(lbounds.size());
+  // A negative dim used to be clamped through an unsigned comparison; the
+  // explicit test keeps that behaviour.
+  if (dim < 0 || dim > n)
+    dim = n;
+
+  for (int i = 0; i < dim; i++)
+    if (lbounds[i] > 0)
+      return false;
+    else if (ubounds[i] < 0)
+      return false;
+
+  if (dim >= n)
+    return true;
+
+  if (lbounds[dim] > distance)
+    return false;
+  else if (ubounds[dim] < -distance)
+    return false;
+
+  return true;
+}
+
+// Walks the dimensions in the order given by pi and decides on the first
+// non-zero lower bound: positive -> true, negative -> false. Returns true
+// when every lower bound is zero.
+bool DependenceVector::canPermute(const std::vector<int> &pi) const {
+  if (pi.size() != lbounds.size())
+    throw std::invalid_argument(
+      "permute dimensionality do not match dependence space");
+
+  const int n = static_cast<int>(pi.size());
+  for (int i = 0; i < n; i++) {
+    if (lbounds[pi[i]] > 0)
+      return true;
+    else if (lbounds[pi[i]] < 0)
+      return false;
+  }
+
+  return true;
+}
+
+// Splits this vector at its leading dimensions so that each returned vector
+// has a leading range that is entirely negative, entirely positive, or
+// exactly zero; the original vector is left untouched.
+std::vector<DependenceVector> DependenceVector::normalize() const {
+  std::vector<DependenceVector> result;
+
+  DependenceVector dv(*this);
+  const int n = static_cast<int>(dv.lbounds.size());
+  for (int i = 0; i < n; i++) {
+    if (dv.lbounds[i] < 0 && dv.ubounds[i] >= 0) {
+      omega::coef_t t = dv.ubounds[i];
+      dv.ubounds[i] = -1;
+      result.push_back(dv);
+      dv.lbounds[i] = 0;
+      dv.ubounds[i] = t;
+    }
+    if (dv.lbounds[i] == 0 && dv.ubounds[i] > 0) {
+      dv.lbounds[i] = 1;
+      result.push_back(dv);
+      dv.lbounds[i] = 0;
+      dv.ubounds[i] = 0;
+    }
+    if (dv.lbounds[i] == 0 && dv.ubounds[i] == 0)
+      continue;
+    else
+      break;
+  }
+
+  result.push_back(dv);
+  return result;
+}
+
+// Reorders the dimensions so that new dimension i takes old dimension pi[i],
+// then normalizes. Throws ir_error("dependence violation") when a negative
+// lower bound precedes the first positive one, unless the vector is quasi or
+// a scalar dependence.
+std::vector<DependenceVector> DependenceVector::permute(
+  const std::vector<int> &pi) const {
+  if (pi.size() != lbounds.size())
+    throw std::invalid_argument(
+      "permute dimensionality do not match dependence space");
+
+  const int n = lbounds.size();
+
+  DependenceVector dv(*this);
+  for (int i = 0; i < n; i++) {
+    dv.lbounds[i] = lbounds[pi[i]];
+    dv.ubounds[i] = ubounds[pi[i]];
+  }
+
+  bool violated = false;
+
+  for (int i = 0; i < n; i++) {
+    if (dv.lbounds[i] > 0)
+      break;
+    else if (dv.lbounds[i] < 0)
+      violated = true;
+  }
+
+  if (violated && !quasi && !is_scalar_dependence) {
+    throw ir_error("dependence violation");
+  }
+
+  return dv.normalize();
+}
+
+// Returns the opposite-direction dependence: flow and anti are swapped, every
+// range [l, u] becomes [-u, -l], and the result is marked quasi.
+DependenceVector DependenceVector::reverse() const {
+  const int n = lbounds.size();
+
+  DependenceVector dv(*this);
+  switch (type) {
+  case DEP_W2R:
+    dv.type = DEP_R2W;
+    break;
+  case DEP_R2W:
+    dv.type = DEP_W2R;
+    break;
+  default:
+    dv.type = type;
+  }
+
+  for (int i = 0; i < n; i++) {
+    dv.lbounds[i] = -ubounds[i];
+    dv.ubounds[i] = -lbounds[i];
+  }
+  dv.quasi = true;
+
+  return dv;
+}
+
+// std::vector<DependenceVector> DependenceVector::matrix(const std::vector<std::vector<int> > &M) const {
+//   if (M.size() != lbounds.size())
+//     throw std::invalid_argument("(non)unimodular transformation dimensionality does not match dependence space");
+
+//   const int n = lbounds.size();
+//   DependenceVector dv;
+//   if (sym != NULL)
+//     dv.sym = sym->clone();
+//   else
+//     dv.sym = NULL;
+//   dv.type = type;
+
+//   for (int i = 0; i < n; i++) {
+//     assert(M[i].size() == n+1 || M[i].size() == n);
+
+//     omega::coef_t lb, ub;
+//     if (M[i].size() == n+1)
+//       lb = ub = M[i][n];
+//     else
+//       lb = ub = 0;
+
+//     for (int j = 0; j < n; j++) {
+//       int c = M[i][j];
+//       if (c == 0)
+//         continue;
+
+//       if (c > 0) {
+//         if (lbounds[j] == -posInfinity)
+//           lb = -posInfinity;
+//         else if (lb != -posInfinity)
+//           lb += c * lbounds[j];
+//         if (ubounds[j] == posInfinity)
+//           ub = posInfinity;
+//         else if (ub != posInfinity)
+//           ub += c * ubounds[j];
+//       }
+//       else {
+//         if (ubounds[j] == posInfinity)
+//           lb = -posInfinity;
+//         else if (lb != -posInfinity)
+//           lb += c * ubounds[j];
+//         if (lbounds[j] == -posInfinity)
+//           ub = posInfinity;
+//         else if (ub != posInfinity)
+//           ub += c * lbounds[j];
+//       }
+//     }
+//     dv.lbounds.push_back(lb);
+//     dv.ubounds.push_back(ub);
+//   }
+//   dv.is_reduction = is_reduction;
+
+//   return dv.normalize();
+// }
+
+//-----------------------------------------------------------------------------
+// Class: DependenceGraph
+//-----------------------------------------------------------------------------
+
+// Builds a new graph with the same vertices. An edge whose two endpoints are
+// both in `active` (or any edge when `active` is empty) has each of its
+// vectors permuted by pi; an edge with neither endpoint active is copied
+// unchanged; an edge with exactly one active endpoint is copied with every
+// dimension that pi moves widened to (-inf, +inf).
+// NOTE(review): the result is default-constructed, so its num_dim() is 0
+// rather than this graph's value -- confirm that callers do not rely on it.
+DependenceGraph DependenceGraph::permute(const std::vector<int> &pi,
+                                         const std::set<int> &active) const {
+  DependenceGraph g;
+
+  for (int i = 0; i < vertex.size(); i++)
+    g.insert(vertex[i].first);
+
+  for (int i = 0; i < vertex.size(); i++)
+    for (EdgeList::const_iterator j = vertex[i].second.begin();
+         j != vertex[i].second.end(); j++) {
+      if (active.empty()
+          || (active.find(i) != active.end()
+              && active.find(j->first) != active.end())) {
+        for (int k = 0; k < j->second.size(); k++) {
+          std::vector<DependenceVector> dv = j->second[k].permute(pi);
+          g.connect(i, j->first, dv);
+        }
+      } else if (active.find(i) == active.end()
+                 && active.find(j->first) == active.end()) {
+        std::vector<DependenceVector> dv = j->second;
+        g.connect(i, j->first, dv);
+      } else {
+        std::vector<DependenceVector> dv = j->second;
+        for (int k = 0; k < dv.size(); k++)
+          for (int d = 0; d < pi.size(); d++)
+            if (pi[d] != d) {
+              dv[k].lbounds[d] = -posInfinity;
+              dv[k].ubounds[d] = posInfinity;
+            }
+        g.connect(i, j->first, dv);
+      }
+    }
+
+  return g;
+}
+
+// DependenceGraph DependenceGraph::matrix(const std::vector<std::vector<int> > &M) const {
+//   DependenceGraph g;
+
+//   for (int i = 0; i < vertex.size(); i++)
+//     g.insert(vertex[i].first);
+
+//   for (int i = 0; i < vertex.size(); i++)
+//     for (EdgeList::const_iterator j = vertex[i].second.begin(); j != vertex[i].second.end(); j++)
+//       for (int k = 0; k < j->second.size(); k++)
+//         g.connect(i, j->first, j->second[k].matrix(M));
+
+//   return g;
+// }
+
+// Builds a new graph with the same vertices that keeps every control
+// dependence and only those other dependences for which isCarried(dim) holds.
+DependenceGraph DependenceGraph::subspace(int dim) const {
+  DependenceGraph g;
+
+  for (int i = 0; i < vertex.size(); i++)
+    g.insert(vertex[i].first);
+
+  for (int i = 0; i < vertex.size(); i++)
+    for (EdgeList::const_iterator j = vertex[i].second.begin();
+         j != vertex[i].second.end(); j++)
+
+      for (int k = 0; k < j->second.size(); k++) {
+        if(j->second[k].type != DEP_CONTROL){
+          if (j->second[k].isCarried(dim))
+            g.connect(i, j->first, j->second[k]);
+        }else
+          g.connect(i, j->first, j->second[k]);
+
+      }
+
+  return g;
+}
+
+// True only when every dependence vector on every edge isPositive().
+bool DependenceGraph::isPositive() const {
+  for (int i = 0; i < vertex.size(); i++)
+    for (EdgeList::const_iterator j = vertex[i].second.begin();
+         j != vertex[i].second.end(); j++)
+      for (int k = 0; k < j->second.size(); k++)
+        if (!j->second[k].isPositive())
+          return false;
+
+  return true;
+}
+
+// True only when every dependence vector on every edge hasPositive(dim)
+// (so also true for a graph without edges).
+bool DependenceGraph::hasPositive(int dim) const {
+  for (int i = 0; i < vertex.size(); i++)
+    for (EdgeList::const_iterator j = vertex[i].second.begin();
+         j != vertex[i].second.end(); j++)
+      for (int k = 0; k < j->second.size(); k++)
+        if (!j->second[k].hasPositive(dim))
+          return false;
+
+  return true;
+}
+
+// True only when every dependence vector on every edge hasNegative(dim)
+// (so also true for a graph without edges).
+bool DependenceGraph::hasNegative(int dim) const {
+  for (int i = 0; i < vertex.size(); i++)
+    for (EdgeList::const_iterator j = vertex[i].second.begin();
+         j != vertex[i].second.end(); j++)
+      for (int k = 0; k < j->second.size(); k++)
+        if (!j->second[k].hasNegative(dim))
+          return false;
+
+  return true;
+}
@@ -0,0 +1,92 @@
+#ifndef DEP_HH
+#define DEP_HH
+
+#include <ostream>
+#include <set>
+#include <vector>
+
+#include <omega.h>
+#include "graph.hh"
+#include "ir_code.hh"
+#include "chill_error.hh"
+
+enum DependenceType { DEP_W2R, DEP_R2W, DEP_W2W, DEP_R2R, DEP_CONTROL, DEP_UNKNOWN };
+
+struct DependenceVector;
+typedef std::vector<DependenceVector> DependenceList;
+
+// A dependence of a given type with one [lbounds[i], ubounds[i]] distance
+// range per dimension. Owns sym: it is cloned on copy and deleted on destruction.
+struct DependenceVector {
+  DependenceType type;
+  IR_Symbol *sym;
+
+  bool is_reduction; // used to identify a class of flow dependence
+                     // that can be broken
+  std::vector<omega::coef_t> lbounds;
+  std::vector<omega::coef_t> ubounds;
+
+  bool quasi;
+  bool is_scalar_dependence;
+  DependenceVector() {
+    type = DEP_UNKNOWN;
+    sym = NULL;
+    is_reduction = false;
+    quasi = false;
+    is_scalar_dependence = false;
+  }
+  // DependenceVector(int size);
+  DependenceVector(const DependenceVector &that);
+  ~DependenceVector() {delete sym;}
+  DependenceVector &operator=(const DependenceVector &that);
+
+  bool is_data_dependence() const;
+  bool is_control_dependence() const;
+  bool has_negative_been_carried_at(int dim) const;
+  bool has_been_carried_at(int dim) const;
+  bool has_been_carried_before(int dim) const;
+
+  // the following functions will be cleaned up or removed later
+  bool isZero() const;
+  bool isPositive() const;
+  bool isNegative() const;
+  bool isAllPositive() const;
+  bool isAllNegative() const;
+  bool isZero(int dim) const;
+  bool hasPositive(int dim) const;
+  bool hasNegative(int dim) const;
+  bool isCarried(int dim, omega::coef_t distance = posInfinity) const;
+  bool canPermute(const std::vector<int> &pi) const;
+
+  std::vector<DependenceVector> normalize() const;
+  std::vector<DependenceVector> permute(const std::vector<int> &pi) const;
+  DependenceVector reverse() const;
+  // std::vector<DependenceVector> matrix(const std::vector<std::vector<int> > &M) const;
+  DependenceType getType() const;
+  friend std::ostream& operator<<(std::ostream &os, const DependenceVector &d);
+};
+
+
+
+// Graph whose edges carry lists of DependenceVectors.
+class DependenceGraph: public Graph<Empty, DependenceVector> {
+
+protected:
+  int num_dim_;
+
+public:
+  DependenceGraph(int n) { num_dim_ = n; }
+  DependenceGraph() { num_dim_ = 0; }
+  ~DependenceGraph() {}
+  int num_dim() const { return num_dim_; }
+//  DependenceGraph permute(const std::vector<int> &pi) const;
+  DependenceGraph permute(const std::vector<int> &pi,
+                          const std::set<int> &active = std::set<int>()) const;
+  // DependenceGraph matrix(const std::vector<std::vector<int> > &M) const;
+  DependenceGraph subspace(int dim) const;
+  bool isPositive() const;
+  bool hasPositive(int dim) const;
+  bool hasNegative(int dim) const;
+};
+
+#endif
diff --git a/examples/chill/gemm.c b/examples/chill/gemm.c
new file mode 100644
index 0000000..355bafe
--- /dev/null
+++ b/examples/chill/gemm.c
@@ -0,0 +1,15 @@
+int main() {
+
+  float a[512][512], b[512][512], c[512][512];
+
+  int i, j, k;
+
int n; + for (j = 0; j < n; j++) + for (k = 0; k < n; k++) + for (i = 0; i < n; i++) { + c[i][j] = c[i][j] + a[i][k] * b[k][j]; + } + + return 0; +} + diff --git a/examples/chill/gemm.script b/examples/chill/gemm.script new file mode 100644 index 0000000..ed91567 --- /dev/null +++ b/examples/chill/gemm.script @@ -0,0 +1,31 @@ +#matrix multiply large array size for intel machine +source: gemm.c +procedure: main +format: rose +loop: 0 + +TI = 128 +TJ = 8 +TK = 512 +UI = 2 +UJ = 2 + +permute([3,1,2]) +tile(0,2,TJ) +#print space +tile(0,2,TI) +#print space +tile(0,5,TK) +#print space + +datacopy(0,3,a,false,1) +#print space + +datacopy(0,4,b) +print +unroll(0,4,UI)#print space +print +unroll(0,5,UJ) +#print space +print + diff --git a/examples/chill/gemv.c b/examples/chill/gemv.c new file mode 100644 index 0000000..610d4cb --- /dev/null +++ b/examples/chill/gemv.c @@ -0,0 +1,15 @@ +#define N 10 + +int main() { + // int n; + float a[N]; + float b[N]; + float c[N][N]; + + int i, j; + + for (i = 1; i < N; i++) + for (j = 1; j < N; j++) + a[i] = a[i] + c[i][j] * b[j]; + +} diff --git a/examples/chill/gemv.script b/examples/chill/gemv.script new file mode 100644 index 0000000..f1d5f89 --- /dev/null +++ b/examples/chill/gemv.script @@ -0,0 +1,9 @@ +source: gemv.c # matrix-vector multiply +procedure: main +format : rose +loop: 0 + + + +original() +print diff --git a/examples/chill/jacobi1.c b/examples/chill/jacobi1.c new file mode 100644 index 0000000..0fcaee4 --- /dev/null +++ b/examples/chill/jacobi1.c @@ -0,0 +1,13 @@ +#define N 512 + +int main() { + int i, t; + + float a[N][N]; + + for (t = 2; t <= 100; t++) + for (i = 2; i <= N - 1; i++) + a[t][i] = a[t - 1][i - 1] + a[t - 1][i] + a[t - 1][i + 1]; + + return 0; +} diff --git a/examples/chill/jacobi1.script b/examples/chill/jacobi1.script new file mode 100644 index 0000000..c0dec8d --- /dev/null +++ b/examples/chill/jacobi1.script @@ -0,0 +1,18 @@ +# +# tiling perfect jacobi loop nest with time step, use +# unimodular 
transformation first (only applicable to the +# perfect loop nest) to make tiling legal. +# + +source: jacobi1.c +procedure: main +format : rose +loop: 0 + +print dep + +nonsingular([[1,0],[1,1]]) # unimodular matrix, determinant is one +tile(0,2,64) + +print dep +print diff --git a/examples/chill/jacobi2.c b/examples/chill/jacobi2.c new file mode 100644 index 0000000..b8d8d7b --- /dev/null +++ b/examples/chill/jacobi2.c @@ -0,0 +1,15 @@ +#define N 512 + +int main() { + double a[N]; + double b[N]; + int t, i; + for (t = 1; t <= 100; t++) { + for (i = 2; i <= N - 1; i++) + b[i] = (double) 0.25 * (a[i - 1] + a[i + 1]) + (double) 0.5 * a[i]; + + for (i = 2; i <= N - 1; i++) + a[i] = b[i]; + } + return 0; +} diff --git a/examples/chill/jacobi2.script b/examples/chill/jacobi2.script new file mode 100644 index 0000000..afe14c6 --- /dev/null +++ b/examples/chill/jacobi2.script @@ -0,0 +1,21 @@ +# +# tiling imperfect jacobi loop nest, more details in the paper +# "Automatic Tiling of Iterative Stencil Loops" by Zhiyuan Li and +# Yonghong Song, TOPLAS, 2004. 
+#
+
+source: jacobi2.c
+procedure: main
+format: rose
+loop: 0
+
+print dep
+
+original()
+shift([1], 2, 1)
+fuse([0,1], 2) # optional
+skew([0,1], 2, [2,1])
+tile(0, 2, 32, 1)
+
+print dep
+print
diff --git a/examples/chill/unroll.c b/examples/chill/unroll.c
new file mode 100644
index 0000000..68f4633
--- /dev/null
+++ b/examples/chill/unroll.c
@@ -0,0 +1,31 @@
+#define N 14
+void foo(int n, float* x, float* y, float* z, float* f3, float* f1, float* w) {
+  int dt;
+
+  int i, j;
+
+  for (i = 1; i <= 14; i++)
+    x[i] = 1.0;
+
+  for (i = 1; i <= 14; i += 3)
+    y[i] = 1.0;
+
+  for (i = N + 1; i <= N + 20; i += 3)
+    z[i] = 1.0;
+
+  for (i = 0; i <= N; i++) {
+    for (j = i; j <= i + N; j++)
+      f3[i] = f3[i] + f1[j] * w[j - i];
+    f3[i] = f3[i] * dt;
+  }
+
+  return; /* foo returns void, so it must not return a value */
+}
+
+int main() {
+  float x[N], y[N], z[N], f3[N], f1[N], w[N];
+
+  foo(N, x, y, z, f3, f1, w);
+  return 0;
+}
+
diff --git a/examples/chill/unroll.script b/examples/chill/unroll.script
new file mode 100644
index 0000000..e64acb6
--- /dev/null
+++ b/examples/chill/unroll.script
@@ -0,0 +1,35 @@
+#
+# Test unroll-and-jam. The last loop adapted from the simple
+# convolution example from p463 of "Optimizing Compilers for
+# Modern Architectures", by Randy Allen and Ken Kennedy.
+# + +source: unroll.c +procedure: foo +format: rose +# fully unroll a loop with known iteration count +loop: 0 +original() +unroll(0,1,3) +print +print space + + +# a strided loop +loop: 1 +original() +unroll(0,1,2) +print +print space + +# lower and upper bounds are not constant +loop: 2 +original() +unroll(0,1,20) +print + +# parallelogram iteration space +loop: 3 +original() +unroll(0,1,2) +print diff --git a/examples/cuda-chill/cp.c b/examples/cuda-chill/cp.c new file mode 100644 index 0000000..837d7a6 --- /dev/null +++ b/examples/cuda-chill/cp.c @@ -0,0 +1,29 @@ +#define N 1 + +#define VOLSIZEY 512 +#define VOLSIZEX 512 +#define VOLSIZEZ 1 +#define ATOMCOUNT 4000 +#define GRIDSPACING 0.1 +#define zDim 0 + +extern float sqrtf(float); + +void cenergy_cpu(float atoms[ATOMCOUNT*4],float *energy,float z) +{ +int i,j,n;float dx,dy,dz; + + for (j=0; j<VOLSIZEY; j++) { + for (i=0; i<VOLSIZEX; i++) { + for (n=0;n<ATOMCOUNT;n+=4) { + dx = (GRIDSPACING * i) - atoms[n]; + dy = (GRIDSPACING * j) - atoms[n+1]; + dz = z - atoms[n+2]; + energy[(j*VOLSIZEX + i)+VOLSIZEX*VOLSIZEY*zDim] += atoms[n+3]/sqrtf( (dx*dx) + (dy*dy)+ (dz*dz) ) ; + } + + + } + } +} + diff --git a/examples/cuda-chill/cp.lua b/examples/cuda-chill/cp.lua new file mode 100644 index 0000000..1ef2264 --- /dev/null +++ b/examples/cuda-chill/cp.lua @@ -0,0 +1,46 @@ +--CUBLAS 2 MM Multiply + +--This function form intializes "CUDAIZE v2" versus "CUDAIZE v1" if you +--call init() and use global variables to specify procedure and loop + +--Second parameter is procedure # and third is loop # +init("cp.c", "cenergy_cpu", 0) + +dofile("cudaize.lua") --defines custom tile_by_index, copy_to_registers, + --copy_to_shared methods +V=512 +N=4000 +N=1 + +Tj=32 +Ti=16 +Tii=16 +Tjj=16 + +--normalize_index("j") +--normalize_index("i") +print_code() +normalize_index("n") +-- TILE COMMANDS ZEROOOOOOOOOOO:3 +--permute(0,{"i","j","n"}) 
+--tile_by_index({"i","j"},{Ti,Tj},{l1_control="ii",l2_control="jj"},{"ii","jj","i","j","n"})--CU=-1 +tile_by_index({"j","i"},{Tj,Ti},{l1_control="jj",l2_control="ii"},{"jj","ii","j","i","n"})--CU=-1 +--tile_by_index({"n"},{Tn},{l1_control="nn"},{"jj","ii","nn","j","i","n"})--CU=-1 + +--tile_by_index({"j","i"},{Tjjj,Tiii},{l1_control="jjj",l2_control="iii"},{"jj","ii","nn","jjj","j","iii","i","n"})--CU=3 +--tile_by_index({"i","j"},{Tii,Tjj},{l1_control="iii",l2_control="jjj"},{"ii","jj","i","iii","j","jjj","n"})--CU=3 +--tile_by_index({"j"}, {Tn}, {l1_control="j",l1_tile="jjj"}, {"ii", "jj", "nn","jjj","j","i","n"}) +--tile_by_index({"i"}, {Tii}, {l1_control="iii",l1_tile="i"}, {"ii", "jj", "iii","i","j","n"}) +print_code() +cudaize("kernel_GPU",{atoms=N*4,energy=V*V*1},{block={"jj","ii"}, thread={"j","i"}})--CU=3 +--cudaize("kernel_GPU",{atoms=N*4,energy=V*V*1},{block={"ii","jj"}, thread={"i","j"}})--CU=3 +print_code() +copy_to_shared("tx","atoms",-16) +copy_to_registers("tx","energy") +--copy_to_texture("atoms") +--unroll_to_depth(1) +--unroll(0,9,0) +--unroll(0,5,0) + +--unroll(0,8,256) +print_code() diff --git a/examples/cuda-chill/cudaize.lua b/examples/cuda-chill/cudaize.lua new file mode 100644 index 0000000..7359cca --- /dev/null +++ b/examples/cuda-chill/cudaize.lua @@ -0,0 +1,1004 @@ + +-- THIS IS CUDAIZE.LUA + +function table.contains_key(table, key) + for k in pairs(table) do + if k == key then + return true + end + end + return false +end + +function valid_indices(stmt, indices) + --print( "valid_indices() lua calling C cur_indices") + --io.flush() + cur = cur_indices(stmt) + --print("Cur indices "..list_to_string(cur)) + for idx in pairs(indices) do + if not table.contains_key(cur,idx) then + return false + end + end + return true +end + +function next_clean_level(cur_idxs,level) + --print("next_clean_level( ..., "..level.." 
)") + --print(string.format("indices_at_each_level %s ",list_to_string(cur_idxs) )) + + --print("loop to "..#cur_idxs) + for i=level+1,#cur_idxs do + --print("Checking level "..i.." = '"..cur_idxs[i].."'") + if (# cur_idxs[i] > 0) then + --print("Good enough"..(# cur_idxs[i])) + --print("returning "..i) + return i + end + end + return -1 --sentinal that there were no non-dummy indices left +end + +function build_order(final_order, tile_idx_names, ctrl_idx_names, tile_idx_map, cur_level) + order = {} + --print("\nbuild_order()") + --print("build_order(): final_order = ( "..list_to_string(final_order).." )") + --print("build_order(): ctrl_idx_names = ("..list_to_string(ctrl_idx_names).." )") + --print("cur_level "..cur_level.."") + --io.flush() + + for i,k in ipairs(final_order) do + skip = false + cur = final_order[i] + --print("\ncur "..cur.." = final_order["..i.."] = "..final_order[i].." ") + --control loops below our current level should not be in the current order + for j=cur_level+2,# ctrl_idx_names do + --print("j "..j.." final_order["..i.."] = "..final_order[i].." ") + if ctrl_idx_names[j] == final_order[i] then + skip = true + --print("SKIP "..final_order[i].." ") + --io.flush() + end + end + --possibly substitute tile indices ifn necessar + if table.contains_key(tile_idx_map,final_order[i]) then + approved_sub = false + sub_string = tile_idx_map[final_order[i]] + for j=cur_level+2,# tile_idx_names do + if tile_idx_names[j] == sub_string then + approved_sub = true + end + end + if approved_sub then + cur = sub_string + end + end + if not skip then + table.insert(order,cur) + end + end + return order +end + +function list_to_string(str_list) + --Helpful debug output + l = "" + for i,str in ipairs(str_list) do + if i > 1 then + l = l .. ", " .. 
str + else + l = str + end + end + return l +end + + +function find_cur_level(stmt,idx) + --Search cur_indices for a idx at stmt + cur = cur_indices(stmt) + --print(string.format("find_cur_level(stmt %d, idx %s) Cur indices %s", stmt, idx, list_to_string(cur))) + for i,cidx in ipairs(cur) do + if cidx == idx then + --print(string.format("found it at index %d", i)) + return i + end + end + error("Unable to find "..idx.." in current list of indices") +end + + +function chk_cur_level(stmt,idx) + --Search cur_indices for a idx at stmt + cur = cur_indices(stmt) + for i,cidx in ipairs(cur) do + if cidx == idx then + return i + end + end + return -1 +end + + +function find_offset(cur_order, tile, control) + --print("Looking for tile '"..tile.."' and control '"..control.."' in ( "..list_to_string(cur_order)..", )") + idx1 = -1 + idx2 = -1 + for i,cur in ipairs(cur_order) do + if(cur == tile) then + idx1 = i + end + if(cur == control) then + idx2 = i + end + end + if(idx1 < 0) then + error("Unable to find tile " .. tile .. " in current list of indices") + end + if(idx2 < 0) then + error("Unable to find control " .. control .. " in current list of indices") + end + --print("found at level " .. idx2 .. " and " .. idx1) + if(idx2 < idx1) then + return idx2-idx1+1 + else + return idx2-idx1 + end +end + +function tile_by_index(tile_indices, sizes, index_names, final_order, tile_method) + --print "STARTING TILE BY INDEX" + --io.flush() + stmt = 0 --assume stmt 0 + cur = cur_indices(stmt) + --print("Cur indices "..list_to_string(cur)) + if not valid_indices(stmt,tile_indices) then + error('One of the indices in the first parameter were not '.. 
+ 'found in the current set of indices.') + end + if not tile_method then tile_method = counted end + tile_idx_names = {} + for i,s in ipairs(tile_indices) do tile_idx_names[i]=s end --shallow copy + --print("tile_index_names: ['"..list_to_string(tile_indices).."']") + + --print("index_names: ") + --for k,v in pairs(index_names) do print(k,v) end + + --io.flush() + + ctrl_idx_names = {} + tile_idx_map = {} + for k,v in pairs(index_names) do + valid = false + if(string.sub(k,1,1) == "l") then + if string.sub(k,-8) == "_control" then + i = tonumber(string.sub(k,2,-9)) + if i and i >= 1 and i <= (# tile_indices) then + ctrl_idx_names[i] = v + --print(string.format("Handling control %s for loop level %d",v,i)) + --print("control "..k.." name "..v.." ") + valid = true + end + elseif string.sub(k,-5) == "_tile" then + i = tonumber(string.sub(k,2,-6)) + if i and i >= 1 and i <= (# tile_indices) then + --print(string.format("tile %s -> %s",tile_indices[i], v)) + tile_idx_names[i] = v + tile_idx_map[v] = tile_indices[i] + --print(string.format("tile %s -> %s",tile_indices[i], v)) + valid = true + end + end + end + if not valid then error(string.format("%s is not a proper key for specifying ".. 
+ "tile or control loop indices\n", k)) end + end + + --filter out control indices (and do name substitution of unprocessed tile indices) for a given level + cur_order = build_order(final_order, tile_indices, ctrl_idx_names, tile_idx_map, -1) + permute(stmt, cur_order) + + for i,cur_idx in ipairs(tile_indices) do + --print(string.format("i %d cur_idx %s calling build order ********", i-1, cur_idx)) + cur_order = build_order(final_order, tile_indices, ctrl_idx_names, tile_idx_map, i-1) + --Find a offset between tile loop and control loop + -- 0 = control loop one level above tile loop + -- -1 = control loop two levels above tile loop + -- > 0 = tile loop above control loop + -- In the last case, we do two extra tile commands to get the control + -- above the tile and then rely on the final permute to handle the + -- rest + level = find_cur_level(stmt,cur_idx) + offset = find_offset(cur_order, tile_idx_names[i], ctrl_idx_names[i]) + --print(string.format("offset %d", offset)) + + if (offset <= 0) then + --print(string.format("[offset<=0]1tile(%d, %d, %d, %d, %s, %s, %s)",stmt, level, sizes[i], level+offset, tile_idx_names[i], ctrl_idx_names[i], tile_method)) + tile(stmt, level, sizes[i], level+offset, tile_idx_names[i], ctrl_idx_names[i], tile_method) + else + --print(string.format("2tile(%d, %d, %d, %d, %s, %s, %s)", stmt, level, sizes[i], level, tile_idx_names[i], ctrl_idx_names[i], tile_method)) + tile(stmt, level, sizes[i], level, tile_idx_names[i], ctrl_idx_names[i], tile_method);--regular level + --flip tile and control loop + --print(string.format("3tile(%d, %d, %d)",stmt, level+1, level+1)) + tile(stmt, level+1, level+1); + --print(string.format("4tile(%d, %d, %d)",stmt, level+1, level)) + tile(stmt, level+1, level); + --print(string.format("\n[offset>0]tile(%d, %d, %d, %d,%s,%s,%s)",stmt, level, sizes[i], level, tile_idx_names[i], ctrl_idx_names[i], tile_method)) + --print_code() + + end + + --Do permutation based on cur_order + --print "permute based on 
build order calling build_order()" + --print "cur_order = build_order(final_order, tile_indices, ctrl_idx_names, tile_idx_map, i-1)" + cur_order = build_order(final_order, tile_indices, ctrl_idx_names, tile_idx_map, i-1) + --print "permute(stmt, cur_order);" + permute(stmt, cur_order); + --print "\nafter permute(), code is:" + --print_code() + end + --print "ENDING TILE BY INDEX" + --print_code() +end + +-- Looks up the current loop level of index name `index` in statement 0 and +-- calls tile(stmt, l, l) on that level. stmt and l are assigned as globals. +function normalize_index(index) + stmt = 0 --assume stmt 0 + --cur = cur_indices(stmt) -- was fused into the comment above, so it never executed; only the debug print below reads it + --print("Cur indices "..list_to_string(cur)) + l = find_cur_level(stmt, index) + tile(stmt, l, l) + --print(string.format("\n[Normalize]tile(%d, %d, %d)",stmt, l,l)) +end + +-- Returns true when idx equals one of the entries of cur_indices(stmt), +-- false otherwise. The scan starts at 1: the table is used as a 1-based +-- sequence (#cur here, ipairs(cur)/table.getn(cur) in copy_to_registers), +-- and starting at 0 compared idx with cur[0] == nil, so a nil idx matched. +-- cur is assigned without `local`, so the global cur is overwritten too. +function is_in_indices(stmt, idx) + cur = cur_indices(stmt) + for i=1,#cur do + if(cur[i]==idx) then + return true + end + end + return false + +end + + +function copy_to_registers(start_loop, array_name) + + --print("\n\n****** starting copy to registers") + io.flush() + + stmt = 0 --assume stmt 0 + + -- [Malik] first we make sure that tx and ty are consecutive loops in the 2D thread setup, otherwise all levels for subsequent operations are messed up. Start logic. 
+ cur = cur_indices(stmt) + table_Size = table.getn(cur) + + --print(string.format("Cur indices %s,",list_to_string(cur))) + --print(string.format("The table size is %d", table_Size)) + --table.foreach(cur, print) + --print_code() + + level_tx = -1 + level_ty = -1 + if is_in_indices(stmt,"tx") then level_tx = find_cur_level(stmt,"tx") end + if is_in_indices(stmt,"ty") then level_ty = find_cur_level(stmt,"ty") end + --print(string.format("level_tx %d level_ty %d", level_tx, level_ty)) + + ty_lookup_idx = "" + org_level_ty = level_ty + + --if(cur[level_tx+1]~=nil and cur[level_tx+1]~="") then ty_lookup = ty_lookup+1 end + if(cur[level_ty+1]~=nil and cur[level_ty+1]~="") then + --print(string.format("IF cur[%d] = %s", level_ty+1, cur[level_ty+1])) + ty_lookup_idx = cur[level_ty+1] + else + --if cur[level_ty] ~= nil then print(string.format("ELSE ty_lookup_idx = cur[%d] = %s", level_ty, cur[level_ty])) -- TODO + --else print "ELSE (dangerous)" end + ty_lookup_idx = cur[level_ty] -- may assign nil !? 
+ end + --if ty_lookup_idx ~= nil then print(string.format("ty_lookup_idx '%s'", ty_lookup_idx)) -- TODO + --else print "ty_lookup_idx is NIL" + --end + + if level_ty > 0 then + --print(string.format("\ntile3(%d,%d,%d)",stmt,level_ty,level_tx+1)) + tile(stmt,level_ty,level_tx+1) + end + --print_code() + + --print("\ntylookup is %d",ty_lookup) + --exit(0) + -- + cur = cur_indices(stmt) + table_Size = table.getn(cur) + --print(string.format("Cur indices %s,",list_to_string(cur))) + --print("The table size is "..table.getn(cur)) + --table.foreach(cur, print) + + if is_in_indices(stmt,"tx") then level_tx = find_cur_level(stmt,"tx") end + if ty_lookup_idx then + if is_in_indices(stmt,ty_lookup_idx) then level_ty = find_cur_level(stmt,ty_lookup_idx) end + end + + ty_lookup = 1 + idx_flag = -1 + -- find the level of the next valid index after ty+1 + --print(string.format("\nlevel_ty %d", level_ty)) + if level_ty > 0 then + --print(string.format("table_Size %d", table_Size)) + for num= level_ty+ty_lookup,table_Size do + --print(string.format("num=%d cur[num] = '%s'",num, cur[num])) + if(cur[num] ~= "") then + idx_flag = find_cur_level(stmt,cur[num]) + --print (string.format("idx_flag = %d", idx_flag)) + break + end + end + end + + --print(string.format("\n(first) I am checking all indexes after ty+1 %s",idx_flag)) + --print_code() + --print "" + + how_many_levels = 1 + startat = idx_flag + 1 + if startat == 0 then startat = 1 end -- avoid attempt to examine an illegal array offset + --print(string.format("idx_flag = %d I will check levels starting with %d", idx_flag, idx_flag+1)) + + for ch_lev = startat,table_Size,1 do -- was for ch_lev = idx_flag+1,table_Size,1 do + --print(string.format("ch_lev %d", ch_lev)) + if(cur[ch_lev] ~= nil and cur[ch_lev] ~= "") then + --print(string.format("cur[%d] = '%s'", ch_lev, cur[ch_lev])) + how_many_levels = how_many_levels+1 + end + end + --print("\nHow Many Levels",how_many_levels) + + -- change this all to reflect the real logic 
which is to normalize all loops inside the thread loops. + if(how_many_levels <2) then + while( idx_flag >= 0) do + for num = level_ty+ty_lookup,(table_Size) do + --print(string.format("at top of loop, num is %d", num)) + --print(string.format("num %d", num)) + --print(string.format("cur[num] = '%s'", cur[num])) + if(cur[num] ~= "") then + idx=cur[num] + --print(string.format("idx '%s'", idx)) + + curlev = find_cur_level(stmt,idx) + --print(string.format("curlev %d", curlev)) + + --print_code() + --print(string.format("\n[COPYTOREG]tile(%d,%d,%d)",stmt,find_cur_level(stmt,idx),level_tx)) + tile(stmt,find_cur_level(stmt,idx),find_cur_level(stmt,idx)) + curlev = find_cur_level(stmt,idx) + --print(string.format("curlev %d", curlev)) + tile(stmt,find_cur_level(stmt,idx),level_tx) + --print(string.format("hehe '%s'",cur[num])) + + cur = cur_indices(stmt) + --print("Cur indices INSIDE"..list_to_string(cur)) + table_Size = table.getn(cur) + --print(string.format("Table Size is: %d",table_Size)) + level_tx = find_cur_level(stmt,"tx") + --print(string.format("\n level TX is: %d",level_tx)) + level_ty = find_cur_level(stmt,ty_lookup_idx) + --print(string.format("\n level TY is: %d",level_ty)) + idx_flag = -1 + --print "idx_flag = -1" + + -- find the level of the next valid index after ty+1 + + -- the following was num, which conflicts with loop we're already in, and otherwise wasn't used (?) + for num= level_ty+ty_lookup,table_Size do + --print(string.format("num mucking num = %d", num)) + if(cur[num] ~= nil and cur[num] ~= "") then + idx_flag = find_cur_level(stmt,cur[num]) + --print("\n(second) I am checking all indexes after ty+1 %s",cur[num]) + break + end + end + --print(string.format("num mucked to %d idx_flag = %d", num, idx_flag)) + + end + --print(string.format("at bottom of loop, num is %d", num)) + end + end + end + --print "done with levels" + + + + + --print "ARE WE SYNCED HERE?" 
+ --print_code() + --print("\ntile(%d,%d,%d)",stmt,level_k,level_k) + --tile(stmt,level_k,level_k) + + -- [Malik] end logic + --print_code() + start_level = find_cur_level(stmt, start_loop) + --We should hold contant any block or tile loop + block_idxs = block_indices() + thread_idxs = thread_indices() + --print("\nblock indices are") + --table.foreach(block_idxs, print) + --print("\nthread indices are") + --table.foreach(thread_idxs, print) + --print(string.format("\nStart Level: %d",start_level)) + + hold_constant = {} + --print("\n Now in Blocks") + for i,idx in ipairs(block_idxs) do + --print(string.format("\n Idx:%s : Level: %d",idx,find_cur_level(stmt,idx))) + if find_cur_level(stmt,idx) >= start_level then + table.insert(hold_constant, idx) + --print(string.format("\nJust inserted block %s in hold_constant",idx)) + end + end + + + --print("\n Now in Threads") + for i,idx in ipairs(thread_idxs) do + --print(string.format("\n Idx:%s : Level: %d",idx,find_cur_level(stmt,idx))) + if find_cur_level(stmt,idx) >= start_level then + table.insert(hold_constant, idx) + --print(string.format("\nJust inserted thread %s in hold_constant",idx)) + end + end + + --print "\nhold constant table is: " + --table.foreach(hold_constant, print) + + --print("\nbefore datacopy pvt") + old_num_stmts = num_statements() + --print_code() + --print(string.format("\n[DataCopy]datacopy_privatized(%d, %s, %s, vector having privatized levels)",stmt, start_loop, array_name)) + --table.foreach(hold_constant, print) + datacopy_privatized(stmt, start_loop, array_name, hold_constant) + + --print(hold_constant) + new_num_stmts = num_statements() + --print("\nthe num of statements:%d\n",new_num_stmt) + --print_code() + --exit(0) + -- [Malik] normalize the copy loops created. 
+ cur = cur_indices(old_num_stmts) + --print("Cur indices "..list_to_string(cur)) + for cidx,i in ipairs(cur) do + if i ~= "tx" and i~="ty" and i~="bx" and i~="by" then + --tile(old_num_stmts,find_cur_level(old_num_stmts,i),find_cur_level(old_num_stmts,i)) + --print("\nTILE OF REG: tile(%d,%d,%d)",old_num_stmts,find_cur_level(old_num_stmts,i),find_cur_level(old_num_stmts,i)) + end + end + --print_code() + --print("\nthe num of statements OLD+1 :",(old_num_stmts+1)) + + +--[[ + is this commented out? why yes, yes it is block comment + if( (old_num_stmts+1) <= new_num_stmts) then + cur = cur_indices(old_num_stmts+1) + --print("Cur indices+1 "..list_to_string(cur)) + for cidx,i in ipairs(cur) do + if i ~= "tx" and i~="ty" and i~="bx" and i~="by" then + tile(old_num_stmts+1,find_cur_level(old_num_stmts+1,i),find_cur_level(old_num_stmts+1,i)) + --print("\nTILE OF REG: tile(%d,%d,%d)",old_num_stmts+1,find_cur_level(old_num_stmts+1,i),find_cur_level(old_num_stmts+1,i)) + end + end + end +--]] + + + --Unroll to the last thread level + --for stmt=old_num_stmts,new_num_stmts-1 do + -- level = find_cur_level(stmt,thread_idxs[#thread_idxs])--get last thread level + --if level < #cur_indices(stmt) then + -- unroll(stmt,level+1,0) + --print(string.format("\n[Unroll]unroll(%d, %d, 0)",stmt, level+1)) + ----print_code() + --end + --end + io.flush() + --print("****** ending copy to registers\n\n") + --io.flush() +end + +function copy_to_shared(start_loop, array_name, alignment) + --print(string.format("\nstarting copy to shared(%s, %s, %d )",start_loop,array_name,alignment)) + stmt = 0 --assume stmt 0 + cur = cur_indices(stmt) + --print("Cur indices "..list_to_string(cur)) + + start_level = find_cur_level(stmt, start_loop) + --print(string.format("start_level %d", start_level)) + + old_num_stmts = num_statements() + --print(string.format("old_num_statements %d", old_num_stmts)) + + --Now, we give it indices for up to two dimentions for copy loop + copy_loop_idxs = {"tmp1","tmp2"} + 
--print(string.format("\n[DataCopy]datacopy(%d, %d, %s, {\"tmp1\",\"tmp2\"},false,0,1,%d,true)",stmt, start_level, array_name, alignment)) + datacopy(stmt, start_level, array_name, copy_loop_idxs, false, 0, 1, alignment,true) + + add_sync(stmt,start_loop) + new_num_stmts = num_statements() + + --This is fairly CUBLAS2 specific, not sure how well it generalizes, + --but for a 2D copy, what we want to do is "normalize" the first loop + --"tmp1" then get its hard upper bound. We then want to tile it to + --make the control loop of that tile "ty". We then tile "tmp2" with a + --size of 1 and make it "tx". + --print(string.format("fairly CUBLAS2 specific, OLD %d NEW %d", old_num_stmts, new_num_stmts )) + + for stmt=old_num_stmts,new_num_stmts-1 do + --print(string.format("for stmt = %d", stmt)) + was_no_error, level = pcall(find_cur_level, stmt, "tmp2") + + if was_no_error then + --print_code() + --print("\nCopy to shared: [If was no error]\n") + find_cur_level(stmt,"tmp2") + tile(stmt, level, level) + + lower,upper = hard_loop_bounds(stmt, level) + upper = upper + 1 + --print(string.format("lower %d upper %d", lower, upper)) + + tx,ty = thread_dims() + --print("2-loop cleanup: lower, upper: "..lower..", "..upper..", tx: "..tx) + + level = find_cur_level(stmt,"tmp1") + --print(string.format("level %d", level)) + + if tx == upper and ty == 1 then + --print(string.format("tx = %d upper = %d ty = %d", tx, upper, ty)) + --print "Don't need" + + --Don't need an extra tile level, just move this loop up + second_level = find_cur_level(stmt,"tmp2") + --print(string.format("\n[Tile0]tile(%d, %d, 1, %d,%s,%s,counted)",stmt, second_level, level, "tx", "tx")) + tile(stmt, second_level, 1, level, "tx", "tx", counted) + else + --print "DO need?" 
+ --print_code() + if(ty == 1) then new_ctrl = "tmp3" else new_ctrl = "ty" end + + +--[[ Commenting out a block of Gabe's code in this control flow + -- level = find_cur_level(stmt,"tmp1") + tile(stmt, level, level) + + lower,upper = hard_loop_bounds(stmt, level) + upper = upper + 1 + --print_code() + --print("2-loop cleanup: lower, upper: "..lower..", "..upper..", tx: "..tx..", level: "..level) + if(math.ceil(upper/ty) > 1)then + tile(stmt, level, math.ceil(upper/ty), level, "tmp", new_ctrl, counted) + --print(string.format("\n[Tile1]tile(%d, %d, %f[%d,%d], %d,%s,%s,counted)",stmt, level, math.ceil(upper/ty),upper,ty, level, "tmp", new_ctrl)) + else + tile(stmt, level, math.ceil(upper/ty), level, "ty", new_ctrl, counted) + --print(string.format("\n[Tile1]tile(%d, %d, %f[%d,%d], %d,%s,%s,counted)",stmt, level, math.ceil(upper/ty),upper,ty, level, "tx", new_ctrl)) + end + + --print_code() + -- [Malik] If here we have the loop upper bound > tx, then we should tile once more after the next tile, to carve out the correct tx. + lower1,upper1 = hard_loop_bounds(stmt,level) + level1 = level + stmt1 = stmt + -- [Malik] Do the tile after the second level tile with if condition. Just to keep the original order, the tile is being pushed to the end. 
+ + --print("[Malik]-loop cleanup: lower1, upper1: "..lower1..", "..upper1..", tx: "..tx..", level:"..level1) + + --print_code() + --level = find_cur_level(stmt,"tmp") + --tile(stmt,level,level) + --print_code() + + --[Malik] if you are moving the loop above the level1, you need to update level1 with new position which would be level1+2 or second_level + if(level <= level1) then level1 = level1+2 end + --print(string.format("\n[Tile2]tile(%d, %d, 1, %d,%s,%s,counted)",stmt, second_level, level, "tx", "tx")) + --print("\n----------------------------------") + --print_code() + --print("\n**********************************") + --print("[Malik]-loop cleanup: lower1, upper1: "..lower1..", "..upper1..", tx: "..tx..", level:"..level1) + -- [Malik] If the upper bound > tx, we do another tile to carve out the correct tx from a bigger loop. Else just normalize the bounds. + if( upper1 > ty) then + third_level = find_cur_level(stmt1,"tmp") + --print("\n\n\n\t\t\t\tthirdlevel:"..third_level) + tile(stmt1, third_level, ty, third_level, "ty", "tmp", counted) + --print(string.format("\n[Tile3]tile(%d, %d, %d,%d,%s,%s,counted)",stmt1, third_level, ty,third_level, "ty", "tmp")) + tile(stmt1,third_level+1,third_level+1) + --print(string.format("\n[Tile3]tile(%d, %d, %d)",stmt1, third_level+1, third_level+1)) + tile(stmt1,third_level+1,third_level) + --print(string.format("\n[Tile3]tile(%d, %d, %d)",stmt1, third_level+1, third_level)) + else + tile(stmt1,level1,level1) + --print(string.format("\n[Tile3ELSE]tile(%d, %d, %d)",stmt1,level1,level1)) + end + + --print("\nStarting tmp2\n");--print_code(); + second_level = find_cur_level(stmt,"tmp2") + lower,upper = hard_loop_bounds(stmt,second_level) + level = second_level + --print("[Malik]-loop cleanup@tmp2: lower, upper: "..lower..", "..upper..", tx: "..tx..", level:"..level) + + if(math.ceil(upper/tx) > 1)then + tile(stmt, second_level,math.ceil(upper/tx), level, "tmp", "tx", counted) + --print(string.format("\n[Tile2]tile(%d, %d, 
%d,%d,%s,%s,counted)",stmt, second_level,math.ceil(upper/tx),second_level, "tmp", "tx")) + else + tile(stmt, second_level,math.ceil(upper/tx), level, "tx", "tx", counted) + --print(string.format("\n[Tile2]tile(%d, %d, %d,%d,%s,%s,counted)",stmt, second_level,math.ceil(upper/tx),second_level, "tx", "tx")) + end + --print_code() + lower2,upper2 = hard_loop_bounds(stmt,level) + level2 = level + stmt2 = stmt + --print("[Malik]-loop cleanup@tmp2: lower2, upper2: "..lower2..", "..upper2..", tx: "..tx..", level:"..level2) + -- now for the second level. + if( upper2 > tx) then + forth_level = find_cur_level(stmt2,"tmp") + --print("\n\n\n\t\t\t\tforthlevel:"..forth_level) + --print_code() + tile(stmt2, forth_level, 1, forth_level, "tx", "tmp", counted) + --print(string.format("\n[Tile3B]tile(%d, %d, %d,%d,%s,%s,counted)",stmt2, forth_level, tx,forth_level, "ty", "tmp")) + --print_code() + --tile(stmt2,forth_level+1,forth_level+1) + --print(string.format("\n[Tile3B]tile(%d, %d, %d)",stmt2, forth_level+1, forth_level+1)) + --tile(stmt2,forth_level+1,forth_level) + --print(string.format("\n[Tile3B]tile(%d, %d, %d)",stmt2, forth_level+1, forth_level)) + else + new_level = find_cur_level(stmt2,"ty") + tile(stmt2,level2,1,new_level,"tx","tx",counted) + --print(string.format("\n[Tile3BELSE]tile(%d, %d, %d)",stmt2,level2,level2)) + tmp_level = find_cur_level(stmt2,"tmp") + tile(stmt2,tmp_level,tmp_level) + end + + --print_code() + --print("\n----------------------------------") +--]] + + --print_code() + --print("\nStarting tmp2\n");--print_code(); + first_level = find_cur_level(stmt,"tmp1") + second_level = find_cur_level(stmt,"tmp2") + lower,upper = hard_loop_bounds(stmt,second_level) + + --print("[Malik]-loop cleanup@tmp2: lower, upper: "..lower..", "..upper..", tx: "..tx..",first level:"..first_level..",second_level:"..second_level) + + -- Move the fastest changing dimension loop to the outermost,identified by "tmp2" and to be identified as tx. 
+ --print(string.format("\n[fastest]tile(%d, %d, %d,%d,%s,%s,counted)",stmt, second_level,1,first_level, "tx", "tx")) + tile(stmt,second_level,1,first_level,"tx","tx",counted) + --print_code() + + first_level = find_cur_level(stmt,"tmp1") + lower_1,upper_1 = hard_loop_bounds(stmt,first_level) + tx_level = find_cur_level(stmt,"tx") + lower_tx,upper_tx = hard_loop_bounds(stmt,tx_level) + --print(string.format("UL_1 %d %d UL_tx %d %d", lower_1, upper_1, lower_tx, upper_tx)) + + if(math.ceil(upper_tx/tx) > 1)then + --print "ceil I say" + --print(string.format("\n[Tile1]tile(%d, %d, %d,%d,%s,%s,counted)",stmt, tx_level,tx,tx_level, "tx", "tmp1")) + tile(stmt,tx_level,tx,tx_level,"tx","tmp_tx",counted) + --print_code() + + peat = find_cur_level(stmt,"tx") + --print(string.format("\n[Tile1]tile(%d, %d, %d)",stmt, peat, peat)) + tile(stmt, peat, peat ) --find_cur_level(stmt,"tx"),find_cur_level(stmt,"tx")) + --print_code() + + if (find_cur_level(stmt,"tx")>find_cur_level(stmt,"tmp_tx")) then + --print(string.format("\nagain [Tile1]tile(%d, %d, %d)",stmt,find_cur_level(stmt,"tx"),find_cur_level(stmt,"tmp_tx"))) + tile(stmt,find_cur_level(stmt,"tx"),find_cur_level(stmt,"tmp_tx")) + --print_code() + end + --else + --tile(stmt, tx_level,1, tx_level, "tx", "tx", counted) + --print(string.format("\n[Tile2]tile(%d, %d, %d,%d,%s,%s,counted)",stmt, tx_level,1,tx_level, "tx", "tx")) + end + --print_code() + --]] -- this apparently is NOT the end of a block comment + + --print("\nStarting tmp1\n") + -- Handle the other slower changing dimension, the original outermost loop, now identified by "tmp1", to be identified as "ty". 
+ tile(stmt,find_cur_level(stmt,"tmp1"),find_cur_level(stmt,"tmp1")) + --print_code() + + ty_level = find_cur_level(stmt,"tmp1") + lower_ty,upper_ty = hard_loop_bounds(stmt,ty_level) + + tx_level = find_cur_level(stmt,"tx") + lower_tx,upper_tx = hard_loop_bounds(stmt,tx_level) + --print("[Malik]-loop cleanup@tmp1: lowerty, upperty: "..lower_ty..", "..upper_ty..", ty: "..ty..",ty level:"..ty_level..",tx_level:"..tx_level..", stmt: "..stmt) + + --print "before ceil" + if(math.ceil(upper_ty/ty) > 1)then + --print "CEIL IF" + --print("\n Inside upper_ty/ty > 1\n"); + + --print(string.format("\n[Tile2]tile(%d, %d, %d,%d,%s,%s,counted)",stmt, ty_level,ty,ty_level, "ty", "tmp_ty")) + tile(stmt,ty_level,ty,ty_level,"ty","tmp_ty",counted) + --print_code() + + --print(string.format("\n[Tile2-1]tile(%d, %d, %d)",stmt,find_cur_level(stmt ,"ty"),find_cur_level(stmt,"ty"))) + tile(stmt,find_cur_level(stmt,"ty"),find_cur_level(stmt,"ty")) + --print_code() + + ----------------------------------------------------------------------- + ---------------------------------------------------------------------- + cur_idxs = cur_indices(stmt) + --print("\n cur indexes are "..list_to_string(cur_idxs)) + + -- Putting ty before any tmp_tx + idx_flag = -1 + for num= 0,table.getn(cur_idxs) do + if(cur[num] == "tmp_tx") then + idx_flag = find_cur_level(stmt,cur[num]) + break + end + end + --print(string.format("\n (1) so i have found out the value of idx flag as %d",idx_flag) ) + + if(idx_flag >=0 ) then + if (find_cur_level(stmt,"ty")>find_cur_level(stmt,"tmp_ty")) then + --print(string.format("\n[Tile2-2]tile(%d, %d, %d)",stmt,find_cur_level(stmt,"ty"),find_cur_level(stmt,"tmp_ty"))) + tile(stmt,find_cur_level(stmt,"ty"),find_cur_level(stmt,"tmp_ty")) + --print_code() + end + end + + -- Now Putting ty before any tmp_ty + idx_flag = -1 + for num= 0,table.getn(cur_idxs) do + if(cur[num] == "tmp_ty") then + idx_flag = find_cur_level(stmt,cur[num]) + break + end + end + --print(string.format("\n IF 
so i have found out the value of idx flag as %d",idx_flag) ) + if(idx_flag >=0 ) then + --print "one more test" + if ((find_cur_level(stmt,"ty")>find_cur_level(stmt,"tmp_ty"))) then + --print(string.format("\n[Tile2-2]tile(%d, %d, %d)",stmt,find_cur_level(stmt,"ty"),find_cur_level(stmt,"tmp_ty"))) + tile(stmt,find_cur_level(stmt,"ty"),find_cur_level(stmt,"tmp_ty")) + --print_code() + end + end + else + --print "CEIL ELSE" + --cur_idxs = cur_indices(stmt) + --print("\n Inside upper_ty/ty <= 1\n"); + + --print(string.format("\n[Tile3]tile(%d, %d, %d,%d,%s,%s,counted)",stmt, ty_level,1,ty_level, "ty", "ty")) + tile(stmt, ty_level,1, ty_level, "ty", "ty", counted) + --print_code() + + --print(string.format("\n[Tile3-1]tile(%d, %d, %d)",stmt,find_cur_level(stmt,"ty"),find_cur_level(stmt,"tx")+1)) + tile(stmt,find_cur_level(stmt,"ty"),find_cur_level(stmt,"tx")+1) + --print_code() + + idx_flag = -1 + if(cur_idxs) then + --print "CAN NEVER GET HERE? cur_idxs" + for num= 0,table.getn(cur_idxs) do + if(cur[num] == "tmp_ty") then + idx_flag = find_cur_level(stmt,cur[num]) + break + end + end + end + --print(string.format("\n ELSE so i have found out the value of idx flag as %d",idx_flag) ) + if(idx_flag >=0 ) then + if (find_cur_level(stmt,"ty")>find_cur_level(stmt,"tmp_ty")) then + --print(string.format("tile( stmt %d, level ty %d, level ty %d",stmt,find_cur_level(stmt,"ty"),find_cur_level(stmt,"tmp_ty"))) + tile(stmt,find_cur_level(stmt,"ty"),find_cur_level(stmt,"tmp_ty")) + --print(string.format("\n[Tile3-2]tile(%d, %d, %d)",stmt,find_cur_level(stmt,"ty"),find_cur_level(stmt,"tmp_ty"))) + end + end + end + + --print_code() + end + + + --print "\n\n *** at bottom of if in copy to shared, " + --print_code() + --print "end of if" + + else + --copy to shared only created one level, not two, so we use a different approach (MV & TMV) + --print("\nCopy to shared: [If was error]\n") + level = find_cur_level(stmt,"tmp1") + tile(stmt, level, level) + + 
--print(string.format("\n[Tile]tile(%d, %d, %d)",stmt, level, level)) + tx,ty = thread_dims() + lower,upper = hard_loop_bounds(stmt, level) + upper = upper+1 --upper bound given as <=, compare to dimensions tx which is < + --print("upper "..upper.." tx "..tx) + if upper == tx then + rename_index(stmt, "tmp1", "tx") + else + --print("upper is not tx") + --TODO: Don't know, maybe do some tileing etc + --print_code() + --print("upper "..upper.." tx "..tx.." stmt: "..stmt.." level: "..level) + tile(stmt, level,tx,level, "tx", "tmp_tx", counted) + --print_code() + + --print("stmt:"..stmt.." level+1: "..level+1) + --print("TILE 7") + tile(stmt, level+1,1,level+1,"tx", "tx",counted) + --print("TILE 3") + tile(stmt,level+1,level) + --print_code() + + if(ty > 1) then + --print_code() + --print("GOING IN") + lower,upper = hard_loop_bounds(stmt, level+1) + --print(string.format("ty %d lower %d upper %d", ty, lower, upper)) + --upper=125 + --print("NOW FOR Y: upper "..upper.." ty "..ty.." stmt: "..stmt.." level: "..(level+1).." 
bound:"..math.ceil(upper/ty)) + tile(stmt, level+1,math.ceil(upper/ty),level+1, "tmp_ty", "ty", counted) + --tile(stmt, level+2,math.ceil(upper/ty),level+2, "tmp_ty", "ty", counted) + end + --print_code() + --rename_index(stmt, "tmp1", "tx") + --print("Warning: Need to implement some logic here to tile the single level shared copy loop to match thread dimensions") + end + end + --Always add sync + add_sync(stmt,start_loop) + + end + --print("ending copy to shared\n") + --print_code() +end + +function unroll_to_depth(max_depth) + --print(string.format("\n\nunroll_to_depth(%d)", max_depth )) + --print "SYNC UP" + + cur = cur_indices(0) + thread_idxs = thread_indices() + guard_idx = thread_idxs[#thread_idxs] + + --print(string.format("cur indices %s",list_to_string(cur))) + --print(string.format("thread indices %s",list_to_string(thread_idxs))) + --print(string.format("#thread_idxs = %d", #thread_idxs)) + --print(string.format("guard_idx = %s", guard_idx)) + + ---- HERE FIND OUT THE LOOPS WHICH ARE COMMON BETWEEN STATEMENTS + common_loops = {} + comm_loops_cnt = 0 + num_stmts = num_statements() + --print(string.format("num statements %d", num_stmts)) + + for stmt=0,num_stmts-1 do + cur_idxs = cur_indices(stmt) + + --print(string.format("\nSTMT %d Current Indices: %s",stmt,list_to_string(cur_idxs))) + + if(chk_cur_level(stmt,"tx")>0) then + for ii=1,find_cur_level(stmt,"tx")-1 do -- started at 0 + --print(string.format("ii = %d", ii)) -- index starts at 1, what does index 0 do? + --if cur_idxs[ii] == nil then print "cur_idxs[i]] is NIL" + --else print(string.format("cur_idxs[%d] = '%s'", ii, cur_idxs[ii])) -- index starts at 1, what does index 0 do? 
+ --end + + if(cur_idxs[ii] ~= "bx" and cur_idxs[ii] ~= "by" and cur_idxs[ii] ~= nil and cur_idxs[ii] ~= "tx" and cur_idxs[ii] ~= "ty" and cur_idxs[ii] ~= "") then + + --print(string.format("id %s is not in the list", cur_idxs[ii] )) + + for stmt1=stmt+1,num_stmts-1 do + --print(string.format("\nii %d stmt1 is %d", ii, stmt1)) + cur_idxs1 = cur_indices(stmt1) + --print("\nstmt1 cur_idxs1 is "..list_to_string(cur_idxs1)) + + --print(string.format("cur level(%d, %s) = %d", stmt, "tx", find_cur_level(stmt,"tx"))) + + endrange = find_cur_level(stmt,"tx")-1 + --print(string.format("for iii=1, %d do", endrange)) + + for iii=1,find_cur_level(stmt,"tx")-1 do -- started at 0 + --print(string.format("stmt %d ii %d iii %d ", stmt, ii, iii)) + --if(cur_idxs1[iii] ~= nil) then + -- print(string.format("stmt %d ii %d iii %d cur_idxs1[%d] = '%s'", stmt, ii, iii, iii, cur_idxs1[iii])) + --else + -- print(string.format("stmt %d ii %d iii %d cur_idxs1[%d] = NIL", stmt, ii, iii, iii)) + --end + + if(cur_idxs1[iii] ~= "bx" and cur_idxs1[iii] ~= "by" and cur_idxs1[iii] ~= nil and cur_idxs1[iii] ~= "tx" and cur_idxs1[iii] ~= "ty" and cur_idxs1[iii] ~= "") then + if(cur_idxs[ii] == cur_idxs1[iii]) then + --print("\nfound idx:"..cur_idxs[ii]) + --if(comm_loops_cnt == 0) then print "\n\n*** WARNING *** assigning to array index ZERO in Lua" end + common_loops[comm_loops_cnt] = cur_idxs[ii] + --print(string.format("cl[%d] = '%s'", comm_loops_cnt, common_loops[comm_loops_cnt])) + comm_loops_cnt = comm_loops_cnt + 1 + end + end + end + end + end + end + end + end + ---- + --if(comm_loops_cnt>0) then + -- print("\n COMM LOOPS :TOTAL "..comm_loops_cnt..", and are "..list_to_string(common_loops).." this loop :"..common_loops[0]) + --else + -- print "UNROLL can't unroll any loops?" 
+ --end + + + + + repeat + old_num_stmts = num_statements() + --print(string.format("old_num_statements %d", old_num_stmts)) + + for stmt=0,old_num_stmts-1 do + cur_idxs = cur_indices(stmt) + --print(string.format("stmt %d cur_idxs = %s", stmt, list_to_string(cur_idxs))) + if(#cur_idxs > 0) then + gaurd_level = -1 + if(chk_cur_level(stmt,guard_idx)>0) then + gaurd_level = find_cur_level(stmt,guard_idx) + end + --print(string.format("guard_level(sp) = %d", gaurd_level)) + + if(gaurd_level>-1) then + level = next_clean_level(cur_idxs,gaurd_level) + --print(string.format("next clean level %d", level)) + + --need to handle max_depth + num_unrolled = 0 + level_unroll_comm = level + level_arr = {} + while level >= 0 do + --print(string.format("while: level = %d", level)) + + if num_unrolled == max_depth then break end + --print("Unrolling "..stmt.." at level "..(level).." index ".. cur_idxs[gaurd_level+1]) + + level_arr[num_unrolled] = level + num_unrolled = num_unrolled + 1 + + guard_level = find_cur_level(stmt,guard_idx) + level = next_clean_level(cur_idxs,level+1) + end + --dies print("How many levels for unroll commands"..table.getn(level_arr).." which is "..level_arr[0].." 
and "..level_arr[#level_arr]) + --if(table.getn(level_arr) ~= nil) then + + --print "OK, NOW WE UNROLL" + + if(level_unroll_comm >= 0)then + for i = table.getn(level_arr),0,-1 do + --print(string.format("\ni=%d", i)) + --print(string.format("[Unroll]unroll(%d, %d, 0)",stmt, level_arr[i])) + + unroll(stmt,level_arr[i],0) + --print("finished unroll]]\n") + --print_code() + end + end +------ + end +--[[ + +THERE WAS A BIG BLOCK OF COMMENTED OUT CODE HERE + + +--]] +------ + end + end + new_num_stmts = num_statements() + + until old_num_stmts == new_num_stmts + +end + + diff --git a/examples/cuda-chill/mm.c b/examples/cuda-chill/mm.c new file mode 100644 index 0000000..0efbeeb --- /dev/null +++ b/examples/cuda-chill/mm.c @@ -0,0 +1,10 @@ +#define N 1024 + +void normalMM(float c[N][N], float a[N][N], float b[N][N]) { + int i, j, k; + + for (i = 0; i < N; i++) + for (j = 0; j < N; j++) + for (k = 0; k < N; k++) + c[j][i] = c[j][i] + a[k][i] * b[j][k]; +} diff --git a/examples/cuda-chill/mm.lua b/examples/cuda-chill/mm.lua new file mode 100644 index 0000000..5bde1b0 --- /dev/null +++ b/examples/cuda-chill/mm.lua @@ -0,0 +1,38 @@ +init("mm.c", "normalMM", 0) +dofile("cudaize.lua") +N=1024 +Ti=128 +Tj=64 +Tk=16 +Tii=16 +Tjj=16 + + + + +N=1024 + + + + + + + + + + + + + +tile_by_index({"i","j"},{Ti,Tj},{l1_control="ii",l2_control="jj"},{"ii","jj","i","j","k"})CU=1 + +tile_by_index({"k"},{Tk},{l1_control="kk"},{"ii","jj","kk","i","j","k"})CU=3 + +tile_by_index({"i","j"},{Tii,Tjj},{l1_control="iii",l2_control="jjj"},{"ii","jj","kk","i","iii","j","jjj","k"},1)CU=2 + +cudaize("mm_GPU",{a=1048576,b=1048576,c=1048576},{block={"ii","jj"}, thread={"i","j"}})CU=2 +copy_to_shared("tx","a",-16) +copy_to_shared("tx","b",-16) +copy_to_registers("kk","c") +--print_code() +unroll_to_depth(2) diff --git a/examples/cuda-chill/mpeg4.c b/examples/cuda-chill/mpeg4.c new file mode 100755 index 0000000..7f83bf7 --- /dev/null +++ b/examples/cuda-chill/mpeg4.c @@ -0,0 +1,23 @@ +#define N1 4096 
+#define N2 4096 +#define WINDOW_SIZE 16 + +void mpeg4_cpu(float result[N1][N2], float prev[N2+WINDOW_SIZE][N2+WINDOW_SIZE], float curr[WINDOW_SIZE*WINDOW_SIZE]) +{ + unsigned int i; + unsigned int j; + unsigned int k; + unsigned int l; + + for ( i = 0; i < N1; ++i) + for ( j = 0; j < N2; ++j) + for ( k = 0; k < WINDOW_SIZE; ++k) + for ( l = 0; l < WINDOW_SIZE; ++l) + result[i][j] += prev[i+k][j+l] * curr[k*WINDOW_SIZE+l]; + + + + + +} + diff --git a/examples/cuda-chill/mpeg4.lua b/examples/cuda-chill/mpeg4.lua new file mode 100644 index 0000000..f025dc0 --- /dev/null +++ b/examples/cuda-chill/mpeg4.lua @@ -0,0 +1,45 @@ +--CUBLAS 2 MM Multiply + +--This function form initializes "CUDAIZE v2" versus "CUDAIZE v1" if you +--call init() and use global variables to specify procedure and loop + +--Second parameter is procedure # and third is loop # +init("mpeg4.c", "mpeg4_cpu", 0) + +--dofile("cudaize.lua") --defines custom tile_by_index, copy_to_registers,copy_to_shared methods +dofile("cudaize.lua") --defines custom tile_by_index, copy_to_registers,copy_to_shared methods + +N=4096 +M=4096 +W=16 + +--TI must be <= M +--TJ must be <=TI +Ti=32 +Tj=32 +Tii=16 +Tjj=16 +Tk=4 +--permute(0,{"j","i","k","l"}) +tile_by_index({"i","j"},{Ti,Tj},{l1_control="ii",l2_control="jj"},{"ii","jj","i","j","k","l"}) +--tile_by_index({"k","l"},{Tk*2,Tk*2},{l1_control="kk",l2_control="ll"},{"ii","jj","kk","ll","i","j","k","l"}) +--print_code() +--tile_by_index({"k","l"},{Tk,Tk},{l1_control="kk",l2_control="ll"},{"ii","jj","i","j","kk","k","ll","l"}) +tile_by_index({"i","j"},{Tii,Tjj},{l1_control="iii",l2_control="jjj"},{"ii","jj","iii","i","jjj","j","k","l"}) +--print_code() +--normalize_index("j") +--normalize_index("i") +--print_code() +cudaize("kernel_GPU",{curr=W*W,prev=(N+W)*(M+W),result=N*M},{block={"ii","jj"}, thread={"i","j"}}) +--print_code() +copy_to_shared("iii","prev",16) + +copy_to_registers("jjj","result") + +--print_code() +--copy_to_constant_no_tile("curr") +unroll_to_depth(2) 
+print_code() +print_space() + + diff --git a/examples/cuda-chill/mriq-fh.c b/examples/cuda-chill/mriq-fh.c new file mode 100755 index 0000000..1e924b7 --- /dev/null +++ b/examples/cuda-chill/mriq-fh.c @@ -0,0 +1,38 @@ +#define X 32768 +#define K 256 +struct kValues { + float Kx; + float Ky; + float Kz; + float PhiMag; +}; +extern float sin(float); +extern float cos(float); + +void mriFH_cpu(float *rPhi,float *rRho,float *iRho, float *iPhi, float *rD, float *iD, float *kx, float *ky, float *kz, float *dx, float *dy, float *dz, float *rFHref, float *iFHref) +{ + + float rfh; + float ifh; + float exp; + float cArg; + float sArg; + //float rRho[K]; + //float iRho[K]; + unsigned int k; + unsigned int x; + + + for (x = 0; x < X; ++x) { + for (k = 0; k < K; ++k) { + + exp = 2 * 3.14159 * (kx[k]* dx[x] + ky[k]* dy[x] + kz[k]* dz[x]); + cArg = cos(exp); + sArg = sin(exp); + rFHref[x] += rRho[k]* cArg - iRho[k]* sArg; + iFHref[x] += iRho[k]*cArg + rRho[k]*sArg; + } + + } +} + diff --git a/examples/cuda-chill/mriq-fh.lua b/examples/cuda-chill/mriq-fh.lua new file mode 100755 index 0000000..3277bac --- /dev/null +++ b/examples/cuda-chill/mriq-fh.lua @@ -0,0 +1,73 @@ +--CUBLAS 2 MM Multiply + +--This function form intializes "CUDAIZE v2" versus "CUDAIZE v1" if you +--call init() and use global variables to specify procedure and loop + +--Second parameter is procedure # and third is loop # +init("mriq-fh.c", "mriFH_cpu", 0) + +dofile("cudaize.lua") --defines custom tile_by_index, copy_to_registers, + --copy_to_shared methods +N=32768 +M=256 +Tx=256 + + +print_code() +--permute(0,{"j","i"}) +--tile_by_index({"j","i"}, {TI,TJ}, {l1_control="jj", l2_control="ii"}, {"jj","ii", "j", "i"}) +tile_by_index({"x"},{Tx},{l1_control="xx"},{"xx","x","k"}) +--tile_by_index({"x"},{16},{l1_control="xx1"},{"xx","x","xx1","k"}) +--tile_by_index({"j"}, {TI}, {l1_control="jj"}, {"ii","jj", "j", "i"}) +--tile_by_index({"i"}, {TI}, {l1_control="ii"}, {"ii", "i", "j"}) +print_code() + 
+normalize_index("x") +--normalize_index("i") +print_code() +--tile_by_index({"i"}, {TI}, {l1_control="iii",l1_tile="i"}, {"ii","jj", "iii","j","i"}) +--print_code() +--cudaize("Kernel_GPU", {x=N,y=N,z=N,Qr=N,Qi=N,kVals=M},{block={"jj"}, thread={"j"}}) +cudaize("kernel_GPU",{dx=N,dy=N,dz=N,iRho=M,kx=M,ky=M,kz=M,rFHref=N,iFHref=N,rRho=M},{block={"xx"}, thread={"x"}}) +--copy_to_shared("tx","iRho",-16) +--copy_to_shared("tx","dz",1) +--copy_to_shared("tx","rRho",-16) +--copy_to_registers("tx","rFHref") +--copy_to_registers("tx","rRho") +--copy_to_registers("tx","iRho") +--copy_to_registers("tx","kx") +--copy_to_registers("tx","dx") +--copy_to_registers("tx","ky") +--copy_to_registers("tx","dy") +--copy_to_registers("tx","kz") +--copy_to_registers("tx","dz") +--copy_to_registers("tx","iFHref") +--copy_to_texture("rRho") +--copy_to_texture("kx") +--copy_to_texture("dx") +--copy_to_texture("ky") +--copy_to_texture("dy") +--copy_to_texture("kz") +--copy_to_texture("dz") +--copy_to_texture("iRho") +--print_code()--]] +--unroll(0,4,0) +--copy_to_constant_no_tile("kx") +--copy_to_constant_no_tile("ky") +--copy_to_constant_no_tile("kz") +--copy_to_constant_no_tile("rRho") +--copy_to_constant_no_tile("iRho") + +--unroll_to_depth(1) +print_code() +--[[ +copy_to_Texture("rRho") +copy_to_Texture("kx") +copy_to_Texture("dx") +copy_to_Texture("ky") +copy_to_Texture("dy") +copy_to_Texture("kz") +copy_to_Texture("dz") +copy_to_Texture("iRho") +--unroll_to_depth(2) +--]] diff --git a/examples/cuda-chill/mriq.c b/examples/cuda-chill/mriq.c new file mode 100644 index 0000000..ba4b87c --- /dev/null +++ b/examples/cuda-chill/mriq.c @@ -0,0 +1,33 @@ +#define N 32768 +#define M 3072 +struct kValues { + float Kx; + float Ky; + float Kz; + float PhiMag; +}; +extern float sinf(float); +extern float cosf(float); + +void +ComputeQCPU(int numK, int numX,struct kValues kVals[M],float x[N], float y[N], float z[N],float Qr[N], float Qi[N]) { + float expArg; + float cosArg; + float sinArg; + float 
phi; + int i; + int j; + numK = M; + numX = N; + for ( i = 0; i < M; i++) { + for ( j = 0; j < N; j++) { + expArg = 6.2831853071795864769252867665590058f * (kVals[i].Kx * x[j] +kVals[i].Ky * y[j] +kVals[i].Kz * z[j]); + cosArg = cosf(expArg); + sinArg = sinf(expArg); + phi = kVals[i].PhiMag; + Qr[j] += phi * cosArg; + Qi[j] += phi * sinArg; + } + } +} + diff --git a/examples/cuda-chill/mriq.lua b/examples/cuda-chill/mriq.lua new file mode 100644 index 0000000..1170111 --- /dev/null +++ b/examples/cuda-chill/mriq.lua @@ -0,0 +1,55 @@ +--CUBLAS 2 MM Multiply + +--This function form intializes "CUDAIZE v2" versus "CUDAIZE v1" if you +--call init() and use global variables to specify procedure and loop + +--Second parameter is procedure # and third is loop # +init("mriq.c", "ComputeQCPU", 0) + +dofile("cudaize.lua") --defines custom tile_by_index, copy_to_registers, + --copy_to_shared methods +N=32768 +M=3072 +TI=128 +TJ=128 + +permute(0,{"j","i"}) +--tile_by_index({"j","i"}, {TI,TJ}, {l1_control="jj", l2_control="ii"}, {"jj","ii", "j", "i"}) +tile_by_index({"i"}, {TJ}, {l1_control="ii",l1_tile="i"}, {"ii", "j","i"}) +tile_by_index({"j"}, {TI}, {l1_control="jj"}, {"ii","jj", "j", "i"}) +--tile_by_index({"i"}, {TI}, {l1_control="ii"}, {"ii", "i", "j"}) +--print_code() + +normalize_index("j") +normalize_index("i") +--print_code() +--tile_by_index({"i"}, {TI}, {l1_control="iii",l1_tile="i"}, {"ii","jj", "iii","j","i"}) +--print_code() +cudaize("Kernel_GPU", {x=N,y=N,z=N,Qr=N,Qi=N,kVals=M},{block={"jj"}, thread={"j"}}) + +copy_to_shared("tx","kVals",1) +--copy_to_shared("tx","x",1) +--copy_to_shared("tx","y",1) +--copy_to_shared("tx","z",1) + +--copy_to_texture("kVals") +--datacopy(0, 3, "kVals", {"tt","t"},false,0,1,-16,true) +--print_code() +--datacopy_privatized(0,"tx","kVals",{"tx"}) +--copy_to_registers("tx","kVals") +copy_to_registers("ii","x") +copy_to_registers("ii","y") +copy_to_registers("ii","z") +copy_to_registers("ii","Qi") +copy_to_registers("ii","Qr") 
+--[[datacopy_privatized(0,"tx","x",{"tx"}) +datacopy_privatized(0,"tx","y",{"tx"}) +datacopy_privatized(0,"tx","z",{"tx"}) +datacopy_privatized(0,"tx","Qi",{"tx"}) +datacopy_privatized(0,"tx","Qr",{"tx"}) + + +]]-- +--unroll(0,5,64) +print_code() +--unroll_to_depth(1) --won't unroll past thread/loop mapping, unrolls up to two loop levels diff --git a/examples/cuda-chill/mv-shadow.c b/examples/cuda-chill/mv-shadow.c new file mode 100644 index 0000000..582b187 --- /dev/null +++ b/examples/cuda-chill/mv-shadow.c @@ -0,0 +1,9 @@ +#define N 1024 + +void normalMV(float c[N][N], float a[N], float b[N]) { + int i, j; + + for (i = 0; i < N; i++) + for (j = 0; j < N; j++) + a[i] = a[i] + c[j][i] * b[j]; +} diff --git a/examples/cuda-chill/mv-shadow.lua b/examples/cuda-chill/mv-shadow.lua new file mode 100644 index 0000000..43e8491 --- /dev/null +++ b/examples/cuda-chill/mv-shadow.lua @@ -0,0 +1,65 @@ +init("mv-shadow.c","normalMV",0) +dofile("cudaize.lua") --defines custom tile_by_index, copy_to_registers, + --copy_to_shared methods + +N=129 +TI=32 +TJ=64 + +N=1024 +TI=16 + + + + + + + + + + + + + + + + +--Tile the i and j loop, introducing "ii" as the control loop for the "i" +--tile, "k" for the control loop for the "j" tile, with the final order +--of {"ii", "k", "i", "j"} +tile_by_index({"i","j"}, {TI,TJ}, {l1_control="ii", l2_control="k"}, {"ii", "k", "i", "j"}) +--tile_by_index({"i"}, {TI}, {l1_control="iii"}, {"ii", "k", "iii","i", "j"}) +--tile_by_index({"j"}, {TI}, {l2_control="k"}, { "k", "i", "j"}) +--tile_by_index({"i"}, {TI}, {l1_control="ii"}, {"ii", "i", "j"}) +--print_code() +--Normalize index will do a tile size of one over the loop level specified +--by the input index. This is useful to get a zero lower bound and hard +--upper bound on a loop instead of it being relative to previous loop +--levels. 
+--normalize_index("ii") +normalize_index("i") +print_code() + +--Cudaize now determines the grid dimensions from the loops themselves +--(the upper bounds of the block and thread loops). It also renames the +--given block and thread loops' indexes to the appropriate values from +--the set {"bx","by","tx","ty","tz"}. The second parameter specifies the +--size of the arrays to be copied in the CUDA scaffolding. +cudaize("mv_GPU", {a=N, b=N, c=N*N}, {block={"ii"}, thread={"i"}}) +--print_code() + +--Does a datacopy, tile, and add_sync to get a shared memory copy + +--copy_to_shared("tx", "b", 1) +--copy_to_shared("tx", "c", -16) +--print_code() +--copy_to_texture("b") +--copy_to_texture("c") +copy_to_registers("k", "a") +--print_code() + +unroll_to_depth(1) --won't unroll past thread/loop mapping, unrolls up to two loop levels +--copy_to_texture("b") +--print_code() +--unroll(0,5,0) +--print_code() diff --git a/examples/cuda-chill/mv.c b/examples/cuda-chill/mv.c new file mode 100644 index 0000000..582b187 --- /dev/null +++ b/examples/cuda-chill/mv.c @@ -0,0 +1,9 @@ +#define N 1024 + +void normalMV(float c[N][N], float a[N], float b[N]) { + int i, j; + + for (i = 0; i < N; i++) + for (j = 0; j < N; j++) + a[i] = a[i] + c[j][i] * b[j]; +} diff --git a/examples/cuda-chill/mv.lua b/examples/cuda-chill/mv.lua new file mode 100644 index 0000000..ca54501 --- /dev/null +++ b/examples/cuda-chill/mv.lua @@ -0,0 +1,65 @@ +init("mv.c","normalMV",0) +dofile("cudaize.lua") --defines custom tile_by_index, copy_to_registers, + --copy_to_shared methods + +N=129 +TI=32 +TJ=64 + +N=1024 + + + + + + + + + + + + + + + + +--Tile the i and j loop, introducing "ii" as the control loop for the "i" +--tile, "k" for the control loop for the "j" tile, with the final order +--of {"ii", "k", "i", "j"} +tile_by_index({"i","j"}, {TI,TJ}, {l1_control="ii", l2_control="k"}, {"ii", "k", "i", "j"}) +--tile_by_index({"i"}, {TI}, {l1_control="iii"}, {"ii", "k", "iii","i", "j"}) +--tile_by_index({"j"}, 
{TI}, {l2_control="k"}, { "k", "i", "j"}) +--tile_by_index({"i"}, {TI}, {l1_control="ii"}, {"ii", "i", "j"}) +--print_code() +--Normalize index will do a tile size of one over the loop level specified +--by the input index. This is useful to get a zero lower bound and hard +--upper bound on a loop instead of it being relative to previous loop +--levels. +--normalize_index("ii") +normalize_index("i") +print_code() + +--Cudaize now determines the grid dimensions from the loops themselves +--(the upper bounds of the block and thread loops). It also renames the +--given block and thread loops' indexes to the appropriate values from +--the set {"bx","by","tx","ty","tz"}. The second parameter specifies the +--size of the arrays to be copied in the CUDA scaffolding. +cudaize("mv_GPU", {a=N, b=N, c=N*N}, {block={"ii"}, thread={"i"}}) + +--print_code() + +--Does a datacopy, tile, and add_sync to get a shared memory copy + +--copy_to_shared("tx", "b", 1) +--copy_to_shared("tx", "c", -16) +--print_code() +--copy_to_texture("b") +--copy_to_texture("c") +copy_to_registers("k", "a") +--print_code() + +unroll_to_depth(1) --won't unroll past thread/loop mapping, unrolls up to two loop levels +--copy_to_texture("b") +--print_code() +--unroll(0,5,0) +--print_code() diff --git a/examples/cuda-chill/mv_try.c b/examples/cuda-chill/mv_try.c new file mode 100644 index 0000000..7781f3b --- /dev/null +++ b/examples/cuda-chill/mv_try.c @@ -0,0 +1,9 @@ +#define N 4096 + +void normalMV(int n, float c[N][N], float a[N], float b[N]) { + int i, j; + + for (i = 0; i < n; i++) + for (j = 0; j < n; j++) + a[i] = a[i] + c[i][j] * b[j]; +} diff --git a/examples/cuda-chill/mv_try.lua b/examples/cuda-chill/mv_try.lua new file mode 100644 index 0000000..db4d9ad --- /dev/null +++ b/examples/cuda-chill/mv_try.lua @@ -0,0 +1,14 @@ +init("mv_try.c","normalMV",0) +dofile("cudaize.lua") --defines custom tile_by_index, copy_to_registers, + --copy_to_shared methods + +TI=96 + +N=4096 + + +tile_by_index({"i"}, 
{TI}, {l1_control="ii"}, {"ii", "i", "j"}) +cudaize("mv_GPU", {a=N, b=N, c=N*N}, + {block={"ii"}, thread={"i"}}) + +print_code() diff --git a/examples/cuda-chill/nbody.c b/examples/cuda-chill/nbody.c new file mode 100644 index 0000000..57899b6 --- /dev/null +++ b/examples/cuda-chill/nbody.c @@ -0,0 +1,66 @@ +#define NBODIES 16384 +#define SOFTENINGSQUARED 0.01f +#define DELTATIME 0.001f +#define DAMPING 1.0f + +#define NBLOCKSY 1 +#define NBLOCKSX (NBODIES/NTHREADSX) +#define NTHREADSY 1 +#define NTHREADSX 64 + +#define BLOCKSIZE 128 + +#define SHARED 1 +#define TIMER 1 +#define VERIFY 1 + +extern float sqrtf(float); + +void nbody_cpu(float* oldpos,float* oldpos1, float *newpos, float *oldvel, float *newvel, float *force) +{ + float r0,r1,r2; + float invDist, invDistCube, mass, invMass; + unsigned int i,j; + for(i = 0; i < NBODIES; ++i) { + //force[i*4 ] = 0; + //force[i*4+1] = 0; + //force[i*4+2] = 0; + //force[i*4+3] = 0; + for(j = 0; j < NBODIES; ++j) { + r0 = oldpos[j*4]-oldpos1[i*4]; + r1 = oldpos[j*4+1]-oldpos1[i*4+1]; + r2 = oldpos[j*4+2]-oldpos1[i*4+2]; + + invDist = 1.0/sqrtf(r0 * r0 + r1 * r1 + r2 * r2 + SOFTENINGSQUARED); + invDistCube = invDist * invDist * invDist; + mass = oldpos1[i*4+3]; + + force[i*4] = force[i*4] + r0 * mass * invDistCube; + force[i*4+1] = force[i*4+1] + r1 * mass * invDistCube; + force[i*4+2] = force[i*4+2] + r2 * mass * invDistCube; + + } + } + +/* for (i = 0; i < NBODIES; ++i) { + invMass = oldvel[4*i+3]; + + oldvel[4*i] += (force[4*i] * invMass) * DELTATIME * DAMPING; + oldvel[4*i+1] += (force[4*i+1] * invMass) * DELTATIME * DAMPING; + oldvel[4*i+2] += (force[4*i+2] * invMass) * DELTATIME * DAMPING; + + oldpos[4*i] += oldvel[4*i] * DELTATIME; + oldpos[4*i+1] += oldvel[4*i+1] * DELTATIME; + oldpos[4*i+2] += oldvel[4*i+2] * DELTATIME; + + newpos[4*i+0] = oldpos[4*i]; + newpos[4*i+1] = oldpos[4*i+1]; + newpos[4*i+2] = oldpos[4*i+2]; + newpos[4*i+3] = oldpos[4*i+3]; + + newvel[4*i+0] = oldvel[4*i]; + newvel[4*i+1] = oldvel[4*i+1]; + 
newvel[4*i+2] = oldvel[4*i+2]; + newvel[4*i+3] = oldvel[4*i+3]; + }*/ +} diff --git a/examples/cuda-chill/nbody.lua b/examples/cuda-chill/nbody.lua new file mode 100644 index 0000000..08f88a9 --- /dev/null +++ b/examples/cuda-chill/nbody.lua @@ -0,0 +1,53 @@ +--CUBLAS 2 MM Multiply + +--This function form intializes "CUDAIZE v2" versus "CUDAIZE v1" if you +--call init() and use global variables to specify procedure and loop + +--Second parameter is procedure # and third is loop # +init("nbody.c", "nbody_cpu" , 0) + +dofile("cudaize.lua") --defines custom tile_by_index, copy_to_registers, + --copy_to_shared methods +NBODIES=16384 + + +--Tj=128 CHANGE FOR BEST..... BEST IS 64BLOCKS 128THREADS +--Ti=256 +Tj=64 +Ti=32 +Tjjj=1 +Tiii=1 +Tn=0.1 +--normalize_index("j") +-- +--print_code() +--normalize_index("n") +-- TILE COMMANDS ZEROOOOOOOOOOO:3 +--tile_by_index({"i","j"},{Ti,Tj},{l1_control="ii",l2_control="jj"},{"ii","jj","i","j"})--CU=-1 +tile_by_index({"i"},{Ti},{l1_control="ii"},{"ii","i","j"})--CU=-1 +--normalize_index("i") +--tile_by_index({"n"},{Tn},{l1_control="nn"},{"jj","ii","nn","j","i","n"})--CU=-1 + +--tile_by_index({"j","i"},{Tjjj,Tiii},{l1_control="jjj",l2_control="iii"},{"jj","ii","nn","jjj","j","iii","i","n"})--CU=3 +--tile_by_index({"j"}, {Tn}, {l1_control="j",l1_tile="jjj"}, {"ii", "jj", "nn","jjj","j","i","n"}) +--tile_by_index({"i"}, {Ti/2}, {l1_control="iii"}, {"ii","iii", "jj","i","j"}) +--print_code() +cudaize("kernel_GPU",{oldpos=4*NBODIES,oldpos1=4*NBODIES,oldvel=4*NBODIES,force=4*NBODIES,newpos=4*NBODIES,newvel=4*NBODIES},{block={"ii"}, thread={"i"}})--CU=3 +print_code() +--tile(0,6,6) +--copy_to_shared("tx","oldpos",-16) +--copy_to_registers("j","oldpos") +--copy_to_registers("j","oldpos1") +--copy_to_registers("j","force") + +--copy_to_texture("oldpos") +--tile(1,3,3) +--tile(2,3,3) + +print_code() +--unroll_to_depth(1) +-- +--tile(2,3,3) +--unroll(2,3,0) +--unroll(0,5,0) +--print_code() diff --git a/examples/cuda-chill/tmv-shadow.c 
b/examples/cuda-chill/tmv-shadow.c new file mode 100644 index 0000000..cb9ea8d --- /dev/null +++ b/examples/cuda-chill/tmv-shadow.c @@ -0,0 +1,9 @@ +#define N 1024 + +void normalMV(float c[N][N], float a[N], float b[N]) { + int i, j; + + for (i = 0; i < N; i++) + for (j = 0; j < N; j++) + a[i] = a[i] + c[i][j] * b[j]; +} diff --git a/examples/cuda-chill/tmv-shadow.lua b/examples/cuda-chill/tmv-shadow.lua new file mode 100644 index 0000000..196b939 --- /dev/null +++ b/examples/cuda-chill/tmv-shadow.lua @@ -0,0 +1,50 @@ +init("tmv-shadow.c","normalMV",0) +dofile("cudaize.lua") --defines custom tile_by_index, copy_to_registers, + --copy_to_shared methods + +N=1024 +--N= 8209 +--N=129 +TI=64 +N=1024 +TI=32 +--tile, "k" for the control loop for the "j" tile, with the final order +--of {"ii", "k", "i", "j"} +tile_by_index({"i","j"}, {TI,TI}, {l1_control="ii", l2_control="k"}, {"ii", "k", "i", "j"}) +--tile_by_index({"i"}, {TI}, {l1_control="ii"}, {"ii", "i", "j"}) +--print_code() +--tile_by_index({"i"}, {TI/32}, {l1_control="iii"}, {"ii", "k", "iii","i", "j"}) + +--print_code() +--Normalize index will do a tile size of one over the loop level specified +--by the input index. This is useful to get a zero lower bound and hard +--upper bound on a loop instead of it being relative to previous loop +--levels. +--normalize_index("i") +--print_code() + +--Cudaize now determines the grid dimensions from the loops themselves +--(the upper bounds of the block and thread loops). It also renames the +--given block and thread loops' indexes to the appropriate values from +--the set {"bx","by","tx","ty","tz"}. The second parameter specifies the +--size of the arrays to be copied in the CUDA scaffolding. 
+cudaize("tmv_GPU", {a=N, b=N, c=N*N},{block={"ii"}, thread={"i"}}) + +--print_code() + +--Does a datacopy, tile, and add_sync to get a shared memory copy +copy_to_shared("tx", "b", 1) +--copy_to_texture("b") +--print_code() + +copy_to_shared("tx", "c", -16) +--copy_to_texture("c") +--print_code() + +copy_to_registers("k", "a") +print_code() +--unroll(0,5,0) +--unroll(0,4,0) +--unroll(2,4,16) +unroll_to_depth(1) +--print_code() diff --git a/examples/cuda-chill/tmv.c b/examples/cuda-chill/tmv.c new file mode 100644 index 0000000..cb9ea8d --- /dev/null +++ b/examples/cuda-chill/tmv.c @@ -0,0 +1,9 @@ +#define N 1024 + +void normalMV(float c[N][N], float a[N], float b[N]) { + int i, j; + + for (i = 0; i < N; i++) + for (j = 0; j < N; j++) + a[i] = a[i] + c[i][j] * b[j]; +} diff --git a/examples/cuda-chill/tmv.lua b/examples/cuda-chill/tmv.lua new file mode 100644 index 0000000..5071108 --- /dev/null +++ b/examples/cuda-chill/tmv.lua @@ -0,0 +1,50 @@ +init("tmv.c","normalMV",0) +dofile("cudaize.lua") --defines custom tile_by_index, copy_to_registers, + --copy_to_shared methods + +N=1024 +--N= 8209 +--N=129 +TI=64 +N=1024 +TI=32 +--tile, "k" for the control loop for the "j" tile, with the final order +--of {"ii", "k", "i", "j"} +tile_by_index({"i","j"}, {TI,TI}, {l1_control="ii", l2_control="k"}, {"ii", "k", "i", "j"}) +--tile_by_index({"i"}, {TI}, {l1_control="ii"}, {"ii", "i", "j"}) +--print_code() +--tile_by_index({"i"}, {TI/32}, {l1_control="iii"}, {"ii", "k", "iii","i", "j"}) + +--print_code() +--Normalize index will do a tile size of one over the loop level specified +--by the input index. This is useful to get a zero lower bound and hard +--upper bound on a loop instead of it being relative to previous loop +--levels. +--normalize_index("i") +--print_code() + +--Cudaize now determines the grid dimensions from the loops themselves +--(the upper bounds of the block and thread loops). 
It also renames the +--given block and thread loops' indexes to the appropriate values from +--the set {"bx","by","tx","ty","tz"}. The second parameter specifies the +--size of the arrays to be copied in the CUDA scaffolding. +cudaize("tmv_GPU", {a=N, b=N, c=N*N},{block={"ii"}, thread={"i"}}) + +--print_code() + +--Does a datacopy, tile, and add_sync to get a shared memory copy +copy_to_shared("tx", "b", 1) +--copy_to_texture("b") +--print_code() + +copy_to_shared("tx", "c", -16) +--copy_to_texture("c") +--print_code() + +copy_to_registers("k", "a") +print_code() +--unroll(0,5,0) +--unroll(0,4,0) +--unroll(2,4,16) +unroll_to_depth(1) +--print_code() diff --git a/examples/fortran/README b/examples/fortran/README new file mode 100644 index 0000000..4f23bee --- /dev/null +++ b/examples/fortran/README @@ -0,0 +1,10 @@ +// Manu + +1) Fortran support added to permute, tile, unroll and datacopy. Tested these w.r.t. gemm.f90 using gemm.script. + There might be other issues (like fusion due to unroll, ...) that have not been tested. + +2) To incorporate Fortran support I had to modify certain values in omega (include/omega/omega_core/oc.h). + To solve for a large number of unknowns, these values have to be reverted back. + +3) Tested the existing chill scripts using Derick's python script. + At least the existing chill scripts are not affected by the fortran related changes. 
diff --git a/examples/fortran/ccd.f b/examples/fortran/ccd.f new file mode 100644 index 0000000..12d834d --- /dev/null +++ b/examples/fortran/ccd.f @@ -0,0 +1,32 @@ +c +c These have been separated out from ccsd_t_singles_l.F and ccsd_t_doubles_l.F +c + subroutine clean_sd_t_s1_1(h3d,h2d,h1d,p6d,p5d,p4d, + 2 triplesx,t1sub,v2sub) + IMPLICIT NONE + integer h3d,h2d,h1d,p6d,p5d,p4d + integer h3,h2,h1,p6,p5,p4 + integer N + double precision triplesx(16,16,16,16,16,16) + double precision t1sub(16,16) + double precision v2sub(16,16,16,16) + + N = 16 + + do p4=1,10 + do p5=1,10 + do p6=1,10 + do h1=1,10 + do h2=1,10 + do h3=1,10 + triplesx(h3,h2,h1,p6,p5,p4)=triplesx(h3,h2,h1,p6,p5,p4) + 1 + t1sub(p4,h1)*v2sub(h3,h2,p6,p5) + enddo + enddo + enddo + enddo + enddo + enddo + return + end + diff --git a/examples/fortran/ccd.script b/examples/fortran/ccd.script new file mode 100644 index 0000000..c2af500 --- /dev/null +++ b/examples/fortran/ccd.script @@ -0,0 +1,18 @@ +source: ccd.f +procedure: clean_sd_t_s1_1 +format : rose +loop: 0 + + + +original() + +UN=4 + +unroll(0,5,4) +unroll(0,4,4) +unroll(0,3,4) +unroll(0,2,4) +unroll(0,1,4) + +print diff --git a/examples/fortran/gemm.f90 b/examples/fortran/gemm.f90 new file mode 100644 index 0000000..b65bb58 --- /dev/null +++ b/examples/fortran/gemm.f90 @@ -0,0 +1,58 @@ +program matmul + + integer N,i,j,k + real*8 a(10,10), b(10,10), c(10,10), ct(10,10),mysum + + do i=1,10,1 + do j=1,10,1 + a(i,j) = i+j + b(i,j) = i-j + c(i,j) = 0.0 + ct(i,j) = 0.0 + end do + b(i,i) = 1.0; + end do + + + DO j=1,10,1 + DO k=1,10,1 + DO i=1,10,1 + c(i,j) = c(i,j)+a(i,k)*b(k,j) + end do + end do + end do + + + + call gemm(10,a,b,ct) + + mysum = 0.0 + do i=1,10,1 + do j=1,10,1 + mysum = c(i,j) - ct(i,j) + end do + end do + + if (abs(mysum) >= 0.00001) then + write (*,*) "Something wrong" + else + write (*,*) "Output matches" + end if + +end program matmul + + SUBROUTINE gemm(N,A,B,C) + INTEGER N + REAL*8 A(N,N), B(N,N), C(N,N) + + INTEGER I,J,K + + DO 
J=1,N,1 + DO K=1,N,1 + DO I=1,N,1 + C(I,J) = C(I,J)+A(I,K)*B(K,J) + end do + end do + end do + + END subroutine diff --git a/examples/fortran/gemm.script b/examples/fortran/gemm.script new file mode 100644 index 0000000..01eb859 --- /dev/null +++ b/examples/fortran/gemm.script @@ -0,0 +1,30 @@ +#matrix multiply large array size for intel machine +source: gemm.f90 +procedure: gemm +format: rose +loop: 0 + +TI = 128 +#TI = 4 +TJ = 8 +#TK = 3 +TK = 512 +UI = 2 +UJ = 2 + +permute([3,1,2]) +tile(0,2,TJ) +#print space +tile(0,2,TI) +#print space +tile(0,5,TK) +#print space + + +datacopy(0,3,A,false,-1) +#print space + +datacopy(0,4,B) +unroll(0,4,UI) +unroll(0,5,UJ) + diff --git a/examples/fortran/rose_gemm.f90 b/examples/fortran/rose_gemm.f90 new file mode 100644 index 0000000..d150922 --- /dev/null +++ b/examples/fortran/rose_gemm.f90 @@ -0,0 +1,155 @@ +PROGRAM matmul +INTEGER :: N, i, j, k +REAL(kind=8) :: a(10,10), b(10,10), c(10,10), ct(10,10), mysum +DO i = 1, 10, 1 +DO j = 1, 10, 1 +a(i,j) = i + j +b(i,j) = i - j +c(i,j) = 0.0 +ct(i,j) = 0.0 +END DO +b(i,i) = 1.0 +END DO +DO j = 1, 10, 1 +DO k = 1, 10, 1 +DO i = 1, 10, 1 +c(i,j) = c(i,j) + a(i,k) * b(k,j) +END DO +END DO +END DO +CALL gemm(10,a,b,ct) +mysum = 0.0 +DO i = 1, 10, 1 +DO j = 1, 10, 1 +mysum = c(i,j) - ct(i,j) +END DO +END DO +IF (abs(mysum) >= 0.00001) THEN +WRITE (*, FMT=*) "Something wrong" +ELSE +WRITE (*, FMT=*) "Output matches" +END IF +END PROGRAM matmul + +SUBROUTINE gemm(N,A,B,C) +INTEGER :: t12 +INTEGER :: t10 +INTEGER :: t8 +INTEGER :: t6 +INTEGER :: t4 +INTEGER :: t2 +INTEGER :: chill_t64 +INTEGER :: chill_t63 +INTEGER :: chill_t62 +INTEGER :: chill_t61 +INTEGER :: chill_t60 +INTEGER :: chill_t59 +INTEGER :: chill_t58 +INTEGER :: chill_t57 +INTEGER :: chill_t56 +INTEGER :: chill_t55 +INTEGER :: chill_t54 +INTEGER :: chill_t53 +INTEGER :: chill_t52 +INTEGER :: chill_t51 +INTEGER :: chill_t50 +INTEGER :: chill_t49 +INTEGER :: chill_t48 +INTEGER :: chill_t47 +INTEGER :: over2 +INTEGER :: 
chill_t46 +INTEGER :: chill_t45 +INTEGER :: chill_t44 +INTEGER :: chill_t43 +INTEGER :: chill_t42 +INTEGER :: chill_t41 +INTEGER :: chill_t40 +INTEGER :: chill_t39 +INTEGER :: chill_t38 +INTEGER :: chill_t37 +INTEGER :: chill_t36 +INTEGER :: chill_t35 +INTEGER :: chill_t34 +INTEGER :: chill_t33 +INTEGER :: chill_t32 +INTEGER :: chill_t31 +INTEGER :: chill_t30 +INTEGER :: chill_t29 +INTEGER :: chill_t28 +INTEGER :: chill_t27 +INTEGER :: chill_t26 +INTEGER :: chill_t25 +INTEGER :: chill_t24 +INTEGER :: chill_t23 +INTEGER :: over1 +INTEGER :: chill_t22 +INTEGER :: chill_t21 +INTEGER :: chill_t20 +INTEGER :: chill_t19 +INTEGER :: chill_t18 +INTEGER :: chill_t17 +INTEGER :: chill_t16 +INTEGER :: chill_t15 +REAL(kind=8), DIMENSION(8,512) :: f_P2 +INTEGER :: chill_t14 +INTEGER :: chill_t13 +INTEGER :: chill_t12 +INTEGER :: chill_t11 +INTEGER :: chill_t10 +INTEGER :: chill_t9 +INTEGER :: chill_t8 +INTEGER :: chill_t7 +REAL(kind=8), DIMENSION(512,128) :: f_P1 +INTEGER :: chill_t1 +INTEGER :: chill_t2 +INTEGER :: chill_t4 +INTEGER :: chill_t6 +INTEGER :: chill_t5 +INTEGER :: N +REAL(kind=8) :: A(N,N), B(N,N), C(N,N) +INTEGER :: I, J, K +over1 = 0 +over2 = 0 +DO t2 = 1, N, 512 +DO t4 = 1, N, 128 +DO t6 = t2, merge(N,t2 + 511,N <= t2 + 511), 1 +DO t8 = t4, merge(t4 + 127,N,t4 + 127 <= N), 1 +f_P1(t8 - t4 + 1,t6 - t2 + 1) = A(t8,t6) +END DO +END DO +DO t6 = 1, N, 8 +DO t8 = t6, merge(N,t6 + 7,N <= t6 + 7), 1 +DO t10 = t2, merge(N,t2 + 511,N <= t2 + 511), 1 +f_P2(t10 - t2 + 1,t8 - t6 + 1) = B(t10,t8) +END DO +END DO +over1 = MOD(N,2) +DO t8 = t4, merge(-over1 + N,t4 + 126,-over1 + N <= t4 + 126), 2 +over2 = MOD(N,2) +DO t10 = t6, merge(t6 + 6,N - over2,t6 + 6 <= N - over2), 2 +DO t12 = t2, merge(t2 + 511,N,t2 + 511 <= N), 1 +C(t8,t10) = C(t8,t10) + f_P1(t8 - t4 + 1,t12 - t2 + 1) * f_P2(t12 - t2 + 1,t10 - t6 + 1) +C(t8 + 1,t10) = C(t8 + 1,t10) + f_P1(t8 + 1 - t4 + 1,t12 - t2 + 1) * f_P2(t12 - t2 + 1,t10 - t6 + 1) +C(t8,t10 + 1) = C(t8,t10 + 1) + f_P1(t8 - t4 + 1,t12 - t2 + 1) * 
f_P2(t12 - t2 + 1,t10 + 1 - t6 + 1) +C(t8 + 1,t10 + 1) = C(t8 + 1,t10 + 1) + f_P1(t8 + 1 - t4 + 1,t12 - t2 + 1) * f_P2(t12 - t2 + 1,t10 + 1 - t6 + 1) +END DO +END DO +IF (N - 7 <= t6 .AND. 1 <= over2) THEN +DO t12 = t2, merge(N,t2 + 511,N <= t2 + 511), 1 +C(t8,N) = C(t8,N) + f_P1(t8 - t4 + 1,t12 - t2 + 1) * f_P2(t12 - t2 + 1,N - t6 + 1) +C(t8 + 1,N) = C(t8 + 1,N) + f_P1(t8 + 1 - t4 + 1,t12 - t2 + 1) * f_P2(t12 - t2 + 1,N - t6 + 1) +END DO +END IF +END DO +IF (N - 127 <= t4 .AND. 1 <= over1) THEN +DO t10 = t6, merge(t6 + 7,N,t6 + 7 <= N), 1 +DO t12 = t2, merge(t2 + 511,N,t2 + 511 <= N), 1 +C(N,t10) = C(N,t10) + f_P1(N - t4 + 1,t12 - t2 + 1) * f_P2(t12 - t2 + 1,t10 - t6 + 1) +END DO +END DO +END IF +END DO +END DO +END DO +END SUBROUTINE + diff --git a/graph-test.cc b/graph-test.cc new file mode 100644 index 0000000..3cdcbee --- /dev/null +++ b/graph-test.cc @@ -0,0 +1,148 @@ +#include "graph.hh" + +using std::cout; +using std::endl; +template<typename T> +struct A { +}; + +template struct Graph<Empty,Empty>; + +int main() { + Graph<> g; + + for (int i = 0; i < 8; i++) + g.insert(); + + std::vector<Empty> t; + t.push_back(Empty()); + t.push_back(Empty()); + + g.connect(0,1); + g.connect(1,4); + g.connect(4,0); + g.connect(4,5); + g.connect(1,5); + g.connect(1,2); + g.connect(2,3); + g.connect(3,2); + g.connect(2,6); + g.connect(5,6); + g.connect(6,5); + g.connect(6,7); + g.connect(3,7); + g.connect(7,7,t); + + g.insert(); + g.insert(); + g.connect(9,8); + g.connect(8,0); + + cout << "Graph #1:" << endl; + cout << g; + + std::vector<std::set<int> > r = g.topoSort(); + + cout << "topological order: "; + int num_scc = 0; + for (int i = 0; i < r.size(); i++) { + if (i != 0) + cout << ' '; + if (r[i].size() > 1) { + cout << '('; + num_scc++; + } + for (std::set<int>::iterator j = r[i].begin(); j != r[i].end(); j++) { + if (j != r[i].begin()) + cout << ' '; + cout << (*j+1); + } + if (r[i].size() > 1) + cout << ')'; + } + cout << endl; + cout << "total number of SCC: " << 
num_scc << endl; + + Graph<> g2; + + for (int i = 0; i < 6; i++) + g2.insert(); + + g2.connect(0,1); + g2.connect(0,2); + g2.connect(3,4); + g2.connect(3,5); + g2.connect(3,2); + g2.connect(5,0); + + cout << endl << "Graph #2:" << endl; + cout << g2; + + std::vector<std::set<int> > r2 = g2.packed_topoSort(); + + cout << "packed topological order: "; + for (int i = 0; i < r2.size(); i++) { + if (i != 0) + cout << ' '; + if (r2[i].size() > 1) + cout << '('; + for (std::set<int>::iterator j = r2[i].begin(); j != r2[i].end(); j++) { + if (j != r2[i].begin()) + cout << ' '; + cout << (*j+1); + } + if (r2[i].size() > 1) + cout << ')'; + } + cout << endl; + + Graph<> g3; + + for (int i = 0; i < 6; i++) + g3.insert(); + + g3.connect(5,2); + g3.connect(5,3); + g3.connect(5,4); + g3.connect(3,1); + g3.connect(1,0); + + cout << endl << "Graph #3:" << endl; + cout << g3; + + std::vector<std::set<int> > r3 = g3.topoSort(); + + cout << "topological order: "; + for (int i = 0; i < r3.size(); i++) { + if (i != 0) + cout << ' '; + if (r3[i].size() > 1) + cout << '('; + for (std::set<int>::iterator j = r3[i].begin(); j != r3[i].end(); j++) { + if (j != r3[i].begin()) + cout << ' '; + cout << (*j+1); + } + if (r3[i].size() > 1) + cout << ')'; + } + cout << endl; + + r3 = g3.packed_topoSort(); + + cout << "packed topological order: "; + for (int i = 0; i < r3.size(); i++) { + if (i != 0) + cout << ' '; + if (r3[i].size() > 1) + cout << '('; + for (std::set<int>::iterator j = r3[i].begin(); j != r3[i].end(); j++) { + if (j != r3[i].begin()) + cout << ' '; + cout << (*j+1); + } + if (r3[i].size() > 1) + cout << ')'; + } + cout << endl; +} diff --git a/graph.hh b/graph.hh new file mode 100644 index 0000000..504c147 --- /dev/null +++ b/graph.hh @@ -0,0 +1,318 @@ +/***************************************************************************** + Copyright (C) 2008 University of Southern California + Copyright (C) 2010 University of Utah + All Rights Reserved. 
+ + Purpose: + Graph<VertexType, EdgeType> template class supports topological sort + with return result observing strongly connected component. + + Notes: + The result of topologically sorting a graph V={1,2,3,4} and E={1->2, 1->3, + 2->3, 3->2, 3->4} is ({1}, {2,3}, {4}). + + History: + 01/2006 Created by Chun Chen. + 07/2010 add a new topological order, -chun +*****************************************************************************/ + +#ifndef GRAPH_HH +#define GRAPH_HH + +#include <set> +#include <vector> +#include <map> +#include <iostream> +#include <stack> +#include <algorithm> +#include <assert.h> + +struct Empty { + Empty() {}; + bool operator<(const Empty &) const { return true; }; + bool operator==(const Empty &) const { return false; }; + friend std::ostream& operator<<(std::ostream &os, const Empty &) { return os; }; +}; + +namespace { + enum GraphColorType {WHITE, GREY, BLACK}; +} + +template<typename VertexType, typename EdgeType> struct Graph; +template<typename VertexType, typename EdgeType> std::ostream& operator<<(std::ostream &os, const Graph<VertexType, EdgeType> &g); + +template<typename VertexType = Empty, typename EdgeType = Empty> +struct Graph { + typedef std::map<int, std::vector<EdgeType> > EdgeList; + typedef std::vector<std::pair<VertexType, EdgeList> > VertexList; + + VertexList vertex; + bool directed; + + Graph(bool directed = true); + + int vertexCount() const; + int edgeCount() const; + bool isEmpty() const; + bool isDirected() const; + int insert(const VertexType &v = VertexType()); + void connect(int v1, int v2, const EdgeType &e = EdgeType()); + void connect(int v1, int v2, const std::vector<EdgeType> &e); + void disconnect(int v1, int v2); + bool hasEdge(int v1, int v2) const; + std::vector<EdgeType> getEdge(int v1, int v2) const; + + std::vector<std::set<int> > topoSort() const; + std::vector<std::set<int> > packed_topoSort() const; + + void dump() { + std::cout << *this; + } + + friend std::ostream& operator<< 
<>(std::ostream &os, const Graph<VertexType, EdgeType> &g); +}; + +template<typename VertexType, typename EdgeType> +std::ostream& operator<<(std::ostream &os, const Graph<VertexType, EdgeType> &g) { + for (int i = 0; i < g.vertex.size(); i++) + for (typename Graph<VertexType,EdgeType>::EdgeList::const_iterator j = g.vertex[i].second.begin(); j != g.vertex[i].second.end(); j++) { + os << i+1 << "->" << j->first+1 << ":"; + for (typename std::vector<EdgeType>::const_iterator k = j->second.begin(); k != j->second.end(); k++) + os << " " << *k; + os << std::endl; + } + + return os; +} + + +template<typename VertexType, typename EdgeType> +Graph<VertexType, EdgeType>::Graph(bool directed_): + directed(directed_) { +} + +template<typename VertexType, typename EdgeType> +int Graph<VertexType, EdgeType>::vertexCount() const { + return vertex.size(); +} + +template<typename VertexType, typename EdgeType> +int Graph<VertexType, EdgeType>::edgeCount() const { + int result = 0; + + for (int i = 0; i < vertex.size(); i++) + for (typename EdgeList::const_iterator j = vertex[i].second.begin(); j != vertex[i].second.end(); j++) + result += j->second.size(); + + if (!directed) + result = result/2; + + return result; +} + +template<typename VertexType, typename EdgeType> +bool Graph<VertexType, EdgeType>::isEmpty() const { + return vertex.size() == 0; +} + +template<typename VertexType, typename EdgeType> +bool Graph<VertexType, EdgeType>::isDirected() const { + return directed; +} + +template<typename VertexType, typename EdgeType> +int Graph<VertexType, EdgeType>::insert(const VertexType & v) { + for (int i = 0; i < vertex.size(); i++) + if (vertex[i].first == v) + return i; + + vertex.push_back(std::make_pair(v, EdgeList())); + return vertex.size() - 1; +} + + +template<typename VertexType, typename EdgeType> +void Graph<VertexType, EdgeType>::connect(int v1, int v2, const EdgeType &e) { + assert(v1 < vertex.size() && v2 < vertex.size()); + + vertex[v1].second[v2].push_back(e);; 
+ if (!directed) + vertex[v2].second[v1].push_back(e); +} + +template<typename VertexType, typename EdgeType> +void Graph<VertexType, EdgeType>::connect(int v1, int v2, const std::vector<EdgeType> &e) { + assert(v1 < vertex.size() && v2 < vertex.size()); + + if (e.size() == 0) + return; + + copy(e.begin(), e.end(), back_inserter(vertex[v1].second[v2])); + if (!directed) + copy(e.begin(), e.end(), back_inserter(vertex[v2].second[v1])); +} + +template<typename VertexType, typename EdgeType> +void Graph<VertexType, EdgeType>::disconnect(int v1, int v2) { + assert(v1 < vertex.size() && v2 < vertex.size()); + + vertex[v1].second.erase(v2); + if (!directed) + vertex[v2].second.erase(v1); +} + +template<typename VertexType, typename EdgeType> +bool Graph<VertexType,EdgeType>::hasEdge(int v1, int v2) const { + return vertex[v1].second.find(v2) != vertex[v1].second.end(); +} + +template<typename VertexType, typename EdgeType> +std::vector<EdgeType> Graph<VertexType,EdgeType>::getEdge(int v1, int v2) const { + if (!hasEdge(v1, v2)) + return std::vector<EdgeType>(); + + return vertex[v1].second.find(v2)->second; +} + +// This topological sort does handle SCC in graph. 
+template<typename VertexType, typename EdgeType> +std::vector<std::set<int> > Graph<VertexType, EdgeType>::topoSort() const { + const int n = vertex.size(); + std::vector<GraphColorType> color(n, WHITE); + std::stack<int> S; + + std::vector<int> order(n); + int c = n; + + // first DFS + for (int i = n-1; i >= 0; i--) + if (color[i] == WHITE) { + S.push(i); + while (!S.empty()) { + int v = S.top(); + + if (color[v] == WHITE) { + for (typename EdgeList::const_iterator j = vertex[v].second.begin(); j != vertex[v].second.end(); j++) + if (color[j->first] == WHITE) + S.push(j->first); + + color[v] = GREY; + } + else if (color[v] == GREY) { + color[v] = BLACK; + S.pop(); + order[--c] = v; + } + else { + S.pop(); + } + } + } + + // transpose edge + std::vector<std::set<int> > edgeT(n); + for (int i = 0; i < n; i++) + for (typename EdgeList::const_iterator j = vertex[i].second.begin(); j != vertex[i].second.end(); j++) + edgeT[j->first].insert(i); + + // second DFS in transposed graph starting from last finished vertex + fill(color.begin(), color.end(), WHITE); + std::vector<std::set<int> > result; + for (int i = 0; i < n; i++) + if (color[order[i]] == WHITE) { + std::set<int> s; + + S.push(order[i]); + while (!S.empty()) { + int v = S.top(); + + if(color[v] == WHITE) { + for (std::set<int>::const_iterator j = edgeT[v].begin(); j != edgeT[v].end(); j++) + if (color[*j] == WHITE) + S.push(*j); + + color[v] = GREY; + } + else if (color[v] == GREY) { + color[v] = BLACK; + S.pop(); + s.insert(v); + } + else { + S.pop(); + } + } + + result.push_back(s); + } + + return result; +} + +// This topological sort does not handle SCC in graph. 
+template<typename VertexType, typename EdgeType> +std::vector<std::set<int> > Graph<VertexType, EdgeType>::packed_topoSort() const { + const int n = vertex.size(); + std::vector<GraphColorType> color(n, WHITE); + std::stack<int> S; + + std::vector<bool> is_root(n, false); + std::vector<std::set<int> > edges(n); + + // first DFS + for (int i = n-1; i >= 0; i--) + if (color[i] == WHITE) { + S.push(i); + is_root[i] = true; + while (!S.empty()) { + int v = S.top(); + + if (color[v] == WHITE) { + for (typename EdgeList::const_iterator j = vertex[v].second.begin(); j != vertex[v].second.end(); j++) + if (color[j->first] == WHITE) { + S.push(j->first); + edges[v].insert(j->first); + } + else if (color[j->first] == BLACK) { + if (is_root[j->first]) { + is_root[j->first] = false; + edges[v].insert(j->first); + } + } + + color[v] = GREY; + } + else if (color[v] == GREY) { + color[v] = BLACK; + S.pop(); + } + else { + S.pop(); + } + } + } + + + // second BFS in DFS tree starting from roots + std::vector<std::set<int> > result; + std::set<int> s; + for (int i = 0; i < n; i++) + if (is_root[i]) + s.insert(i); + if (s.size() != 0) { + result.push_back(s); + while (true) { + std::set<int> s; + for (std::set<int>::iterator i = result[result.size()-1].begin(); i != result[result.size()-1].end(); i++) + s.insert(edges[*i].begin(), edges[*i].end()); + if (s.size() != 0) + result.push_back(s); + else + break; + } + } + + return result; +} + +#endif diff --git a/ir_code.hh b/ir_code.hh new file mode 100644 index 0000000..1f853fa --- /dev/null +++ b/ir_code.hh @@ -0,0 +1,263 @@ +/***************************************************************************** + Copyright (C) 2009-2010 University of Utah + All Rights Reserved. + + Purpose: + CHiLL's compiler intermediate representation interface that extends + Omega's builder interface to accomodate compiler analyses and + extra code generation. +. 
+ Notes: + Unlike CG_outputRepr, IR_Symbol,IR_Ref and IR_Control are place holders + to the underlying code, thus deleting or duplicating them does not affect + the actual code. Similar to Omega builder's memory allocation strategy, + all non-const pointer parameters of CG_outputRepr/IR_Symbol/IR_Ref/IR_Control + are destroyed after the call. + + History: + 02/2009 Created by Chun Chen. + 06/2010 Add IR_Control interface, by chun. +*****************************************************************************/ + +#ifndef IR_CODE_HH +#define IR_CODE_HH + +#include <code_gen/CG_outputRepr.h> +#include <code_gen/CG_outputBuilder.h> +#include <vector> + +enum IR_OPERATION_TYPE {IR_OP_CONSTANT, IR_OP_VARIABLE, + IR_OP_PLUS, IR_OP_MINUS, IR_OP_MULTIPLY, IR_OP_DIVIDE, + IR_OP_POSITIVE, IR_OP_NEGATIVE, + IR_OP_MIN, IR_OP_MAX, + IR_OP_ASSIGNMENT, + IR_OP_NULL, IR_OP_UNKNOWN}; +enum IR_CONTROL_TYPE {IR_CONTROL_LOOP, IR_CONTROL_IF, IR_CONTROL_WHILE, IR_CONTROL_BLOCK}; +enum IR_CONSTANT_TYPE {IR_CONSTANT_INT, IR_CONSTANT_FLOAT, + IR_CONSTANT_UNKNOWN}; +enum IR_CONDITION_TYPE {IR_COND_LT, IR_COND_LE, + IR_COND_GT, IR_COND_GE, + IR_COND_EQ, IR_COND_NE, + IR_COND_UNKNOWN}; +enum IR_ARRAY_LAYOUT_TYPE {IR_ARRAY_LAYOUT_ROW_MAJOR, + IR_ARRAY_LAYOUT_COLUMN_MAJOR, + IR_ARRAY_LAYOUT_SPACE_FILLING}; + +class IR_Code; + + +// Base abstract class for scalar and array symbols. This is a place +// holder for related declaration in IR code. 
struct IR_Symbol {
  // Owning IR; not initialized by this base class, so a derived class has to
  // set it.
  const IR_Code *ir_;

  virtual ~IR_Symbol() {/* ir_ is not the responsibility of this object */}
  virtual int n_dim() const = 0;
  virtual std::string name() const = 0;
  virtual bool operator==(const IR_Symbol &that) const = 0;
  virtual bool operator!=(const IR_Symbol &that) const {return !(*this == that);}
  virtual IR_Symbol *clone() const = 0;  /* shallow copy */
};


// Symbol of a scalar variable: always zero-dimensional.
struct IR_ScalarSymbol: public IR_Symbol {
  virtual ~IR_ScalarSymbol() {}
  int n_dim() const {return 0;}
  virtual int size() const = 0;
};


// Symbol of an array variable.
struct IR_ArraySymbol: public IR_Symbol {
  virtual ~IR_ArraySymbol() {}
  virtual int elem_size() const = 0;
  virtual omega::CG_outputRepr *size(int dim) const = 0;
  virtual IR_ARRAY_LAYOUT_TYPE layout_type() const = 0;
};


// Base abstract class for scalar and array references.  This is a
// place holder for related code in IR code.
struct IR_Ref {
  // Owning IR; not initialized by this base class.
  const IR_Code *ir_;

  virtual ~IR_Ref() {/* ir_ is not the responsibility of this object */}
  virtual int n_dim() const = 0;
  virtual bool is_write() const = 0;
  virtual std::string name() const = 0;
  virtual bool operator==(const IR_Ref &that) const = 0;
  virtual bool operator!=(const IR_Ref &that) const {return !(*this == that);}
  virtual omega::CG_outputRepr *convert() = 0;
  virtual IR_Ref *clone() const = 0;  /* shallow copy */
};


// Reference to a constant: zero-dimensional, never written, unnamed.
struct IR_ConstantRef: public IR_Ref {
  // Not initialized here; is_integer() reads it, so a derived class has to
  // set it.
  IR_CONSTANT_TYPE type_;

  virtual ~IR_ConstantRef() {}
  int n_dim() const {return 0;}
  bool is_write() const {return false;}
  std::string name() const {return std::string();}
  virtual bool is_integer() const {return type_ == IR_CONSTANT_INT;}
  virtual omega::coef_t integer() const = 0;
};


// Reference to a scalar variable.  name() and size() are answered through a
// temporary symbol obtained from symbol(), which they delete afterwards, so
// symbol() must return an object the caller may delete.
struct IR_ScalarRef: public IR_Ref {
  virtual ~IR_ScalarRef() {}
  int n_dim() const {return 0;}
  virtual IR_ScalarSymbol *symbol() const = 0;
  std::string name() const {
    IR_ScalarSymbol *sym = symbol();
    std::string s = sym->name();
    delete sym;
    return s;
  }
  virtual int size() const {
    IR_ScalarSymbol *sym = symbol();
    int s = sym->size();
    delete sym;
    return s;
  }
};


// Reference to an array element.  As in IR_ScalarRef, every query below
// goes through a temporary symbol from symbol() that is deleted right after
// use.
struct IR_ArrayRef: public IR_Ref {
  virtual ~IR_ArrayRef() {}
  int n_dim() const {
    IR_ArraySymbol *sym = symbol();
    int n = sym->n_dim();
    delete sym;
    return n;
  }
  virtual omega::CG_outputRepr *index(int dim) const = 0;
  virtual IR_ArraySymbol *symbol() const = 0;
  std::string name() const {
    IR_ArraySymbol *sym = symbol();
    std::string s = sym->name();
    delete sym;
    return s;
  }
  virtual int elem_size() const {
    IR_ArraySymbol *sym = symbol();
    int s = sym->elem_size();
    delete sym;
    return s;
  }
  virtual IR_ARRAY_LAYOUT_TYPE layout_type() const {
    IR_ArraySymbol *sym = symbol();
    IR_ARRAY_LAYOUT_TYPE t = sym->layout_type();
    delete sym;
    return t;
  }
};


struct IR_Block;

// Base abstract class for code structures.  This is a place holder
// for the actual structure in the IR code.  However, in cases that
// original source code may be transformed during loop initialization
// such as converting a while loop to a for loop or reconstructing the
// loop from low level IR code, the helper loop class (NOT
// IMPLEMENTED) must contain the transformed code that needs to be
// freed when out of service.
struct IR_Control {
  // Owning IR; not initialized by this base class.
  const IR_Code *ir_;

  virtual ~IR_Control() {/* ir_ is not the responsibility of this object */}
  virtual IR_CONTROL_TYPE type() const = 0;
  virtual IR_Block *convert() = 0;
  virtual IR_Control *clone() const = 0;  /* shallow copy */
};


// Place holder for a counted loop; type() is IR_CONTROL_LOOP.
struct IR_Loop: public IR_Control {
  virtual ~IR_Loop() {}
  virtual IR_ScalarSymbol *index() const = 0;
  virtual omega::CG_outputRepr *lower_bound() const = 0;
  virtual omega::CG_outputRepr *upper_bound() const = 0;
  virtual IR_CONDITION_TYPE stop_cond() const = 0;
  virtual IR_Block *body() const = 0;
  virtual int step_size() const = 0;
  IR_CONTROL_TYPE type() const { return IR_CONTROL_LOOP; }
};


// Place holder for a block of code; convert() returns the block itself.
struct IR_Block: public IR_Control {
  virtual ~IR_Block() {}
  virtual omega::CG_outputRepr *extract() const = 0;
  IR_Block *convert() {return this;}
  IR_CONTROL_TYPE type() const { return IR_CONTROL_BLOCK; }
  virtual omega::CG_outputRepr *original() const = 0;
};


// Place holder for an if statement; type() is IR_CONTROL_IF.
struct IR_If: public IR_Control {
  virtual ~IR_If() {}
  virtual omega::CG_outputRepr *condition() const = 0;
  virtual IR_Block *then_body() const = 0;
  virtual IR_Block *else_body() const = 0;
  IR_CONTROL_TYPE type() const { return IR_CONTROL_IF; }
};



// Declares no members of its own, so it inherits IR_Control's pure virtuals
// and stays abstract.
struct IR_While: public IR_Control {
  // NOT IMPLEMENTED
};


// Abstract class for compiler IR.
class IR_Code {
protected:
  omega::CG_outputBuilder *ocg_;        // deleted by ~IR_Code()
  omega::CG_outputRepr *init_code_;     // deleted by ~IR_Code()
  omega::CG_outputRepr *cleanup_code_;  // deleted by ~IR_Code()

public:
  IR_Code() {ocg_ = NULL; init_code_ = cleanup_code_ = NULL;}
  virtual ~IR_Code() { delete ocg_; delete init_code_; delete cleanup_code_; } /* the content of init and cleanup code have already been released in derived classes */

  // memory_type is for differentiating the location of where the new memory is allocated.
  // this is useful for processors with heterogeneous memory hierarchy.
  virtual IR_ScalarSymbol *CreateScalarSymbol(const IR_Symbol *sym, int memory_type) = 0;
  virtual IR_ArraySymbol *CreateArraySymbol(const IR_Symbol *sym, std::vector<omega::CG_outputRepr *> &size, int memory_type) = 0;

  virtual IR_ScalarRef *CreateScalarRef(const IR_ScalarSymbol *sym) = 0;
  virtual IR_ArrayRef *CreateArrayRef(const IR_ArraySymbol *sym, std::vector<omega::CG_outputRepr *> &index) = 0;
  // Index of the first array element; 0 unless a derived class overrides it.
  virtual int ArrayIndexStartAt() {return 0;}

  // Array references should be returned in their accessing order.
  // e.g. s1: A[i] = A[i-1]
  //      s2: B[C[i]] = D[i] + E[i]
  // return A[i-1], A[i], D[i], E[i], C[i], B[C[i]] in this order.
  virtual std::vector<IR_ArrayRef *> FindArrayRef(const omega::CG_outputRepr *repr) const = 0;
  virtual std::vector<IR_ScalarRef *> FindScalarRef(const omega::CG_outputRepr *repr) const = 0;

  // If there is no sub structure interesting inside the block, return empty,
  // so we know when to stop looking inside.
  virtual std::vector<IR_Control *> FindOneLevelControlStructure(const IR_Block *block) const = 0;

  // All controls must be in the same block, at the same level and in
  // contiguous lexical order as appeared in parameter vector.
  virtual IR_Block *MergeNeighboringControlStructures(const std::vector<IR_Control *> &controls) const = 0;

  virtual IR_Block *GetCode() const = 0;
  virtual void ReplaceCode(IR_Control *old, omega::CG_outputRepr *repr) = 0;
  virtual void ReplaceExpression(IR_Ref *old, omega::CG_outputRepr *repr) = 0;

  virtual IR_OPERATION_TYPE QueryExpOperation(const omega::CG_outputRepr *repr) const = 0;
  virtual IR_CONDITION_TYPE QueryBooleanExpOperation(const omega::CG_outputRepr *repr) const = 0;
  virtual std::vector<omega::CG_outputRepr *> QueryExpOperand(const omega::CG_outputRepr *repr) const = 0;
  virtual IR_Ref *Repr2Ref(const omega::CG_outputRepr *repr) const = 0;

  //---------------------------------------------------------------------------
  // CC Omega code builder interface here
  //---------------------------------------------------------------------------
  // Non-owning access to the builder held in ocg_.
  omega::CG_outputBuilder *builder() const {return ocg_;}

};

#endif

diff --git a/ir_cuda_rose_utils.cc b/ir_cuda_rose_utils.cc new file mode 100644 index 0000000..e7b4c37 --- /dev/null +++ b/ir_cuda_rose_utils.cc @@ -0,0 +1,191 @@
/*****************************************************************************
 Copyright (C) 2008 University of Southern California
 Copyright (C) 2009 University of Utah
 All Rights Reserved.

 Purpose:
   ROSE interface utilities.
+ + Notes: + + Update history: + 01/2006 created by Chun Chen +*****************************************************************************/ + +//#include <suif1.h> +#include "ir_rose_utils.hh" + + +/** + * Returns the body of the for loop found by finding the first loop in + * code, and if level > 1 recursively calling on the body of the found + * loop and (level-1) + */ +SgNode* loop_body_at_level(SgNode* tnl, int level) { + SgNode *inner_nl = 0; + //Now strip out the tnl on the inner level of the for loop + //tree_node_list_iter tnli(tnl); + + if (isSgBasicBlock(tnl)) { + + SgStatementPtrList& tnli = isSgBasicBlock(tnl)->get_statements(); + + for (SgStatementPtrList::iterator it = tnli.begin(); it != tnli.end(); + it++) { + if (isSgForStatement(*it)) { + inner_nl = loop_body_at_level(isSgForStatement(*it), level); + break; + } + + } + + } + + return inner_nl; +} + +SgNode* loop_body_at_level(SgForStatement* loop, int level) { + if (level > 1) + return loop_body_at_level(loop->get_loop_body(), level - 1); + return loop->get_loop_body(); +} + +void swap_node_for_node_list(SgNode* tn, SgNode* new_tnl) { + SgStatement *s = isSgStatement(tn); + + SgStatement* p; + if (s != 0) { + p = isSgStatement(tn->get_parent()); + + if (p != 0) { + + if (isSgBasicBlock(new_tnl)) { + + /*SgStatementPtrList & list_ = + isSgBasicBlock(new_tnl)->get_statements(); + + if (isSgForStatement(p)) { + if (!isSgBasicBlock(isSgForStatement(p)->get_loop_body())) + p->replace_statement(s, isSgStatement(new_tnl)); + else { + p->insert_statement(s, list_, true); + p->remove(s); + } + } else { + p->insert_statement(s, list_, true); + p->remove(s); + } + */ + if (isSgForStatement(p)) { + if (!isSgBasicBlock(isSgForStatement(p)->get_loop_body())) + p->replace_statement(s, isSgStatement(new_tnl)); + else { + + SgStatementPtrList& list_ = + isSgBasicBlock(new_tnl)->get_statements(); + + //std::vector<SgStatement*> list; + + SgStatementPtrList::iterator it = list_.begin(); + SgStatement* begin = *it; 
+ begin->set_parent(p); + + p->replace_statement(s, begin); + it++; + //SgStatement* stmt = first; + SgStatement* temp = begin; + for (; it != list_.end(); it++) { + (*it)->set_parent(p); + p->insert_statement(temp, *it, false); + temp = *it; + } + + } + + } else { + + + SgStatementPtrList& list_ = + isSgBasicBlock(new_tnl)->get_statements(); + + //std::vector<SgStatement*> list; + + SgStatementPtrList::iterator it = list_.begin(); + SgStatement* begin = *it; + begin->set_parent(p); + + p->replace_statement(s, begin); + it++; + //SgStatement* stmt = first; + SgStatement* temp = begin; + for (; it != list_.end(); it++) { + (*it)->set_parent(p); + p->insert_statement(temp, *it, false); + temp = *it; + } + + } + + /* SgStatement* temp = s; + + SgStatementPtrList::iterator it = list_.begin(); + p->insert_statement(temp, *it, true); + temp = *it; + p->remove_statement(s); + it++; + for (; it != list_.end(); it++) { + p->insert_statement(temp, *it, false); + temp = *it; + } + + // new_tnl->set_parent(p); + //new_tnl->get_statements(); + SgStatementPtrList& list = + isSgBasicBlock(new_tnl)->get_statements(); + + //std::vector<SgStatement*> list; + + SgStatementPtrList::iterator it = list.begin(); + SgStatement* begin = *it; + begin->set_parent(p); + + p->replace_statement(s, begin); + it++; + //SgStatement* stmt = first; + SgStatement* temp = begin; + for (; it != list.end(); it++) { + (*it)->set_parent(p); + p->insert_statement(temp, *it, false); + temp = *it; + } + */ + /* SgStatementPtrList& stmt_list = isSgBasicBlock(new_tnl)->get_statements(); + SgStatement* target = s; + + for(SgStatementPtrList::iterator it = stmt_list.begin() ; it != stmt_list.end(); it++) + { + isSgNode(*it)->set_parent(p); + p->insert_statement(isSgStateme, *it, false); + target = *it; + } + + p->remove_statement(s); + + */ + }else if(isSgIfStmt(p)) { + + if(isSgIfStmt(p)->get_true_body() == s) + isSgIfStmt(p)->set_true_body(isSgStatement(new_tnl)); + else if(isSgIfStmt(p)->get_false_body() == 
s) + isSgIfStmt(p)->set_false_body(isSgStatement(new_tnl)); + new_tnl->set_parent(p); + } + else { + p->replace_statement(s, isSgStatement(new_tnl)); + new_tnl->set_parent(p); + } + } + + } + // return isSgNode(p); +} diff --git a/ir_cuda_suif_utils.cc b/ir_cuda_suif_utils.cc new file mode 100644 index 0000000..f15c190 --- /dev/null +++ b/ir_cuda_suif_utils.cc @@ -0,0 +1,54 @@ +/***************************************************************************** + Copyright (C) 2008 University of Southern California + Copyright (C) 2009 University of Utah + All Rights Reserved. + + Purpose: + SUIF interface utilities. + + Notes: + + Update history: + 01/2006 created by Chun Chen +*****************************************************************************/ + +#include <suif1.h> +#include "ir_suif_utils.hh" + + +/** + * Returns the body of the for loop found by finding the first loop in + * code, and if level > 1 recursively calling on the body of the found + * loop and (level-1) + */ +tree_node_list* loop_body_at_level(tree_node_list* tnl, int level) +{ + tree_node_list *inner_nl = 0; + //Now strip out the tnl on the inner level of the for loop + tree_node_list_iter tnli(tnl); + while (!tnli.is_empty()) { + tree_node *node = tnli.step(); + if(node->kind() == TREE_FOR) + { + //Found the first tree_for, call sibling function + inner_nl = loop_body_at_level((tree_for*)node, level); + break; + } + } + return inner_nl; +} + +tree_node_list* loop_body_at_level(tree_for* loop, int level) +{ + if(level > 1) + return loop_body_at_level(loop->body(), level-1); + return loop->body(); +} + +tree_node_list* swap_node_for_node_list(tree_node* tn, tree_node_list* new_tnl) +{ + tree_node_list* tnl = tn->parent(); + tnl->insert_after(new_tnl, tn->list_e()); + delete tnl->remove(tn->list_e()); + return tnl; +} diff --git a/ir_cudarose.cc b/ir_cudarose.cc new file mode 100644 index 0000000..6b31bdd --- /dev/null +++ b/ir_cudarose.cc @@ -0,0 +1,165 @@ 
+/***************************************************************************** + Copyright (C) 2009 University of Utah + All Rights Reserved. + + Purpose: + CHiLL's SUIF interface. + + Notes: + Array supports mixed pointer and array type in a single declaration. + + History: + 2/2/2011 Created by Protonu Basu. +*****************************************************************************/ + +#include <typeinfo> +#include "ir_cudarose.hh" +#include "loop.hh" +#include "loop_cuda_rose.hh" +//#include "ir_suif_utils.hh" + +using namespace SageBuilder; +using namespace SageInterface; + +IR_cudaroseCode::IR_cudaroseCode(const char *filename, const char* proc_name) : + IR_roseCode(filename, proc_name) { + + //std::string file_suffix = StringUtility::fileNameSuffix(filename); + + //if (CommandlineProcessing::isCFileNameSuffix(file_suffix)) + //{ + std::string orig_name = StringUtility::stripPathFromFileName(filename); + std::string naked_name = StringUtility::stripFileSuffixFromFileName( + orig_name); + file->set_unparse_output_filename("rose_" + naked_name + ".cu"); + + //} + + gsym_ = root; + first_scope = firstScope; + parameter = symtab2_; + body = symtab3_; + defn = func->get_definition()->get_body(); + func_defn = func->get_definition(); +} + + + +IR_ArraySymbol *IR_cudaroseCode::CreateArraySymbol(const IR_Symbol *sym, + std::vector<omega::CG_outputRepr *> &size, int sharedAnnotation) { + SgType *tn; + SgVariableSymbol* vs; + if (typeid(*sym) == typeid(IR_roseScalarSymbol)) { + tn = static_cast<const IR_roseScalarSymbol *>(sym)->vs_->get_type(); + } else if (typeid(*sym) == typeid(IR_roseArraySymbol)) { + tn = static_cast<const IR_roseArraySymbol *>(sym)->vs_->get_type(); + while (isSgArrayType(tn) || isSgPointerType(tn)) { + if (isSgArrayType(tn)) + tn = isSgArrayType(tn)->get_base_type(); + else if (isSgPointerType(tn)) + tn = isSgPointerType(tn)->get_base_type(); + else + throw ir_error( + "in CreateScalarSymbol: symbol not an array nor a pointer!"); + } + } 
else + throw std::bad_typeid(); + + for (int i = size.size() - 1; i >= 0; i--) + tn = buildArrayType(tn, + static_cast<omega::CG_roseRepr *>(size[i])->GetExpression()); + + static int rose_array_counter = 1; + std::string s = std::string("_P") + omega::to_string(rose_array_counter++); + SgVariableDeclaration* defn2 = buildVariableDeclaration( + const_cast<char *>(s.c_str()), tn); + SgInitializedNamePtrList& variables2 = defn2->get_variables(); + + SgInitializedNamePtrList::const_iterator i2 = variables2.begin(); + SgInitializedName* initializedName2 = *i2; + vs = new SgVariableSymbol(initializedName2); + + prependStatement(defn2, + isSgScopeStatement(func->get_definition()->get_body())); + + vs->set_parent(symtab_); + symtab_->insert(SgName(s.c_str()), vs); + + SgStatementPtrList* tnl5 = new SgStatementPtrList; + + (*tnl5).push_back(isSgStatement(defn2)); + + omega::CG_roseRepr* stmt = new omega::CG_roseRepr(tnl5); + + init_code_ = ocg_->StmtListAppend(init_code_, + static_cast<omega::CG_outputRepr *>(stmt)); + + if (sharedAnnotation == 1) + isSgNode(defn2)->setAttribute("__shared__", + new AstTextAttribute("__shared__")); + + return new IR_roseArraySymbol(this, vs); +} + +bool IR_cudaroseCode::commit_loop(Loop *loop, int loop_num) { + if (loop == NULL) + return true; + + LoopCuda *cu_loop = (LoopCuda *) loop; + SgNode *tnl = cu_loop->codegen(); + if (!tnl) + return false; + + SgStatementPtrList* new_list = NULL; + if (isSgBasicBlock(tnl)) { + new_list = new SgStatementPtrList; + for (SgStatementPtrList::iterator it = + isSgBasicBlock(tnl)->get_statements().begin(); + it != isSgBasicBlock(tnl)->get_statements().end(); it++) + (*new_list).push_back(*it); + } + + //Only thing that should be left will be the inserting of the tnl* into the loop + omega::CG_outputRepr *repr; + if (new_list == NULL) + repr = new omega::CG_roseRepr(tnl); + else + repr = new omega::CG_roseRepr(new_list); + if (cu_loop->init_code != NULL) + repr = 
ocg_->StmtListAppend(cu_loop->init_code->clone(), repr); + + std::vector<SgForStatement *> loops = find_loops( + func->get_definition()->get_body()); + tnl = isSgNode(loops[loop_num])->get_parent(); + + if (cu_loop->setup_code != NULL) { + SgStatementPtrList* setup_tnl = + static_cast<omega::CG_roseRepr *>(cu_loop->setup_code)->GetList(); + + SgStatement* target = isSgStatement(loops[loop_num]); + + for (SgStatementPtrList::iterator it = (*setup_tnl).begin(); + it != (*setup_tnl).end(); it++) { + + isSgStatement(tnl)->insert_statement(target, *it, false); + isSgNode(*it)->set_parent(tnl); + target = *it; + } + + //SgStatementPtrList + // for SgStatementPtrList::it + //TODO: I think this is a hack we can undo if we have loop->codegen() + //loo->getCode(), maybe also get rid of setup and teardown... + //fix_unfinished_comment(setup_tnl, indexes_string); + //isSgStatement(tnl)->replace_statement(isSgStatement(loops[loop_num]), *setup_tnl); + isSgStatement(tnl)->remove_statement(isSgStatement(loops[loop_num])); + } + + delete repr; + + return true; +} + +IR_cudaroseCode::~IR_cudaroseCode() { +} + diff --git a/ir_cudarose.hh b/ir_cudarose.hh new file mode 100644 index 0000000..34e0404 --- /dev/null +++ b/ir_cudarose.hh @@ -0,0 +1,46 @@ +#ifndef IR_CUDA_ROSE +#define IR_CUDA_ROSE + +#include <code_gen/CG_roseRepr.h> +#include <code_gen/CG_roseBuilder.h> +#include "ir_rose.hh" +#include "loop.hh" +#include "loop_cuda_rose.hh" +#include "ir_rose_utils.hh" + + + +class IR_cudaroseCode : public IR_roseCode{ + +public: + + + IR_cudaroseCode(const char *filename, const char* proc_name); + + + + SgGlobal *gsym_; + SgScopeStatement* defn; + SgGlobal* first_scope; + SgSymbolTable* parameter; + SgSymbolTable* body; + SgFunctionDefinition* func_defn; + std::vector<SgSymbolTable*> write_procs;//procs to write + + + IR_ArraySymbol *CreateArraySymbol(const IR_Symbol *sym, std::vector<omega::CG_outputRepr *> &size,int sharedAnnotation = 1); + omega::CG_outputRepr* init_code(){ return 
init_code_; } + bool commit_loop(Loop *loop, int loop_num); + std::vector<SgForStatement *> get_loops() + { + std::vector<SgForStatement *> loops = find_loops(func->get_definition()->get_body()); + return loops; + } + + ~IR_cudaroseCode(); + +}; + + +#endif + diff --git a/ir_cudasuif.cc b/ir_cudasuif.cc new file mode 100644 index 0000000..c646e13 --- /dev/null +++ b/ir_cudasuif.cc @@ -0,0 +1,144 @@ +/***************************************************************************** + Copyright (C) 2009 University of Utah + All Rights Reserved. + + Purpose: + CHiLL's SUIF interface. + + Notes: + Array supports mixed pointer and array type in a single declaration. + + History: + 2/2/2011 Created by Protonu Basu. +*****************************************************************************/ + +#include <typeinfo> +#include "ir_cudasuif.hh" +#include "loop.hh" +#include "loop_cuda.hh" +#include "ir_suif_utils.hh" + + +IR_cudasuifCode::IR_cudasuifCode(const char *filename, int proc_num) + :IR_suifCode(filename, proc_num) +{ + //setting up gsym_ here + fileset->reset_iter(); + gsym_ = fileset->globals(); + +} + + + +IR_ArraySymbol *IR_cudasuifCode::CreateArraySymbol(const IR_Symbol *sym, + std::vector<omega::CG_outputRepr *> &size, + int sharedAnnotation) +{ + type_node *tn; + + if (typeid(*sym) == typeid(IR_suifScalarSymbol)) { + tn = static_cast<const IR_suifScalarSymbol *>(sym)->vs_->type(); + } + else if (typeid(*sym) == typeid(IR_suifArraySymbol)) { + tn = static_cast<const IR_suifArraySymbol *>(sym)->vs_->type(); + if (tn->is_modifier()) + tn = static_cast<modifier_type *>(tn)->base(); + while (tn->is_array() || tn->is_ptr()) { + if (tn->is_array()) + tn = static_cast<array_type *>(tn)->elem_type(); + else if (tn->is_ptr()) + tn = static_cast<ptr_type *>(tn)->ref_type(); + } + } + else + throw std::bad_typeid(); + + if (is_fortran_) + for (int i = 0; i < size.size(); i++) { + var_sym *temporary = symtab_->new_unique_var(type_s32); + init_code_ = 
ocg_->StmtListAppend(init_code_, ocg_->StmtListAppend(ocg_->CreateAssignment(0, new omega::CG_suifRepr(operand(temporary)), size[i]),NULL)); + + tn = new array_type(tn, array_bound(1), array_bound(temporary)); + symtab_->add_type(tn); + } + else + for (int i = size.size()-1; i >= 0; i--) { + var_sym *temporary = symtab_->new_unique_var(type_s32); + //init_code_ = ocg_->StmtListAppend(init_code_, ocg_->CreateStmtList(ocg_->CreateAssignment(0, new omega::CG_suifRepr(operand(temporary)), size[i]))); + init_code_ = ocg_->StmtListAppend(init_code_, ocg_->StmtListAppend(ocg_->CreateAssignment(0, new omega::CG_suifRepr(operand(temporary)), size[i]), NULL)); + + tn = new array_type(tn, array_bound(1), array_bound(temporary)); + symtab_->add_type(tn); + if(i == 0 && sharedAnnotation == 1){ + tn = static_cast<omega::CG_suifBuilder*>(ocg_)->ModifyType(tn, "__shared__"); + symtab_->add_type(tn); + } + } + + static int suif_array_counter = 1; + std::string s = std::string("_P") + omega::to_string(suif_array_counter++); + var_sym *vs = new var_sym(tn, const_cast<char *>(s.c_str())); + vs->add_to_table(symtab_); + + return new IR_suifArraySymbol(this, vs); +} + + +bool IR_cudasuifCode::commit_loop(Loop *loop, int loop_num) { + if (loop == NULL) + return true; + + //Call code-gen part of any scripting routines that were run. 
+ // internally call GetCode + // Add stuff before and after (setup, teardown + // return a tnl + LoopCuda *cu_loop = (LoopCuda *)loop; + tree_node_list *tnl = cu_loop->codegen(); + if(!tnl) + return false; + + //set up our new procs + for(int i=0; i<cu_loop->new_procs.size(); i++) + { + printf("setting proc fse\n"); + cu_loop->new_procs[i]->set_fse(fse_); + write_procs.push_back(cu_loop->new_procs[i]); + } + + //Only thing that should be left will be the inserting of the tnl* into the loop + + omega::CG_outputRepr *repr = new omega::CG_suifRepr(tnl); + if (cu_loop->init_code != NULL) + repr = ocg_->StmtListAppend(cu_loop->init_code->clone(), repr); + + std::vector<tree_for *> loops = find_loops(psym_->block()->body()); + tnl = loops[loop_num]->parent(); + + if (cu_loop->setup_code != NULL) { + tree_node_list *setup_tnl = static_cast<omega::CG_suifRepr *>(cu_loop->setup_code->clone())->GetCode(); + //TODO: I think this is a hack we can undo if we have loop->codegen() + //loo->getCode(), maybe also get rid of setup and teardown... 
+ //fix_unfinished_comment(setup_tnl, indexes_string); + tnl->insert_before(setup_tnl, loops[loop_num]->list_e()); + } + tnl->insert_before(static_cast<omega::CG_suifRepr *>(repr)->GetCode(), loops[loop_num]->list_e()); + if (cu_loop->teardown_code != NULL) { + tree_node_list *setup_tnl = static_cast<omega::CG_suifRepr *>(cu_loop->teardown_code->clone())->GetCode(); + tnl->insert_before(setup_tnl, loops[loop_num]->list_e()); + } + + tnl->remove(loops[loop_num]->list_e()); + + delete repr; + return true; +} + +IR_cudasuifCode::~IR_cudasuifCode() +{ + for(int i=0; i<write_procs.size(); i++) + { + if (!write_procs[i]->is_written()) + write_procs[i]->write_proc(fse_); + write_procs[i]->flush_proc(); + } +} diff --git a/ir_cudasuif.hh b/ir_cudasuif.hh new file mode 100644 index 0000000..834778e --- /dev/null +++ b/ir_cudasuif.hh @@ -0,0 +1,36 @@ +#ifndef IR_CUDA_SUIF +#define IR_CUDA_SUIF + +#include <code_gen/CG_suifRepr.h> +#include <code_gen/CG_suifBuilder.h> +#include "ir_suif.hh" +#include "loop.hh" +#include "loop_cuda.hh" +#include "ir_suif_utils.hh" + + + +class IR_cudasuifCode : public IR_suifCode{ + +public: + global_symtab *gsym_; + std::vector<proc_sym*> write_procs;//procs to write + + + IR_cudasuifCode(const char *filename, int proc_num); + IR_ArraySymbol *CreateArraySymbol(const IR_Symbol *sym, + std::vector<omega::CG_outputRepr *> &size, + int sharedAnnotation = 1); + omega::CG_outputRepr* init_code(){ return init_code_; } + bool commit_loop(Loop *loop, int loop_num); + std::vector<tree_for *> get_loops() + { + std::vector<tree_for *> loops = find_loops(psym_->block()->body()); + return loops; + } + ~IR_cudasuifCode(); + +}; + + +#endif diff --git a/ir_rose.cc b/ir_rose.cc new file mode 100644 index 0000000..5acb175 --- /dev/null +++ b/ir_rose.cc @@ -0,0 +1,2296 @@ +/***************************************************************************** + Copyright (C) 2009-2010 University of Utah + All Rights Reserved. + + Purpose: + CHiLL's rose interface. 
+ + Notes: + Array supports mixed pointer and array type in a single declaration. + + History: + 02/23/2009 Created by Chun Chen. +*****************************************************************************/ +#include <string> +#include "ir_rose.hh" +#include "ir_rose_utils.hh" +#include <code_gen/rose_attributes.h> +#include <code_gen/CG_roseRepr.h> +#include <code_gen/CG_roseBuilder.h> + +using namespace SageBuilder; +using namespace SageInterface; +using namespace omega; +// ---------------------------------------------------------------------------- +// Class: IR_roseScalarSymbol +// ---------------------------------------------------------------------------- + +std::string IR_roseScalarSymbol::name() const { + return vs_->get_name().getString(); +} + +int IR_roseScalarSymbol::size() const { + return (vs_->get_type()->memoryUsage()) / (vs_->get_type()->numberOfNodes()); +} + +bool IR_roseScalarSymbol::operator==(const IR_Symbol &that) const { + if (typeid(*this) != typeid(that)) + return false; + + const IR_roseScalarSymbol *l_that = + static_cast<const IR_roseScalarSymbol *>(&that); + return this->vs_ == l_that->vs_; +} + +IR_Symbol *IR_roseScalarSymbol::clone() const { + return NULL; +} + +// ---------------------------------------------------------------------------- +// Class: IR_roseArraySymbol +// ---------------------------------------------------------------------------- + +std::string IR_roseArraySymbol::name() const { + return (vs_->get_declaration()->get_name().getString()); +} + +int IR_roseArraySymbol::elem_size() const { + + SgType *tn = vs_->get_type(); + SgType* arrType; + + int elemsize; + + if (arrType = isSgArrayType(tn)) { + while (isSgArrayType(arrType)) { + arrType = arrType->findBaseType(); + } + } else if (arrType = isSgPointerType(tn)) { + while (isSgPointerType(arrType)) { + arrType = arrType->findBaseType(); + } + } + + elemsize = (int) arrType->memoryUsage() / arrType->numberOfNodes(); + return elemsize; +} + +int 
IR_roseArraySymbol::n_dim() const { + int dim = 0; + SgType* arrType = isSgArrayType(vs_->get_type()); + SgType* ptrType = isSgPointerType(vs_->get_type()); + if (arrType != NULL) { + while (isSgArrayType(arrType)) { + arrType = isSgArrayType(arrType)->get_base_type(); + dim++; + } + } else if (ptrType != NULL) { + while (isSgPointerType(ptrType)) { + ptrType = isSgPointerType(ptrType)->get_base_type(); + dim++; + } + } + + // Manu:: fortran support + if (static_cast<const IR_roseCode *>(ir_)->is_fortran_) { + + if (arrType != NULL) { + dim = 0; + SgExprListExp * dimList = isSgArrayType(vs_->get_type())->get_dim_info(); + SgExpressionPtrList::iterator it = dimList->get_expressions().begin(); + for(;it != dimList->get_expressions().end(); it++) { + dim++; + } + } else if (ptrType != NULL) { + //std::cout << "pntrType \n"; + ; // not sure if this case will happen + } + } + + return dim; +} + +omega::CG_outputRepr *IR_roseArraySymbol::size(int dim) const { + + SgArrayType* arrType = isSgArrayType(vs_->get_type()); + // SgExprListExp* dimList = arrType->get_dim_info(); + int count = 0; + SgExpression* expr; + SgType* pntrType = isSgPointerType(vs_->get_type()); + + if (arrType != NULL) { + SgExprListExp* dimList = arrType->get_dim_info(); + if (!static_cast<const IR_roseCode *>(ir_)->is_fortran_) { + SgExpressionPtrList::iterator it = + dimList->get_expressions().begin(); + + while ((it != dimList->get_expressions().end()) && (count < dim)) { + it++; + count++; + } + + expr = *it; + } else { + SgExpressionPtrList::reverse_iterator i = + dimList->get_expressions().rbegin(); + for (; (i != dimList->get_expressions().rend()) && (count < dim); + i++) { + + count++; + } + + expr = *i; + } + } else if (pntrType != NULL) { + + while (count < dim) { + pntrType = (isSgPointerType(pntrType))->get_base_type(); + count++; + } + if (isSgPointerType(pntrType)) + expr = new SgExpression; + } + + if (!expr) + throw ir_error("Index variable is NULL!!"); + + // Manu :: debug + std::cout 
<< "---------- size :: " << isSgNode(expr)->unparseToString().c_str() << "\n"; + + return new omega::CG_roseRepr(expr); + +} + +IR_ARRAY_LAYOUT_TYPE IR_roseArraySymbol::layout_type() const { + if (static_cast<const IR_roseCode *>(ir_)->is_fortran_) + return IR_ARRAY_LAYOUT_COLUMN_MAJOR; + else + return IR_ARRAY_LAYOUT_ROW_MAJOR; + +} + +bool IR_roseArraySymbol::operator==(const IR_Symbol &that) const { + + if (typeid(*this) != typeid(that)) + return false; + + const IR_roseArraySymbol *l_that = + static_cast<const IR_roseArraySymbol *>(&that); + return this->vs_ == l_that->vs_; + +} + +IR_Symbol *IR_roseArraySymbol::clone() const { + return new IR_roseArraySymbol(ir_, vs_); +} + +// ---------------------------------------------------------------------------- +// Class: IR_roseConstantRef +// ---------------------------------------------------------------------------- + +bool IR_roseConstantRef::operator==(const IR_Ref &that) const { + + if (typeid(*this) != typeid(that)) + return false; + + const IR_roseConstantRef *l_that = + static_cast<const IR_roseConstantRef *>(&that); + + if (this->type_ != l_that->type_) + return false; + + if (this->type_ == IR_CONSTANT_INT) + return this->i_ == l_that->i_; + else + return this->f_ == l_that->f_; + +} + +omega::CG_outputRepr *IR_roseConstantRef::convert() { + if (type_ == IR_CONSTANT_INT) { + omega::CG_roseRepr *result = new omega::CG_roseRepr( + isSgExpression(buildIntVal(static_cast<int>(i_)))); + delete this; + return result; + } else + throw ir_error("constant type not supported"); + +} + +IR_Ref *IR_roseConstantRef::clone() const { + if (type_ == IR_CONSTANT_INT) + return new IR_roseConstantRef(ir_, i_); + else if (type_ == IR_CONSTANT_FLOAT) + return new IR_roseConstantRef(ir_, f_); + else + throw ir_error("constant type not supported"); + +} + +// ---------------------------------------------------------------------------- +// Class: IR_roseScalarRef +// 
---------------------------------------------------------------------------- + +bool IR_roseScalarRef::is_write() const { + /* if (ins_pos_ != NULL && op_pos_ == -1) + return true; + else + return false; + */ + + if (is_write_ == 1) + return true; + + return false; +} + +IR_ScalarSymbol *IR_roseScalarRef::symbol() const { + return new IR_roseScalarSymbol(ir_, vs_->get_symbol()); +} + +bool IR_roseScalarRef::operator==(const IR_Ref &that) const { + if (typeid(*this) != typeid(that)) + return false; + + const IR_roseScalarRef *l_that = + static_cast<const IR_roseScalarRef *>(&that); + + if (this->ins_pos_ == NULL) + return this->vs_ == l_that->vs_; + else + return this->ins_pos_ == l_that->ins_pos_ + && this->op_pos_ == l_that->op_pos_; +} + +omega::CG_outputRepr *IR_roseScalarRef::convert() { + omega::CG_roseRepr *result = new omega::CG_roseRepr(isSgExpression(vs_)); + delete this; + return result; + +} + +IR_Ref * IR_roseScalarRef::clone() const { + //if (ins_pos_ == NULL) + return new IR_roseScalarRef(ir_, vs_, this->is_write_); + //else + // return new IR_roseScalarRef(ir_, , op_pos_); + +} + +// ---------------------------------------------------------------------------- +// Class: IR_roseArrayRef +// ---------------------------------------------------------------------------- + +bool IR_roseArrayRef::is_write() const { + SgAssignOp* assignment; + + if (is_write_ == 1 || is_write_ == 0) + return is_write_; + if (assignment = isSgAssignOp(ia_->get_parent())) { + if (assignment->get_lhs_operand() == ia_) + return true; + } else if (SgExprStatement* expr_stmt = isSgExprStatement( + ia_->get_parent())) { + SgExpression* exp = expr_stmt->get_expression(); + + if (exp) { + if (assignment = isSgAssignOp(exp)) { + if (assignment->get_lhs_operand() == ia_) + return true; + + } + } + + } + return false; +} + +omega::CG_outputRepr *IR_roseArrayRef::index(int dim) const { + + SgExpression *current = isSgExpression(ia_); + SgExpression* expr; + int count = 0; + + while 
(isSgPntrArrRefExp(current)) { + current = isSgPntrArrRefExp(current)->get_lhs_operand(); + count++; + } + + current = ia_; + + while (count > dim) { + expr = isSgPntrArrRefExp(current)->get_rhs_operand(); + current = isSgPntrArrRefExp(current)->get_lhs_operand(); + count--; + } + + // Manu:: fortran support + if (static_cast<const IR_roseCode *>(ir_)->is_fortran_) { + expr = isSgPntrArrRefExp(ia_)->get_rhs_operand(); + count = 0; + if (isSgExprListExp(expr)) { + SgExpressionPtrList::iterator indexList = isSgExprListExp(expr)->get_expressions().begin(); + while (count < dim) { + indexList++; + count++; + } + expr = isSgExpression(*indexList); + } + } + + if (!expr) + throw ir_error("Index variable is NULL!!"); + + + omega::CG_roseRepr* ind = new omega::CG_roseRepr(expr); + + return ind->clone(); + +} + +IR_ArraySymbol *IR_roseArrayRef::symbol() const { + + SgExpression *current = isSgExpression(ia_); + + SgVarRefExp* base; + SgVariableSymbol *arrSymbol; + while (isSgPntrArrRefExp(current) || isSgUnaryOp(current)) { + if (isSgPntrArrRefExp(current)) + current = isSgPntrArrRefExp(current)->get_lhs_operand(); + else if (isSgUnaryOp(current)) + /* To handle support for addressof operator and pointer dereference + * both of which are unary ops + */ + current = isSgUnaryOp(current)->get_operand(); + } + if (base = isSgVarRefExp(current)) { + arrSymbol = (SgVariableSymbol*) (base->get_symbol()); + std::string x = arrSymbol->get_name().getString(); + } else + throw ir_error("Array Symbol is not a variable?!"); + + return new IR_roseArraySymbol(ir_, arrSymbol); + +} + +bool IR_roseArrayRef::operator==(const IR_Ref &that) const { + if (typeid(*this) != typeid(that)) + return false; + + const IR_roseArrayRef *l_that = static_cast<const IR_roseArrayRef *>(&that); + + return this->ia_ == l_that->ia_; +} + +omega::CG_outputRepr *IR_roseArrayRef::convert() { + omega::CG_roseRepr *temp = new omega::CG_roseRepr( + isSgExpression(this->ia_)); + omega::CG_outputRepr *result = 
temp->clone(); +// delete this; // Commented by Manu + return result; +} + +IR_Ref *IR_roseArrayRef::clone() const { + return new IR_roseArrayRef(ir_, ia_, is_write_); +} + +// ---------------------------------------------------------------------------- +// Class: IR_roseLoop +// ---------------------------------------------------------------------------- + +IR_ScalarSymbol *IR_roseLoop::index() const { + SgForStatement *tf = isSgForStatement(tf_); + SgFortranDo *tfortran = isSgFortranDo(tf_); + SgVariableSymbol* vs = NULL; + if (tf) { + SgForInitStatement* list = tf->get_for_init_stmt(); + SgStatementPtrList& initStatements = list->get_init_stmt(); + SgStatementPtrList::const_iterator j = initStatements.begin(); + + if (SgExprStatement *expr = isSgExprStatement(*j)) + if (SgAssignOp* op = isSgAssignOp(expr->get_expression())) + if (SgVarRefExp* var_ref = isSgVarRefExp(op->get_lhs_operand())) + vs = var_ref->get_symbol(); + } else if (tfortran) { + SgExpression* init = tfortran->get_initialization(); + + if (SgAssignOp* op = isSgAssignOp(init)) + if (SgVarRefExp* var_ref = isSgVarRefExp(op->get_lhs_operand())) + vs = var_ref->get_symbol(); + + } + + if (vs == NULL) + throw ir_error("Index variable is NULL!!"); + + return new IR_roseScalarSymbol(ir_, vs); +} + +omega::CG_outputRepr *IR_roseLoop::lower_bound() const { + SgForStatement *tf = isSgForStatement(tf_); + SgFortranDo *tfortran = isSgFortranDo(tf_); + + SgExpression* lowerBound = NULL; + + if (tf) { + SgForInitStatement* list = tf->get_for_init_stmt(); + SgStatementPtrList& initStatements = list->get_init_stmt(); + SgStatementPtrList::const_iterator j = initStatements.begin(); + + if (SgExprStatement *expr = isSgExprStatement(*j)) + if (SgAssignOp* op = isSgAssignOp(expr->get_expression())) { + lowerBound = op->get_rhs_operand(); + //Rose sometimes introduces an unnecessary cast which is a unary op + if (isSgUnaryOp(lowerBound)) + lowerBound = isSgUnaryOp(lowerBound)->get_operand(); + + } + } else if 
(tfortran) { + SgExpression* init = tfortran->get_initialization(); + + if (SgAssignOp* op = isSgAssignOp(init)) + lowerBound = op->get_rhs_operand(); + } + + if (lowerBound == NULL) + throw ir_error("Lower Bound is NULL!!"); + + return new omega::CG_roseRepr(lowerBound); +} + +omega::CG_outputRepr *IR_roseLoop::upper_bound() const { + SgForStatement *tf = isSgForStatement(tf_); + SgFortranDo *tfortran = isSgFortranDo(tf_); + SgExpression* upperBound = NULL; + if (tf) { + SgBinaryOp* test_expr = isSgBinaryOp(tf->get_test_expr()); + if (test_expr == NULL) + throw ir_error("Test Expression is NULL!!"); + + upperBound = test_expr->get_rhs_operand(); + //Rose sometimes introduces an unnecessary cast which is a unary op + if (isSgUnaryOp(upperBound)) + upperBound = isSgUnaryOp(upperBound)->get_operand(); + if (upperBound == NULL) + throw ir_error("Upper Bound is NULL!!"); + } else if (tfortran) { + + upperBound = tfortran->get_bound(); + + } + + return new omega::CG_roseRepr(upperBound); + +} + +IR_CONDITION_TYPE IR_roseLoop::stop_cond() const { + SgForStatement *tf = isSgForStatement(tf_); + SgFortranDo *tfortran = isSgFortranDo(tf_); + + if (tf) { + SgExpression* stopCond = NULL; + SgExpression* test_expr = tf->get_test_expr(); + + if (isSgLessThanOp(test_expr)) + return IR_COND_LT; + else if (isSgLessOrEqualOp(test_expr)) + return IR_COND_LE; + else if (isSgGreaterThanOp(test_expr)) + return IR_COND_GT; + else if (isSgGreaterOrEqualOp(test_expr)) + return IR_COND_GE; + + else + throw ir_error("loop stop condition unsupported"); + } else if (tfortran) { + SgExpression* increment = tfortran->get_increment(); + if (!isSgNullExpression(increment)) { + if (isSgMinusOp(increment) + && !isSgBinaryOp(isSgMinusOp(increment)->get_operand())) + return IR_COND_GE; + else + return IR_COND_LE; + } else { + return IR_COND_LE; // Manu:: if increment is not present, assume it to be 1. Just a workaround, not sure if it will be correct for all cases. 
+ SgExpression* lowerBound = NULL; + SgExpression* upperBound = NULL; + SgExpression* init = tfortran->get_initialization(); + SgIntVal* ub; + SgIntVal* lb; + if (SgAssignOp* op = isSgAssignOp(init)) + lowerBound = op->get_rhs_operand(); + + upperBound = tfortran->get_bound(); + + if ((upperBound != NULL) && (lowerBound != NULL)) { + + if ((ub = isSgIntVal(isSgValueExp(upperBound))) && (lb = + isSgIntVal(isSgValueExp(lowerBound)))) { + if (ub->get_value() > lb->get_value()) + return IR_COND_LE; + else + return IR_COND_GE; + } else + throw ir_error("loop stop condition unsupported"); + + } else + throw ir_error("malformed fortran loop bounds!!"); + + } + } + +} + +IR_Block *IR_roseLoop::body() const { + SgForStatement *tf = isSgForStatement(tf_); + SgFortranDo *tfortran = isSgFortranDo(tf_); + SgNode* loop_body = NULL; + SgStatement* body_statements = NULL; + + if (tf) { + body_statements = tf->get_loop_body(); + } else if (tfortran) { + body_statements = isSgStatement(tfortran->get_body()); + + } + + loop_body = isSgNode(body_statements); + + SgStatementPtrList list; + if (isSgBasicBlock(loop_body)) { + list = isSgBasicBlock(loop_body)->get_statements(); + + if (list.size() == 1) + loop_body = isSgNode(*(list.begin())); + } + + if (loop_body == NULL) + throw ir_error("for loop body is NULL!!"); + + return new IR_roseBlock(ir_, loop_body); +} + +int IR_roseLoop::step_size() const { + + SgForStatement *tf = isSgForStatement(tf_); + SgFortranDo *tfortran = isSgFortranDo(tf_); + + if (tf) { + SgExpression *increment = tf->get_increment(); + + if (isSgPlusPlusOp(increment)) + return 1; + if (isSgMinusMinusOp(increment)) + return -1; + else if (SgAssignOp* assignment = isSgAssignOp(increment)) { + SgBinaryOp* stepsize = isSgBinaryOp(assignment->get_lhs_operand()); + if (stepsize == NULL) + throw ir_error("Step size expression is NULL!!"); + SgIntVal* step = isSgIntVal(stepsize->get_lhs_operand()); + return step->get_value(); + } else if (SgBinaryOp* inc = 
isSgPlusAssignOp(increment)) { + SgIntVal* step = isSgIntVal(inc->get_rhs_operand()); + return (step->get_value()); + } else if (SgBinaryOp * inc = isSgMinusAssignOp(increment)) { + SgIntVal* step = isSgIntVal(inc->get_rhs_operand()); + return -(step->get_value()); + } else if (SgBinaryOp * inc = isSgCompoundAssignOp(increment)) { + SgIntVal* step = isSgIntVal(inc->get_rhs_operand()); + return (step->get_value()); + } + + } else if (tfortran) { + + SgExpression* increment = tfortran->get_increment(); + + if (!isSgNullExpression(increment)) { + if (isSgMinusOp(increment)) { + if (SgValueExp *inc = isSgValueExp( + isSgMinusOp(increment)->get_operand())) + if (isSgIntVal(inc)) + return -(isSgIntVal(inc)->get_value()); + } else { + if (SgValueExp* inc = isSgValueExp(increment)) + if (isSgIntVal(inc)) + return isSgIntVal(inc)->get_value(); + } + } else { + return 1; // Manu:: if increment is not present, assume it to be 1. Just a workaround, not sure if it will be correct for all cases. + SgExpression* lowerBound = NULL; + SgExpression* upperBound = NULL; + SgExpression* init = tfortran->get_initialization(); + SgIntVal* ub; + SgIntVal* lb; + if (SgAssignOp* op = isSgAssignOp(init)) + lowerBound = op->get_rhs_operand(); + + upperBound = tfortran->get_bound(); + + if ((upperBound != NULL) && (lowerBound != NULL)) { + + if ((ub = isSgIntVal(isSgValueExp(upperBound))) && (lb = + isSgIntVal(isSgValueExp(lowerBound)))) { + if (ub->get_value() > lb->get_value()) + return 1; + else + return -1; + } else + throw ir_error("loop stop condition unsupported"); + + } else + throw ir_error("loop stop condition unsupported"); + + } + + } + +} + +IR_Block *IR_roseLoop::convert() { + const IR_Code *ir = ir_; + SgNode *tnl = isSgNode(tf_); + delete this; + return new IR_roseBlock(ir, tnl); +} + +IR_Control *IR_roseLoop::clone() const { + + return new IR_roseLoop(ir_, tf_); + +} + +// ---------------------------------------------------------------------------- +// Class: IR_roseBlock +// 
---------------------------------------------------------------------------- + +omega::CG_outputRepr *IR_roseBlock::original() const { + + omega::CG_outputRepr * tnl; + + if (isSgBasicBlock(tnl_)) { + + SgStatementPtrList *bb = new SgStatementPtrList(); + SgStatementPtrList::iterator it; + for (it = (isSgBasicBlock(tnl_)->get_statements()).begin(); + it != (isSgBasicBlock(tnl_)->get_statements()).end() + && (*it != start_); it++) + ; + + if (it != (isSgBasicBlock(tnl_)->get_statements()).end()) { + for (; it != (isSgBasicBlock(tnl_)->get_statements()).end(); it++) { + bb->push_back(*it); + if ((*it) == end_) + break; + } + } + tnl = new omega::CG_roseRepr(bb); + //block = tnl->clone(); + + } else { + tnl = new omega::CG_roseRepr(tnl_); + + //block = tnl->clone(); + } + + return tnl; + +} +omega::CG_outputRepr *IR_roseBlock::extract() const { + + std::string x = tnl_->unparseToString(); + + omega::CG_roseRepr * tnl; + + omega::CG_outputRepr* block; + + if (isSgBasicBlock(tnl_)) { + + SgStatementPtrList *bb = new SgStatementPtrList(); + SgStatementPtrList::iterator it; + for (it = (isSgBasicBlock(tnl_)->get_statements()).begin(); + it != (isSgBasicBlock(tnl_)->get_statements()).end() + && (*it != start_); it++) + ; + + if (it != (isSgBasicBlock(tnl_)->get_statements()).end()) { + for (; it != (isSgBasicBlock(tnl_)->get_statements()).end(); it++) { + bb->push_back(*it); + if ((*it) == end_) + break; + } + } + tnl = new omega::CG_roseRepr(bb); + block = tnl->clone(); + + } else { + tnl = new omega::CG_roseRepr(tnl_); + + block = tnl->clone(); + } + + delete tnl; + return block; +} + +IR_Control *IR_roseBlock::clone() const { + return new IR_roseBlock(ir_, tnl_, start_, end_); + +} +// ---------------------------------------------------------------------------- +// Class: IR_roseIf +// ---------------------------------------------------------------------------- +omega::CG_outputRepr *IR_roseIf::condition() const { + SgNode *tnl = 
isSgNode(isSgIfStmt(ti_)->get_conditional()); + SgExpression* exp = NULL; + if (SgExprStatement* stmt = isSgExprStatement(tnl)) + exp = stmt->get_expression(); + /* + SgExpression *op = iter(tnl); + if (iter.is_empty()) + throw ir_error("unrecognized if structure"); + tree_node *tn = iter.step(); + if (!iter.is_empty()) + throw ir_error("unrecognized if structure"); + if (!tn->is_instr()) + throw ir_error("unrecognized if structure"); + instruction *ins = static_cast<tree_instr *>(tn)->instr(); + if (!ins->opcode() == io_bfalse) + throw ir_error("unrecognized if structure"); + operand op = ins->src_op(0);*/ + if (exp == NULL) + return new omega::CG_roseRepr(tnl); + else + return new omega::CG_roseRepr(exp); +} + +IR_Block *IR_roseIf::then_body() const { + SgNode *tnl = isSgNode(isSgIfStmt(ti_)->get_true_body()); + + //tree_node_list *tnl = ti_->then_part(); + if (tnl == NULL) + return NULL; + /* + tree_node_list_iter iter(tnl); + if (iter.is_empty()) + return NULL; */ + + return new IR_roseBlock(ir_, tnl); +} + +IR_Block *IR_roseIf::else_body() const { + SgNode *tnl = isSgNode(isSgIfStmt(ti_)->get_false_body()); + + //tree_node_list *tnl = ti_->else_part(); + + if (tnl == NULL) + return NULL; + /* + tree_node_list_iter iter(tnl); + if (iter.is_empty()) + return NULL;*/ + + return new IR_roseBlock(ir_, tnl); +} + +IR_Block *IR_roseIf::convert() { + const IR_Code *ir = ir_; + /* SgNode *tnl = ti_->get_parent(); + SgNode *start, *end; + start = end = ti_; + + //tree_node_list *tnl = ti_->parent(); + //tree_node_list_e *start, *end; + //start = end = ti_->list_e(); + */ + delete this; + return new IR_roseBlock(ir, ti_); +} + +IR_Control *IR_roseIf::clone() const { + return new IR_roseIf(ir_, ti_); +} + +// -----------------------------------------------------------y----------------- +// Class: IR_roseCode_Global_Init +// ---------------------------------------------------------------------------- + +IR_roseCode_Global_Init *IR_roseCode_Global_Init::pinstance = 0; + 
+IR_roseCode_Global_Init * IR_roseCode_Global_Init::Instance(char** argv) { + if (pinstance == 0) { + pinstance = new IR_roseCode_Global_Init; + pinstance->project = frontend(2, argv); + + } + return pinstance; +} + +// ---------------------------------------------------------------------------- +// Class: IR_roseCode +// ---------------------------------------------------------------------------- + +IR_roseCode::IR_roseCode(const char *filename, const char* proc_name) : + IR_Code() { + + SgProject* project; + + char* argv[2]; + int counter = 0; + argv[0] = (char*) malloc(5 * sizeof(char)); + argv[1] = (char*) malloc((strlen(filename) + 1) * sizeof(char)); + strcpy(argv[0], "rose"); + strcpy(argv[1], filename); + + project = (IR_roseCode_Global_Init::Instance(argv))->project; + //main_ssa = new ssa_unfiltered_cfg::SSA_UnfilteredCfg(project); + //main_ssa->run(); + firstScope = getFirstGlobalScope(project); + SgFilePtrList& file_list = project->get_fileList(); + + for (SgFilePtrList::iterator it = file_list.begin(); it != file_list.end(); + it++) { + file = isSgSourceFile(*it); + if (file->get_outputLanguage() == SgFile::e_Fortran_output_language) + is_fortran_ = true; + else + is_fortran_ = false; + + // Manu:: debug + // if (is_fortran_) + // std::cout << "Input is a fortran file\n"; + // else + // std::cout << "Input is a C file\n"; + + root = file->get_globalScope(); + + if (!is_fortran_) { // Manu:: this macro should not be created if the input code is in fortran + buildCpreprocessorDefineDeclaration(root, + "#define __rose_lt(x,y) ((x)<(y)?(x):(y))", + PreprocessingInfo::before); + buildCpreprocessorDefineDeclaration(root, + "#define __rose_gt(x,y) ((x)>(y)?(x):(y))", + PreprocessingInfo::before); + } + + symtab_ = isSgScopeStatement(root)->get_symbol_table(); + SgDeclarationStatementPtrList& declList = root->get_declarations(); + + p = declList.begin(); + + while (p != declList.end()) { + func = isSgFunctionDeclaration(*p); + if (func) { + if 
(!strcmp((func->get_name().getString()).c_str(), proc_name)) + break; + + } + p++; + counter++; + } + if (p != declList.end()) + break; + + } + + symtab2_ = func->get_definition()->get_symbol_table(); + symtab3_ = func->get_definition()->get_body()->get_symbol_table(); + // ocg_ = new omega::CG_roseBuilder(func->get_definition()->get_body()->get_symbol_table() , isSgNode(func->get_definition()->get_body())); + // Manu:: added is_fortran_ parameter + ocg_ = new omega::CG_roseBuilder(is_fortran_, root, firstScope, + func->get_definition()->get_symbol_table(), + func->get_definition()->get_body()->get_symbol_table(), + isSgNode(func->get_definition()->get_body())); + + i_ = 0; /*i_ handling may need revision */ + + free(argv[1]); + free(argv[0]); + +} + +IR_roseCode::~IR_roseCode() { +} + +void IR_roseCode::finalizeRose() { + // Moved this out of the deconstructor + // ???? + SgProject* project = (IR_roseCode_Global_Init::Instance(NULL))->project; + // -- Causes coredump. commented out for now -- // + // processes attributes left in Rose Ast + //postProcessRoseCodeInsertion(project); + project->unparse(); + //backend((IR_roseCode_Global_Init::Instance(NULL))->project); +} + +IR_ScalarSymbol *IR_roseCode::CreateScalarSymbol(const IR_Symbol *sym, int) { + char str1[14]; + if (typeid(*sym) == typeid(IR_roseScalarSymbol)) { + SgType *tn = + static_cast<const IR_roseScalarSymbol *>(sym)->vs_->get_type(); + sprintf(str1, "newVariable%i\0", i_); + SgVariableDeclaration* defn = buildVariableDeclaration(str1, tn); + i_++; + + SgInitializedNamePtrList& variables = defn->get_variables(); + SgInitializedNamePtrList::const_iterator i = variables.begin(); + SgInitializedName* initializedName = *i; + SgVariableSymbol* vs = new SgVariableSymbol(initializedName); + + prependStatement(defn, + isSgScopeStatement(func->get_definition()->get_body())); + vs->set_parent(symtab_); + symtab_->insert(str1, vs); + + if (vs == NULL) + throw ir_error("in CreateScalarSymbol: vs is NULL!!"); + + 
return new IR_roseScalarSymbol(this, vs); + } else if (typeid(*sym) == typeid(IR_roseArraySymbol)) { + SgType *tn1 = + static_cast<const IR_roseArraySymbol *>(sym)->vs_->get_type(); + while (isSgArrayType(tn1) || isSgPointerType(tn1)) { + if (isSgArrayType(tn1)) + tn1 = isSgArrayType(tn1)->get_base_type(); + else if (isSgPointerType(tn1)) + tn1 = isSgPointerType(tn1)->get_base_type(); + else + throw ir_error( + "in CreateScalarSymbol: symbol not an array nor a pointer!"); + } + + sprintf(str1, "newVariable%i\0", i_); + i_++; + + SgVariableDeclaration* defn1 = buildVariableDeclaration(str1, tn1); + SgInitializedNamePtrList& variables1 = defn1->get_variables(); + + SgInitializedNamePtrList::const_iterator i1 = variables1.begin(); + SgInitializedName* initializedName1 = *i1; + + SgVariableSymbol *vs1 = new SgVariableSymbol(initializedName1); + prependStatement(defn1, + isSgScopeStatement(func->get_definition()->get_body())); + + vs1->set_parent(symtab_); + symtab_->insert(str1, vs1); + + if (vs1 == NULL) + throw ir_error("in CreateScalarSymbol: vs1 is NULL!!"); + + return new IR_roseScalarSymbol(this, vs1); + } else + throw std::bad_typeid(); + +} + +IR_ArraySymbol *IR_roseCode::CreateArraySymbol(const IR_Symbol *sym, + std::vector<omega::CG_outputRepr *> &size, int) { + SgType *tn; + char str1[14]; + + if (typeid(*sym) == typeid(IR_roseScalarSymbol)) { + tn = static_cast<const IR_roseScalarSymbol *>(sym)->vs_->get_type(); + } else if (typeid(*sym) == typeid(IR_roseArraySymbol)) { + tn = static_cast<const IR_roseArraySymbol *>(sym)->vs_->get_type(); + while (isSgArrayType(tn) || isSgPointerType(tn)) { + if (isSgArrayType(tn)) + tn = isSgArrayType(tn)->get_base_type(); + else if (isSgPointerType(tn)) + tn = isSgPointerType(tn)->get_base_type(); + else + throw ir_error( + "in CreateScalarSymbol: symbol not an array nor a pointer!"); + } + } else + throw std::bad_typeid(); + + + // Manu:: Fortran support + std::vector<SgExpression *>exprs; + SgExprListExp *exprLstExp; + 
SgExpression* sizeExpression = new SgNullExpression(); + SgArrayType* arrayType = new SgArrayType(tn,sizeExpression); + sizeExpression->set_parent(arrayType); + + if (!is_fortran_) { + for (int i = size.size() - 1; i >= 0; i--) { + tn = buildArrayType(tn,static_cast<omega::CG_roseRepr *>(size[i])->GetExpression()); + } + } else { // Manu:: required for fortran support + for (int i = size.size() - 1; i >= 0; i--) { + exprs.push_back(static_cast<omega::CG_roseRepr *>(size[i])->GetExpression()); + } + } + + if (is_fortran_) { + exprLstExp = buildExprListExp(exprs); + arrayType->set_dim_info(exprLstExp); + exprLstExp->set_parent(arrayType); + arrayType->set_rank(exprLstExp->get_expressions().size()); + } + + static int rose_array_counter = 1; + SgVariableDeclaration* defn2; + std::string s; + if (!is_fortran_) { + s = std::string("_P") + omega::to_string(rose_array_counter++); + defn2 = buildVariableDeclaration(const_cast<char *>(s.c_str()), tn); + } else {// Manu:: fortran support + s = std::string("f_P") + omega::to_string(rose_array_counter++); + defn2 = buildVariableDeclaration(const_cast<char *>(s.c_str()), arrayType); + } + + + SgInitializedNamePtrList& variables2 = defn2->get_variables(); + + SgInitializedNamePtrList::const_iterator i2 = variables2.begin(); + SgInitializedName* initializedName2 = *i2; + SgVariableSymbol *vs = new SgVariableSymbol(initializedName2); + + prependStatement(defn2, + isSgScopeStatement(func->get_definition()->get_body())); + + vs->set_parent(symtab_); + symtab_->insert(SgName(s.c_str()), vs); + + return new IR_roseArraySymbol(this, vs); +} + +IR_ScalarRef *IR_roseCode::CreateScalarRef(const IR_ScalarSymbol *sym) { + return new IR_roseScalarRef(this, + buildVarRefExp(static_cast<const IR_roseScalarSymbol *>(sym)->vs_)); + +} + +IR_ArrayRef *IR_roseCode::CreateArrayRef(const IR_ArraySymbol *sym, + std::vector<omega::CG_outputRepr *> &index) { + + int t; + + if (sym->n_dim() != index.size()) + throw std::invalid_argument("incorrect array 
symbol dimensionality"); + + const IR_roseArraySymbol *l_sym = + static_cast<const IR_roseArraySymbol *>(sym); + + SgVariableSymbol *vs = l_sym->vs_; + SgExpression* ia1 = buildVarRefExp(vs); + + + + if (is_fortran_) { // Manu:: fortran support + std::vector<SgExpression *>exprs; + for (int i = 0 ; i < index.size(); i++) { + exprs.push_back(static_cast<omega::CG_roseRepr *>(index[i])->GetExpression()); + } + SgExprListExp *exprLstExp; + exprLstExp = buildExprListExp(exprs); + ia1 = buildPntrArrRefExp(ia1,exprLstExp); + } else { + for (int i = 0; i < index.size(); i++) { +/* + if (is_fortran_) + t = index.size() - i - 1; + else + t = i; +*/ + + // std::string y = + // isSgNode( + // static_cast<omega::CG_roseRepr *>(index[i])->GetExpression())->unparseToString(); + ia1 = buildPntrArrRefExp(ia1, + static_cast<omega::CG_roseRepr *>(index[i])->GetExpression()); + + } + } + + SgPntrArrRefExp *ia = isSgPntrArrRefExp(ia1); + //std::string z = isSgNode(ia)->unparseToString(); + + return new IR_roseArrayRef(this, ia, -1); + +} + +std::vector<IR_ScalarRef *> IR_roseCode::FindScalarRef( + const omega::CG_outputRepr *repr) const { + std::vector<IR_ScalarRef *> scalars; + SgNode *tnl = static_cast<const omega::CG_roseRepr *>(repr)->GetCode(); + SgStatementPtrList *list = + static_cast<const omega::CG_roseRepr *>(repr)->GetList(); + SgStatement* stmt; + SgExpression * exp; + + if (list != NULL) { + for (SgStatementPtrList::iterator it = (*list).begin(); + it != (*list).end(); it++) { + omega::CG_roseRepr *r = new omega::CG_roseRepr(isSgNode(*it)); + std::vector<IR_ScalarRef *> a = FindScalarRef(r); + delete r; + std::copy(a.begin(), a.end(), back_inserter(scalars)); + } + } + + else if (tnl != NULL) { + if (stmt = isSgStatement(tnl)) { + if (isSgBasicBlock(stmt)) { + SgStatementPtrList& stmts = + isSgBasicBlock(stmt)->get_statements(); + for (int i = 0; i < stmts.size(); i++) { + omega::CG_roseRepr *r = new omega::CG_roseRepr( + isSgNode(stmts[i])); + std::vector<IR_ScalarRef *> 
a = FindScalarRef(r); + delete r; + std::copy(a.begin(), a.end(), back_inserter(scalars)); + } + + } else if (isSgForStatement(stmt)) { + + SgForStatement *tnf = isSgForStatement(stmt); + omega::CG_roseRepr *r = new omega::CG_roseRepr( + isSgStatement(tnf->get_loop_body())); + std::vector<IR_ScalarRef *> a = FindScalarRef(r); + delete r; + std::copy(a.begin(), a.end(), back_inserter(scalars)); + } else if (isSgFortranDo(stmt)) { + SgFortranDo *tfortran = isSgFortranDo(stmt); + omega::CG_roseRepr *r = new omega::CG_roseRepr( + isSgStatement(tfortran->get_body())); + std::vector<IR_ScalarRef *> a = FindScalarRef(r); + delete r; + std::copy(a.begin(), a.end(), back_inserter(scalars)); + } else if (isSgIfStmt(stmt)) { + SgIfStmt* tni = isSgIfStmt(stmt); + omega::CG_roseRepr *r = new omega::CG_roseRepr( + isSgNode(tni->get_conditional())); + std::vector<IR_ScalarRef *> a = FindScalarRef(r); + delete r; + std::copy(a.begin(), a.end(), back_inserter(scalars)); + r = new omega::CG_roseRepr(isSgNode(tni->get_true_body())); + a = FindScalarRef(r); + delete r; + std::copy(a.begin(), a.end(), back_inserter(scalars)); + r = new omega::CG_roseRepr(isSgNode(tni->get_false_body())); + a = FindScalarRef(r); + delete r; + std::copy(a.begin(), a.end(), back_inserter(scalars)); + } else if (isSgExprStatement(stmt)) { + omega::CG_roseRepr *r = new omega::CG_roseRepr( + isSgExpression( + isSgExprStatement(stmt)->get_expression())); + std::vector<IR_ScalarRef *> a = FindScalarRef(r); + delete r; + std::copy(a.begin(), a.end(), back_inserter(scalars)); + + } + } + } else { + SgExpression* op = + static_cast<const omega::CG_roseRepr *>(repr)->GetExpression(); + if (isSgVarRefExp(op) + && (!isSgArrayType(isSgVarRefExp(op)->get_type()))) { + /* if ((isSgAssignOp(isSgNode(op)->get_parent())) + && ((isSgAssignOp(isSgNode(op)->get_parent())->get_lhs_operand()) + == op)) + scalars.push_back( + new IR_roseScalarRef(this, + isSgAssignOp(isSgNode(op)->get_parent()), -1)); + else + */ + if 
(SgBinaryOp* op_ = isSgBinaryOp( + isSgVarRefExp(op)->get_parent())) { + if (SgCompoundAssignOp *op__ = isSgCompoundAssignOp(op_)) { + if (isSgCompoundAssignOp(op_)->get_lhs_operand() + == isSgVarRefExp(op)) { + scalars.push_back( + new IR_roseScalarRef(this, isSgVarRefExp(op), + 1)); + scalars.push_back( + new IR_roseScalarRef(this, isSgVarRefExp(op), + 0)); + } + } + } else if (SgAssignOp* assmt = isSgAssignOp( + isSgVarRefExp(op)->get_parent())) { + + if (assmt->get_lhs_operand() == isSgVarRefExp(op)) + scalars.push_back( + new IR_roseScalarRef(this, isSgVarRefExp(op), 1)); + } else if (SgAssignOp * assmt = isSgAssignOp( + isSgVarRefExp(op)->get_parent())) { + + if (assmt->get_rhs_operand() == isSgVarRefExp(op)) + scalars.push_back( + new IR_roseScalarRef(this, isSgVarRefExp(op), 0)); + } else + scalars.push_back( + new IR_roseScalarRef(this, isSgVarRefExp(op), 0)); + } else if (isSgAssignOp(op)) { + omega::CG_roseRepr *r1 = new omega::CG_roseRepr( + isSgAssignOp(op)->get_lhs_operand()); + std::vector<IR_ScalarRef *> a1 = FindScalarRef(r1); + delete r1; + std::copy(a1.begin(), a1.end(), back_inserter(scalars)); + omega::CG_roseRepr *r2 = new omega::CG_roseRepr( + isSgAssignOp(op)->get_rhs_operand()); + std::vector<IR_ScalarRef *> a2 = FindScalarRef(r2); + delete r2; + std::copy(a2.begin(), a2.end(), back_inserter(scalars)); + + } else if (isSgBinaryOp(op)) { + omega::CG_roseRepr *r1 = new omega::CG_roseRepr( + isSgBinaryOp(op)->get_lhs_operand()); + std::vector<IR_ScalarRef *> a1 = FindScalarRef(r1); + delete r1; + std::copy(a1.begin(), a1.end(), back_inserter(scalars)); + omega::CG_roseRepr *r2 = new omega::CG_roseRepr( + isSgBinaryOp(op)->get_rhs_operand()); + std::vector<IR_ScalarRef *> a2 = FindScalarRef(r2); + delete r2; + std::copy(a2.begin(), a2.end(), back_inserter(scalars)); + } else if (isSgUnaryOp(op)) { + omega::CG_roseRepr *r1 = new omega::CG_roseRepr( + isSgUnaryOp(op)->get_operand()); + std::vector<IR_ScalarRef *> a1 = FindScalarRef(r1); + delete 
r1; + std::copy(a1.begin(), a1.end(), back_inserter(scalars)); + } + + } + return scalars; + +} + +std::vector<IR_ArrayRef *> IR_roseCode::FindArrayRef( + const omega::CG_outputRepr *repr) const { + std::vector<IR_ArrayRef *> arrays; + SgNode *tnl = static_cast<const omega::CG_roseRepr *>(repr)->GetCode(); + SgStatementPtrList* list = + static_cast<const omega::CG_roseRepr *>(repr)->GetList(); + SgStatement* stmt; + SgExpression * exp; + + if (list != NULL) { + for (SgStatementPtrList::iterator it = (*list).begin(); + it != (*list).end(); it++) { + omega::CG_roseRepr *r = new omega::CG_roseRepr(isSgNode(*it)); + std::vector<IR_ArrayRef *> a = FindArrayRef(r); + delete r; + std::copy(a.begin(), a.end(), back_inserter(arrays)); + } + } else if (tnl != NULL) { + if (stmt = isSgStatement(tnl)) { + if (isSgBasicBlock(stmt)) { + SgStatementPtrList& stmts = + isSgBasicBlock(stmt)->get_statements(); + for (int i = 0; i < stmts.size(); i++) { + omega::CG_roseRepr *r = new omega::CG_roseRepr( + isSgNode(stmts[i])); + std::vector<IR_ArrayRef *> a = FindArrayRef(r); + delete r; + std::copy(a.begin(), a.end(), back_inserter(arrays)); + } + + } else if (isSgForStatement(stmt)) { + + SgForStatement *tnf = isSgForStatement(stmt); + omega::CG_roseRepr *r = new omega::CG_roseRepr( + isSgStatement(tnf->get_loop_body())); + std::vector<IR_ArrayRef *> a = FindArrayRef(r); + delete r; + std::copy(a.begin(), a.end(), back_inserter(arrays)); + } else if (isSgFortranDo(stmt)) { + SgFortranDo *tfortran = isSgFortranDo(stmt); + omega::CG_roseRepr *r = new omega::CG_roseRepr( + isSgStatement(tfortran->get_body())); + std::vector<IR_ArrayRef *> a = FindArrayRef(r); + delete r; + std::copy(a.begin(), a.end(), back_inserter(arrays)); + } else if (isSgIfStmt(stmt)) { + SgIfStmt* tni = isSgIfStmt(stmt); + omega::CG_roseRepr *r = new omega::CG_roseRepr( + isSgNode(tni->get_conditional())); + std::vector<IR_ArrayRef *> a = FindArrayRef(r); + delete r; + std::copy(a.begin(), a.end(), 
back_inserter(arrays)); + r = new omega::CG_roseRepr(isSgNode(tni->get_true_body())); + a = FindArrayRef(r); + delete r; + std::copy(a.begin(), a.end(), back_inserter(arrays)); + r = new omega::CG_roseRepr(isSgNode(tni->get_false_body())); + a = FindArrayRef(r); + delete r; + std::copy(a.begin(), a.end(), back_inserter(arrays)); + } else if (isSgExprStatement(stmt)) { + omega::CG_roseRepr *r = new omega::CG_roseRepr( + isSgExpression( + isSgExprStatement(stmt)->get_expression())); + std::vector<IR_ArrayRef *> a = FindArrayRef(r); + delete r; + std::copy(a.begin(), a.end(), back_inserter(arrays)); + + } + } + } else { + SgExpression* op = + static_cast<const omega::CG_roseRepr *>(repr)->GetExpression(); + if (isSgPntrArrRefExp(op)) { + + SgVarRefExp* base; + SgExpression* op2; + if (isSgCompoundAssignOp(isSgPntrArrRefExp(op)->get_parent())) { + IR_roseArrayRef *ref1 = new IR_roseArrayRef(this, + isSgPntrArrRefExp(op), 0); + arrays.push_back(ref1); + IR_roseArrayRef *ref2 = new IR_roseArrayRef(this, + isSgPntrArrRefExp(op), 1); + arrays.push_back(ref2); + } else { + IR_roseArrayRef *ref3 = new IR_roseArrayRef(this, + isSgPntrArrRefExp(op), -1); + arrays.push_back(ref3); + + while (isSgPntrArrRefExp(op)) { + op2 = isSgPntrArrRefExp(op)->get_rhs_operand(); + op = isSgPntrArrRefExp(op)->get_lhs_operand(); + omega::CG_roseRepr *r = new omega::CG_roseRepr(op2); + std::vector<IR_ArrayRef *> a = FindArrayRef(r); + delete r; + std::copy(a.begin(), a.end(), back_inserter(arrays)); + + } + } + /* base = isSgVarRefExp(op); + SgVariableSymbol *arrSymbol = (SgVariableSymbol*)(base->get_symbol()); + SgArrayType *arrType = isSgArrayType(arrSymbol->get_type()); + + SgExprListExp* dimList = arrType->get_dim_info(); + + if(dimList != NULL){ + SgExpressionPtrList::iterator it = dimList->get_expressions().begin(); + SgExpression *expr; + + + for (int i = 0; it != dimList->get_expressions().end(); it++, i++) + { + expr = *it; + + omega::CG_roseRepr *r = new omega::CG_roseRepr(expr); + 
std::vector<IR_ArrayRef *> a = FindArrayRef(r); + delete r; + std::copy(a.begin(), a.end(), back_inserter(arrays)); + } + + } + arrays.push_back(ref); + */ + } else if (isSgAssignOp(op)) { + omega::CG_roseRepr *r1 = new omega::CG_roseRepr( + isSgAssignOp(op)->get_lhs_operand()); + std::vector<IR_ArrayRef *> a1 = FindArrayRef(r1); + delete r1; + std::copy(a1.begin(), a1.end(), back_inserter(arrays)); + omega::CG_roseRepr *r2 = new omega::CG_roseRepr( + isSgAssignOp(op)->get_rhs_operand()); + std::vector<IR_ArrayRef *> a2 = FindArrayRef(r2); + delete r2; + std::copy(a2.begin(), a2.end(), back_inserter(arrays)); + + } else if (isSgBinaryOp(op)) { + omega::CG_roseRepr *r1 = new omega::CG_roseRepr( + isSgBinaryOp(op)->get_lhs_operand()); + std::vector<IR_ArrayRef *> a1 = FindArrayRef(r1); + delete r1; + std::copy(a1.begin(), a1.end(), back_inserter(arrays)); + omega::CG_roseRepr *r2 = new omega::CG_roseRepr( + isSgBinaryOp(op)->get_rhs_operand()); + std::vector<IR_ArrayRef *> a2 = FindArrayRef(r2); + delete r2; + std::copy(a2.begin(), a2.end(), back_inserter(arrays)); + } else if (isSgUnaryOp(op)) { + omega::CG_roseRepr *r1 = new omega::CG_roseRepr( + isSgUnaryOp(op)->get_operand()); + std::vector<IR_ArrayRef *> a1 = FindArrayRef(r1); + delete r1; + std::copy(a1.begin(), a1.end(), back_inserter(arrays)); + } + + } + return arrays; + + /* std::string x; + SgStatement* stmt = isSgStatement(tnl); + SGExprStatement* expr_statement = isSgExprStatement(stmt); + SgExpression* exp= NULL; + if(expr_statement == NULL){ + if(! 
(SgExpression* exp = isSgExpression(tnl)) + throw ir_error("FindArrayRef: Not a stmt nor an expression!!"); + + if( expr_statement != NULL){ + for(int i=0; i < tnl->get_numberOfTraversalSuccessors(); i++){ + + SgNode* tn = isSgStatement(tnl); + SgStatement* stmt = isSgStatement(tn); + if(stmt != NULL){ + SgExprStatement* expr_statement = isSgExprStatement(tn); + if(expr_statement != NULL) + x = isSgNode(expr_statement)->unparseToString(); + exp = expr_statement->get_expression(); + + } + else{ + + exp = isSgExpression(tn); + } + if(exp != NULL){ + x = isSgNode(exp)->unparseToString(); + + if(SgPntrArrRefExp* arrRef = isSgPntrArrRefExp(exp) ){ + if(arrRef == NULL) + throw ir_error("something wrong"); + IR_roseArrayRef *ref = new IR_roseArrayRef(this, arrRef); + arrays.push_back(ref); + } + + omega::CG_outputRepr *r = new omega::CG_roseRepr(isSgNode(exp->get_rhs_operand())); + std::vector<IR_ArrayRef *> a = FindArrayRef(r); + delete r; + std::copy(a.begin(), a.end(), back_inserter(arrays)); + + omega::CG_outputRepr *r1 = new omega::CG_roseRepr(isSgNode(exp->get_lhs_operand())); + std::vector<IR_ArrayRef *> a1 = FindArrayRef(r1); + delete r1; + std::copy(a1.begin(), a1.end(), back_inserter(arrays)); + + } + }*/ + +} + +std::vector<IR_Control *> IR_roseCode::FindOneLevelControlStructure( + const IR_Block *block) const { + + std::vector<IR_Control *> controls; + int i; + int j; + int begin; + int end; + SgNode* tnl_ = + ((static_cast<IR_roseBlock *>(const_cast<IR_Block *>(block)))->tnl_); + + if (isSgForStatement(tnl_)) + controls.push_back(new IR_roseLoop(this, tnl_)); + else if (isSgFortranDo(tnl_)) + controls.push_back(new IR_roseLoop(this, tnl_)); + else if (isSgIfStmt(tnl_)) + controls.push_back(new IR_roseIf(this, tnl_)); + + else if (isSgBasicBlock(tnl_)) { + + SgStatementPtrList& stmts = isSgBasicBlock(tnl_)->get_statements(); + + for (i = 0; i < stmts.size(); i++) { + if (isSgNode(stmts[i]) + == ((static_cast<IR_roseBlock *>(const_cast<IR_Block 
*>(block)))->start_)) + begin = i; + if (isSgNode(stmts[i]) + == ((static_cast<IR_roseBlock *>(const_cast<IR_Block *>(block)))->end_)) + end = i; + } + + SgNode* start = NULL; + SgNode* prev = NULL; + for (i = begin; i <= end; i++) { + if (isSgForStatement(stmts[i]) || isSgFortranDo(stmts[i])) { + if (start != NULL) { + controls.push_back( + new IR_roseBlock(this, + (static_cast<IR_roseBlock *>(const_cast<IR_Block *>(block)))->tnl_, + start, prev)); + start = NULL; + } + controls.push_back(new IR_roseLoop(this, isSgNode(stmts[i]))); + } else if (isSgIfStmt(stmts[i])) { + if (start != NULL) { + controls.push_back( + new IR_roseBlock(this, + (static_cast<IR_roseBlock *>(const_cast<IR_Block *>(block)))->tnl_, + start, prev)); + start = NULL; + } + controls.push_back(new IR_roseIf(this, isSgNode(stmts[i]))); + + } else if (start == NULL) + start = isSgNode(stmts[i]); + + prev = isSgNode(stmts[i]); + } + + if ((start != NULL) && (start != isSgNode(stmts[begin]))) + controls.push_back( + new IR_roseBlock(this, + (static_cast<IR_roseBlock *>(const_cast<IR_Block *>(block)))->tnl_, + start, prev)); + } + + return controls; + +} + +/*std::vector<IR_Control *> IR_roseCode::FindOneLevelControlStructure(const IR_Block *block) const { + + std::vector<IR_Control *> controls; + int i; + int j; + SgNode* tnl_ = ((static_cast<IR_roseBlock *>(const_cast<IR_Block *>(block)))->tnl_); + + + if(isSgForStatement(tnl_)) + controls.push_back(new IR_roseLoop(this,tnl_)); + + else if(isSgBasicBlock(tnl_)){ + + SgStatementPtrList& stmts = isSgBasicBlock(tnl_)->get_statements(); + + for(i =0; i < stmts.size(); i++){ + if(isSgNode(stmts[i]) == ((static_cast<IR_roseBlock *>(const_cast<IR_Block *>(block)))->start_)) + break; + } + + + SgNode* start= NULL; + SgNode* prev= NULL; + for(; i < stmts.size(); i++){ + if ( isSgForStatement(stmts[i]) || isSgFortranDo(stmts[i])){ + if(start != NULL){ + controls.push_back(new IR_roseBlock(this, (static_cast<IR_roseBlock *>(const_cast<IR_Block 
*>(block)))->tnl_ , start, prev)); + start = NULL; + } + controls.push_back(new IR_roseLoop(this, isSgNode(stmts[i]))); + } + else if( start == NULL ) + start = isSgNode(stmts[i]); + + prev = isSgNode(stmts[i]); + } + + if((start != NULL) && (start != isSgNode(stmts[0]))) + controls.push_back(new IR_roseBlock(this, (static_cast<IR_roseBlock *>(const_cast<IR_Block *>(block)))->tnl_, start, prev)); + } + + return controls; + + } + +*/ +IR_Block *IR_roseCode::MergeNeighboringControlStructures( + const std::vector<IR_Control *> &controls) const { + if (controls.size() == 0) + return NULL; + + SgNode *tnl = NULL; + SgNode *start, *end; + for (int i = 0; i < controls.size(); i++) { + switch (controls[i]->type()) { + case IR_CONTROL_LOOP: { + SgNode *tf = static_cast<IR_roseLoop *>(controls[i])->tf_; + if (tnl == NULL) { + tnl = tf->get_parent(); + start = end = tf; + } else { + if (tnl != tf->get_parent()) + throw ir_error("controls to merge not at the same level"); + end = tf; + } + break; + } + case IR_CONTROL_BLOCK: { + if (tnl == NULL) { + tnl = static_cast<IR_roseBlock *>(controls[0])->tnl_; + start = static_cast<IR_roseBlock *>(controls[0])->start_; + end = static_cast<IR_roseBlock *>(controls[0])->end_; + } else { + if (tnl != static_cast<IR_roseBlock *>(controls[0])->tnl_) + throw ir_error("controls to merge not at the same level"); + end = static_cast<IR_roseBlock *>(controls[0])->end_; + } + break; + } + default: + throw ir_error("unrecognized control to merge"); + } + } + + return new IR_roseBlock(controls[0]->ir_, tnl, start, end); +} + +IR_Block *IR_roseCode::GetCode() const { + SgFunctionDefinition* def = NULL; + SgBasicBlock* block = NULL; + if (func != 0) { + if (def = func->get_definition()) { + if (block = def->get_body()) + return new IR_roseBlock(this, + func->get_definition()->get_body()); + } + } + + return NULL; + +} + +void IR_roseCode::ReplaceCode(IR_Control *old, omega::CG_outputRepr *repr) { + /* SgStatementPtrList *tnl = + 
static_cast<omega::CG_roseRepr *>(repr)->GetList(); + SgNode *tf_old; + */ + SgStatementPtrList *tnl = + static_cast<omega::CG_roseRepr *>(repr)->GetList(); + SgNode* node_ = static_cast<omega::CG_roseRepr *>(repr)->GetCode(); + SgNode * tf_old; + + /* May need future revision it tnl has more than one statement */ + + switch (old->type()) { + + case IR_CONTROL_LOOP: + tf_old = static_cast<IR_roseLoop *>(old)->tf_; + break; + case IR_CONTROL_BLOCK: + tf_old = static_cast<IR_roseBlock *>(old)->start_; + break; + + default: + throw ir_error("control structure to be replaced not supported"); + break; + } + + std::string y = tf_old->unparseToString(); + SgStatement *s = isSgStatement(tf_old); + if (s != 0) { + SgStatement *p = isSgStatement(tf_old->get_parent()); + + if (p != 0) { + SgStatement* temp = s; + if (tnl != NULL) { + SgStatementPtrList::iterator it = (*tnl).begin(); + p->insert_statement(temp, *it, true); + temp = *it; + p->remove_statement(s); + it++; + for (; it != (*tnl).end(); it++) { + p->insert_statement(temp, *it, false); + temp = *it; + } + } else if (node_ != NULL) { + if (!isSgStatement(node_)) + throw ir_error("Replacing Code not a statement!"); + else { + SgStatement* replace_ = isSgStatement(node_); + p->insert_statement(s, replace_, true); + p->remove_statement(s); + + } + } else { + throw ir_error("Replacing Code not a statement!"); + } + } else + throw ir_error("Replacing Code not a statement!"); + } else + throw ir_error("Replacing Code not a statement!"); + + delete old; + delete repr; + /* May need future revision it tnl has more than one statement */ + /* + switch (old->type()) { + + case IR_CONTROL_LOOP: + tf_old = static_cast<IR_roseLoop *>(old)->tf_; + break; + case IR_CONTROL_BLOCK: + tf_old = static_cast<IR_roseBlock *>(old)->start_; + break; + + default: + throw ir_error("control structure to be replaced not supported"); + break; + } + + // std::string y = tf_old->unparseToString(); + SgStatement *s = isSgStatement(tf_old); + if (s 
!= 0) { + SgStatement *p = isSgStatement(tf_old->get_parent()); + + if (p != 0) { + // SgStatement* it2 = isSgStatement(tnl); + + // if(it2 != NULL){ + p->replace_statement(s, *tnl); + // } + // else { + // throw ir_error("Replacing Code not a statement!"); + // } + } else + throw ir_error("Replacing Code not a statement!"); + } else + throw ir_error("Replacing Code not a statement!"); + // y = tnl->unparseToString(); + delete old; + delete repr; + */ +} + +void IR_roseCode::ReplaceExpression(IR_Ref *old, omega::CG_outputRepr *repr) { + + SgExpression* op = static_cast<omega::CG_roseRepr *>(repr)->GetExpression(); + + if (typeid(*old) == typeid(IR_roseArrayRef)) { + SgPntrArrRefExp* ia_orig = static_cast<IR_roseArrayRef *>(old)->ia_; + SgExpression* parent = isSgExpression(isSgNode(ia_orig)->get_parent()); + std::string x = isSgNode(op)->unparseToString(); + std::string y = isSgNode(ia_orig)->unparseToString(); + if (parent != NULL) { + std::string z = isSgNode(parent)->unparseToString(); + parent->replace_expression(ia_orig, op); + isSgNode(op)->set_parent(isSgNode(parent)); + + /* if(isSgBinaryOp(parent)) + { + if(isSgBinaryOp(parent)->get_lhs_operand() == ia_orig){ + isSgBinaryOp(parent)->set_lhs_operand(op); + }else if(isSgBinaryOp(parent)->get_rhs_operand() == ia_orig){ + isSgBinaryOp(parent)->set_rhs_operand(op); + + + } + else + parent->replace_expression(ia_orig, op); + */ + } else { + SgStatement* parent_stmt = isSgStatement( + isSgNode(ia_orig)->get_parent()); + if (parent_stmt != NULL) + parent_stmt->replace_expression(ia_orig, op); + else + throw ir_error( + "ReplaceExpression: parent neither expression nor statement"); + } + } else + throw ir_error("replacing a scalar variable not implemented"); + + delete old; +} + +/*std::pair<std::vector<DependenceVector>, std::vector<DependenceVector> > IR_roseCode::FindScalarDeps( + const omega::CG_outputRepr *repr1, const omega::CG_outputRepr *repr2, + std::vector<std::string> index, int i, int j) { + + 
std::vector<DependenceVector> dvs1; + std::vector<DependenceVector> dvs2; + SgNode *tnl_1 = static_cast<const omega::CG_roseRepr *>(repr1)->GetCode(); + SgNode *tnl_2 = static_cast<const omega::CG_roseRepr *>(repr2)->GetCode(); + SgStatementPtrList* list_1 = + static_cast<const omega::CG_roseRepr *>(repr1)->GetList(); + SgStatementPtrList output_list_1; + + std::map<SgVarRefExp*, IR_ScalarRef*> read_scalars_1; + std::map<SgVarRefExp*, IR_ScalarRef*> write_scalars_1; + std::set<std::string> indices; + //std::set<VirtualCFG::CFGNode> reaching_defs_1; + std::set<std::string> def_vars_1; + + populateLists(tnl_1, list_1, output_list_1); + populateScalars(repr1, read_scalars_1, write_scalars_1, indices, index); + //def_vars_1); + //findDefinitions(output_list_1, reaching_defs_1, write_scalars_1); + //def_vars_1); + if (repr1 == repr2) + checkSelfDependency(output_list_1, dvs1, read_scalars_1, + write_scalars_1, index, i, j); + else { + SgStatementPtrList* list_2 = + static_cast<const omega::CG_roseRepr *>(repr2)->GetList(); + SgStatementPtrList output_list_2; + + std::map<SgVarRefExp*, IR_ScalarRef*> read_scalars_2; + std::map<SgVarRefExp*, IR_ScalarRef*> write_scalars_2; + //std::set<VirtualCFG::CFGNode> reaching_defs_2; + std::set<std::string> def_vars_2; + + populateLists(tnl_2, list_2, output_list_2); + populateScalars(repr2, read_scalars_2, write_scalars_2, indices, index); + //def_vars_2); + + checkDependency(output_list_2, dvs1, read_scalars_2, write_scalars_1, + index, i, j); + checkDependency(output_list_1, dvs1, read_scalars_1, write_scalars_2, + index, i, j); + checkWriteDependency(output_list_2, dvs1, write_scalars_2, + write_scalars_1, index, i, j); + checkWriteDependency(output_list_1, dvs1, write_scalars_1, + write_scalars_2, index, i, j); + } + + return std::make_pair(dvs1, dvs2); + //populateLists(tnl_2, list_2, list2); + + } +*/

// Classifies the top-level operation of the expression held by `repr`.
//
// repr: must be a CG_roseRepr; its GetExpression() result is inspected.
// Returns the IR_OPERATION_TYPE matching the expression's ROSE node kind:
//   value -> CONSTANT, variable or array reference -> VARIABLE,
//   assignment or compound assignment -> ASSIGNMENT, + - * / -> PLUS, MINUS,
//   MULTIPLY, DIVIDE, unary minus -> NEGATIVE, unary plus -> POSITIVE,
//   null expression -> NULL.
// A conditional expression is reported as MAX when its condition is a `>`
// comparison and MIN when it is a `<` comparison. Anything else, including a
// conditional with any other condition, yields IR_OP_UNKNOWN.
IR_OPERATION_TYPE IR_roseCode::QueryExpOperation(
    const omega::CG_outputRepr *repr) const {
  SgExpression *op =
      static_cast<const omega::CG_roseRepr *>(repr)->GetExpression();

  if (isSgValueExp(op))
    return IR_OP_CONSTANT;
  if (isSgVarRefExp(op) || isSgPntrArrRefExp(op))
    return IR_OP_VARIABLE;
  if (isSgAssignOp(op) || isSgCompoundAssignOp(op))
    return IR_OP_ASSIGNMENT;
  if (isSgAddOp(op))
    return IR_OP_PLUS;
  if (isSgSubtractOp(op))
    return IR_OP_MINUS;
  if (isSgMultiplyOp(op))
    return IR_OP_MULTIPLY;
  if (isSgDivideOp(op))
    return IR_OP_DIVIDE;
  if (isSgMinusOp(op))
    return IR_OP_NEGATIVE;
  if (SgConditionalExp *cond_exp = isSgConditionalExp(op)) {
    SgExpression *cond = cond_exp->get_conditional_exp();
    if (isSgGreaterThanOp(cond))
      return IR_OP_MAX;
    if (isSgLessThanOp(cond))
      return IR_OP_MIN;
    // Any other condition used to fall off the end of the function without
    // returning a value; report it as unknown instead.
    return IR_OP_UNKNOWN;
  }
  if (isSgUnaryAddOp(op))
    return IR_OP_POSITIVE;
  if (isSgNullExpression(op))
    return IR_OP_NULL;

  return IR_OP_UNKNOWN;
}

/*void IR_roseCode::populateLists(SgNode* tnl_1, SgStatementPtrList* list_1, + SgStatementPtrList& output_list_1) { + if ((tnl_1 == NULL) && (list_1 != NULL)) { + output_list_1 = *list_1; + } else if (tnl_1 != NULL) { + + if (isSgForStatement(tnl_1)) { + SgStatement* check = isSgForStatement(tnl_1)->get_loop_body(); + if (isSgBasicBlock(check)) { + output_list_1 = isSgBasicBlock(check)->get_statements(); + + } else + output_list_1.push_back(check); + + } else if (isSgBasicBlock(tnl_1)) + output_list_1 = isSgBasicBlock(tnl_1)->get_statements(); + else if (isSgExprStatement(tnl_1)) + output_list_1.push_back(isSgExprStatement(tnl_1)); + else + //if (isSgIfStmt(tnl_1)) { + + throw ir_error( + "Statement type not handled, (probably IF statement)!!"); + + } + + } + + void IR_roseCode::populateScalars(const omega::CG_outputRepr *repr1, + std::map<SgVarRefExp*, IR_ScalarRef*> &read_scalars_1, + std::map<SgVarRefExp*, IR_ScalarRef*> &write_scalars_1, + std::set<std::string> &indices, std::vector<std::string> &index) { + + //std::set<std::string> &def_vars) { + std::vector<IR_ScalarRef *>
scalars = FindScalarRef(repr1); + + for (int k = 0; k < index.size(); k++) + indices.insert(index[k]); + + for (int k = 0; k < scalars.size(); k++) + if (indices.find(scalars[k]->name()) == indices.end()) { + if (scalars[k]->is_write()) { + write_scalars_1.insert( + std::pair<SgVarRefExp*, IR_ScalarRef*>( + (isSgVarRefExp( + static_cast<const omega::CG_roseRepr *>(scalars[k]->convert())->GetExpression())), + scalars[k])); + + } else + + read_scalars_1.insert( + std::pair<SgVarRefExp*, IR_ScalarRef*>( + (isSgVarRefExp( + static_cast<const omega::CG_roseRepr *>(scalars[k]->convert())->GetExpression())), + scalars[k])); + } + + } + + + void IR_roseCode::checkWriteDependency(SgStatementPtrList &output_list_1, + std::vector<DependenceVector> &dvs1, + std::map<SgVarRefExp*, IR_ScalarRef*> &read_scalars_1, + std::map<SgVarRefExp*, IR_ScalarRef*> &write_scalars_1, + std::vector<std::string> &index, int i, int j) { + + for (std::map<SgVarRefExp*, IR_ScalarRef*>::iterator it = + read_scalars_1.begin(); it != read_scalars_1.end(); it++) { + SgVarRefExp* var__ = it->first; + + ssa_unfiltered_cfg::SSA_UnfilteredCfg::NodeReachingDefTable to_compare = + main_ssa->getReachingDefsBefore(isSgNode(var__)); + + for (ssa_unfiltered_cfg::SSA_UnfilteredCfg::NodeReachingDefTable::iterator it4 = + to_compare.begin(); it4 != to_compare.end(); it4++) { + ssa_unfiltered_cfg::SSA_UnfilteredCfg::VarName var_ = it4->first; + for (int j = 0; j < var_.size(); j++) { + int found = 0; + if (var_[j] == var__->get_symbol()->get_declaration()) { + + ssa_unfiltered_cfg::ReachingDef::ReachingDefPtr to_compare_2 = + it4->second; + + if (to_compare_2->isPhiFunction()) { + std::set<VirtualCFG::CFGNode> to_compare_set = + to_compare_2->getActualDefinitions(); + for (std::set<VirtualCFG::CFGNode>::iterator cfg_it = + to_compare_set.begin(); + cfg_it != to_compare_set.end(); cfg_it++) { + + if (isSgAssignOp(cfg_it->getNode()) + || isSgCompoundAssignOp(cfg_it->getNode())) + if (SgVarRefExp* variable = + 
isSgVarRefExp( + isSgBinaryOp(cfg_it->getNode())->get_lhs_operand())) { + + if (write_scalars_1.find(variable) + != write_scalars_1.end()) { + + + //end debug + found = 1; + DependenceVector dv1; + dv1.sym = it->second->symbol(); + dv1.is_scalar_dependence = true; + + int max = (j > i) ? j : i; + int start = index.size() - max; + + //1.lbounds.push_back(0); + //1.ubounds.push_back(0); + //dv2.sym = + // read_scalars_2.find(*di)->second->symbol(); + for (int k = 0; k < index.size(); k++) { + if (k >= max) { + dv1.lbounds.push_back( + negInfinity); + dv1.ubounds.push_back(-1); + } else { + dv1.lbounds.push_back(0); + dv1.ubounds.push_back(0); + + } + + } + dvs1.push_back(dv1); + break; + } + } + } + + } + + } + if (found == 1) + break; + } + } + } + } + void IR_roseCode::checkDependency(SgStatementPtrList &output_list_1, + std::vector<DependenceVector> &dvs1, + std::map<SgVarRefExp*, IR_ScalarRef*> &read_scalars_1, + std::map<SgVarRefExp*, IR_ScalarRef*> &write_scalars_1, + std::vector<std::string> &index, int i, int j) { + + for (SgStatementPtrList::iterator it2 = output_list_1.begin(); + it2 != output_list_1.end(); it2++) { + + std::set<SgVarRefExp*> vars_1 = main_ssa->getUsesAtNode( + isSgNode(isSgExprStatement(*it2)->get_expression())); + + std::set<SgVarRefExp*>::iterator di; + + for (di = vars_1.begin(); di != vars_1.end(); di++) { + int found = 0; + if (read_scalars_1.find(*di) != read_scalars_1.end()) { + + ssa_unfiltered_cfg::ReachingDef::ReachingDefPtr to_compare = + main_ssa->getDefinitionForUse(*di); + if (to_compare->isPhiFunction()) { + + std::set<VirtualCFG::CFGNode> to_compare_set = + to_compare->getActualDefinitions(); + + for (std::set<VirtualCFG::CFGNode>::iterator cfg_it = + to_compare_set.begin(); + cfg_it != to_compare_set.end(); cfg_it++) { + + + if (SgAssignOp* definition = isSgAssignOp( + cfg_it->getNode())) + if (SgVarRefExp* variable = isSgVarRefExp( + definition->get_lhs_operand())) { + + if (write_scalars_1.find(variable) + != 
write_scalars_1.end()) { + + found = 1; + DependenceVector dv1; + //DependenceVector dv2; + dv1.sym = + read_scalars_1.find(*di)->second->symbol(); + dv1.is_scalar_dependence = true; + + int max = (j > i) ? j : i; + int start = index.size() - max; + + //1.lbounds.push_back(0); + //1.ubounds.push_back(0); + //dv2.sym = + // read_scalars_2.find(*di)->second->symbol(); + for (int k = 0; k < index.size(); k++) { + if (k >= max) { + dv1.lbounds.push_back(negInfinity); + dv1.ubounds.push_back(-1); + } else { + dv1.lbounds.push_back(0); + dv1.ubounds.push_back(0); + + } + + } + dvs1.push_back(dv1); + break; + } + } + } + } + if (found == 1) + break; + } + } + } + + } + + void IR_roseCode::checkSelfDependency(SgStatementPtrList &output_list_1, + std::vector<DependenceVector> &dvs1, + std::map<SgVarRefExp*, IR_ScalarRef*> &read_scalars_1, + std::map<SgVarRefExp*, IR_ScalarRef*> &write_scalars_1, + std::vector<std::string> &index, int i, int j) { + + for (SgStatementPtrList::iterator it2 = output_list_1.begin(); + it2 != output_list_1.end(); it2++) { + + std::set<SgVarRefExp*> vars_1 = main_ssa->getUsesAtNode( + isSgNode(isSgExprStatement(*it2)->get_expression())); + + std::set<SgVarRefExp*>::iterator di; + + for (di = vars_1.begin(); di != vars_1.end(); di++) { + + if (read_scalars_1.find(*di) != read_scalars_1.end()) { + + ssa_unfiltered_cfg::ReachingDef::ReachingDefPtr to_compare = + main_ssa->getDefinitionForUse(*di); + if (to_compare->isPhiFunction()) { + + std::set<VirtualCFG::CFGNode> to_compare_set = + to_compare->getActualDefinitions(); + int found = 0; + for (std::set<VirtualCFG::CFGNode>::iterator cfg_it = + to_compare_set.begin(); + cfg_it != to_compare_set.end(); cfg_it++) { + + if (isSgAssignOp(cfg_it->getNode()) + || isSgCompoundAssignOp(cfg_it->getNode())) + if (SgVarRefExp* variable = + isSgVarRefExp( + isSgBinaryOp(cfg_it->getNode())->get_lhs_operand())) { + + if (write_scalars_1.find(variable) + == write_scalars_1.end()) { + + + found = 1; + 
DependenceVector dv1; + dv1.sym = + read_scalars_1.find(*di)->second->symbol(); + dv1.is_scalar_dependence = true; + + int max = (j > i) ? j : i; + int start = index.size() - max; + + //1.lbounds.push_back(0); + //1.ubounds.push_back(0); + //dv2.sym = + // read_scalars_2.find(*di)->second->symbol(); + for (int k = 0; k < index.size(); k++) { + if (k >= max) { + dv1.lbounds.push_back(negInfinity); + dv1.ubounds.push_back(-1); + } else { + dv1.lbounds.push_back(0); + dv1.ubounds.push_back(0); + + } + + } + dvs1.push_back(dv1); + break; + } + } + } + } + + } + } + } + + } +*/ +IR_CONDITION_TYPE IR_roseCode::QueryBooleanExpOperation( + const omega::CG_outputRepr *repr) const { + SgExpression* op2 = + static_cast<const omega::CG_roseRepr *>(repr)->GetExpression(); + SgNode* op; + + if (op2 == NULL) { + op = static_cast<const omega::CG_roseRepr *>(repr)->GetCode(); + + if (op != NULL) { + if (isSgExprStatement(op)) + op2 = isSgExprStatement(op)->get_expression(); + else + return IR_COND_UNKNOWN; + } else + return IR_COND_UNKNOWN; + } + + if (isSgEqualityOp(op2)) + return IR_COND_EQ; + else if (isSgNotEqualOp(op2)) + return IR_COND_NE; + else if (isSgLessThanOp(op2)) + return IR_COND_LT; + else if (isSgLessOrEqualOp(op2)) + return IR_COND_LE; + else if (isSgGreaterThanOp(op2)) + return IR_COND_GT; + else if (isSgGreaterOrEqualOp(op2)) + return IR_COND_GE; + + return IR_COND_UNKNOWN; + +} + +std::vector<omega::CG_outputRepr *> IR_roseCode::QueryExpOperand( + const omega::CG_outputRepr *repr) const { + std::vector<omega::CG_outputRepr *> v; + SgExpression* op1; + SgExpression* op2; + SgExpression* op = + static_cast<const omega::CG_roseRepr *>(repr)->GetExpression(); + omega::CG_roseRepr *repr1; + + if (isSgValueExp(op) || isSgVarRefExp(op)) { + omega::CG_roseRepr *repr = new omega::CG_roseRepr(op); + v.push_back(repr); + } else if (isSgAssignOp(op)) { + op1 = isSgAssignOp(op)->get_rhs_operand(); + repr1 = new omega::CG_roseRepr(op1); + v.push_back(repr1); + /*may be a 
problem as assignOp is a binaryop destop might be needed */ + } else if (isSgMinusOp(op)) { + op1 = isSgMinusOp(op)->get_operand(); + repr1 = new omega::CG_roseRepr(op1); + v.push_back(repr1); + } else if (isSgUnaryAddOp(op)) { + op1 = isSgUnaryAddOp(op)->get_operand(); + repr1 = new omega::CG_roseRepr(op1); + v.push_back(repr1); + } else if ((isSgAddOp(op) || isSgSubtractOp(op)) + || (isSgMultiplyOp(op) || isSgDivideOp(op))) { + op1 = isSgBinaryOp(op)->get_lhs_operand(); + repr1 = new omega::CG_roseRepr(op1); + v.push_back(repr1); + + op2 = isSgBinaryOp(op)->get_rhs_operand(); + repr1 = new omega::CG_roseRepr(op2); + v.push_back(repr1); + } else if (isSgConditionalExp(op)) { + SgExpression* cond = isSgConditionalExp(op)->get_conditional_exp(); + op1 = isSgBinaryOp(cond)->get_lhs_operand(); + repr1 = new omega::CG_roseRepr(op1); + v.push_back(repr1); + + op2 = isSgBinaryOp(cond)->get_rhs_operand(); + repr1 = new omega::CG_roseRepr(op2); + v.push_back(repr1); + } else if (isSgCompoundAssignOp(op)) { + SgExpression* cond = isSgCompoundAssignOp(op); + op1 = isSgBinaryOp(cond)->get_lhs_operand(); + repr1 = new omega::CG_roseRepr(op1); + v.push_back(repr1); + + op2 = isSgBinaryOp(cond)->get_rhs_operand(); + repr1 = new omega::CG_roseRepr(op2); + v.push_back(repr1); + + } else if (isSgBinaryOp(op)) { + + op1 = isSgBinaryOp(op)->get_lhs_operand(); + repr1 = new omega::CG_roseRepr(op1); + v.push_back(repr1); + + op2 = isSgBinaryOp(op)->get_rhs_operand(); + repr1 = new omega::CG_roseRepr(op2); + v.push_back(repr1); + } + + else + throw ir_error("operation not supported"); + + return v; +} + +IR_Ref *IR_roseCode::Repr2Ref(const omega::CG_outputRepr *repr) const { + SgExpression* op = + static_cast<const omega::CG_roseRepr *>(repr)->GetExpression(); + + if (SgValueExp* im = isSgValueExp(op)) { + if (isSgIntVal(im)) + return new IR_roseConstantRef(this, + static_cast<omega::coef_t>(isSgIntVal(im)->get_value())); + else if (isSgUnsignedIntVal(im)) + return new 
IR_roseConstantRef(this, + static_cast<omega::coef_t>(isSgUnsignedIntVal(im)->get_value())); + else if (isSgLongIntVal(im)) + return new IR_roseConstantRef(this, + static_cast<omega::coef_t>(isSgLongIntVal(im)->get_value())); + else if (isSgFloatVal(im)) + return new IR_roseConstantRef(this, isSgFloatVal(im)->get_value()); + else + assert(0); + + } else if (isSgVarRefExp(op)) + return new IR_roseScalarRef(this, isSgVarRefExp(op)); + else + assert(0); + +} + diff --git a/ir_rose.hh b/ir_rose.hh new file mode 100644 index 0000000..0c0417a --- /dev/null +++ b/ir_rose.hh @@ -0,0 +1,289 @@ +#ifndef IR_ROSE_HH +#define IR_ROSE_HH + +#include <omega.h> +#include "ir_code.hh" +#include "ir_rose_utils.hh" +#include <AstInterface_ROSE.h> +#include "chill_error.hh" +#include "staticSingleAssignment.h" +#include "VariableRenaming.h" +#include "ssaUnfilteredCfg.h" +#include "virtualCFG.h" +#include <omega.h> + +struct IR_roseScalarSymbol: public IR_ScalarSymbol { + SgVariableSymbol* vs_; + + IR_roseScalarSymbol(const IR_Code *ir, SgVariableSymbol *vs) { + ir_ = ir; + vs_ = vs; + } + + std::string name() const; + int size() const; + bool operator==(const IR_Symbol &that) const; + IR_Symbol *clone() const; +}; + +struct IR_roseArraySymbol: public IR_ArraySymbol { + + SgVariableSymbol* vs_; + + IR_roseArraySymbol(const IR_Code *ir, SgVariableSymbol* vs) { + ir_ = ir; + vs_ = vs; + } + std::string name() const; + int elem_size() const; + int n_dim() const; + omega::CG_outputRepr *size(int dim) const; + bool operator==(const IR_Symbol &that) const; + IR_ARRAY_LAYOUT_TYPE layout_type() const; + IR_Symbol *clone() const; + +}; + +struct IR_roseConstantRef: public IR_ConstantRef { + union { + omega::coef_t i_; + double f_; + }; + + IR_roseConstantRef(const IR_Code *ir, omega::coef_t i) { + ir_ = ir; + type_ = IR_CONSTANT_INT; + i_ = i; + } + IR_roseConstantRef(const IR_Code *ir, double f) { + ir_ = ir; + type_ = IR_CONSTANT_FLOAT; + f_ = f; + } + omega::coef_t integer() const { + 
assert(is_integer()); + return i_; + } + bool operator==(const IR_Ref &that) const; + omega::CG_outputRepr *convert(); + IR_Ref *clone() const; + +}; + +struct IR_roseScalarRef: public IR_ScalarRef { + SgAssignOp *ins_pos_; + int op_pos_; // -1 means destination operand, otherwise source operand + SgVarRefExp *vs_; + int is_write_; + IR_roseScalarRef(const IR_Code *ir, SgVarRefExp *sym) { + ir_ = ir; + ins_pos_ = isSgAssignOp(sym->get_parent()); + op_pos_ = 0; + if (ins_pos_ != NULL) + if (sym == isSgVarRefExp(ins_pos_->get_lhs_operand())) + op_pos_ = -1; + + vs_ = sym; + } + IR_roseScalarRef(const IR_Code *ir, SgVarRefExp *ins, int pos) { + ir_ = ir; + /* ins_pos_ = ins; + op_pos_ = pos; + SgExpression* op; + if (pos == -1) + op = ins->get_lhs_operand(); + else + op = ins->get_rhs_operand(); + + */ + + is_write_ = pos; + + /* if (vs_ == NULL || pos > 0) + throw ir_error( + "Src operand not a variable or more than one src operand!!"); + */ + + vs_ = ins; + + } + bool is_write() const; + IR_ScalarSymbol *symbol() const; + bool operator==(const IR_Ref &that) const; + omega::CG_outputRepr *convert(); + IR_Ref *clone() const; +}; + +struct IR_roseArrayRef: public IR_ArrayRef { + + SgPntrArrRefExp *ia_; + + int is_write_; + IR_roseArrayRef(const IR_Code *ir, SgPntrArrRefExp *ia, int write) { + ir_ = ir; + ia_ = ia; + is_write_ = write; + } + bool is_write() const; + omega::CG_outputRepr *index(int dim) const; + IR_ArraySymbol *symbol() const; + bool operator==(const IR_Ref &that) const; + omega::CG_outputRepr *convert(); + IR_Ref *clone() const; +}; + +struct IR_roseLoop: public IR_Loop { + SgNode *tf_; + + IR_roseLoop(const IR_Code *ir, SgNode *tf) { + ir_ = ir; + tf_ = tf; + } + + IR_ScalarSymbol *index() const; + omega::CG_outputRepr *lower_bound() const; + omega::CG_outputRepr *upper_bound() const; + IR_CONDITION_TYPE stop_cond() const; + IR_Block *body() const; + IR_Block *convert(); + int step_size() const; + IR_Control *clone() const; +}; + +struct IR_roseBlock: 
public IR_Block { + SgNode* tnl_; + SgNode *start_, *end_; + + IR_roseBlock(const IR_Code *ir, SgNode *tnl, SgNode *start, SgNode *end) { + ir_ = ir; + tnl_ = tnl; + start_ = start; + end_ = end; + } + + IR_roseBlock(const IR_Code *ir, SgNode *tnl) { + ir_ = ir; + tnl_ = tnl; + start_ = tnl_->get_traversalSuccessorByIndex(0); + end_ = tnl_->get_traversalSuccessorByIndex( + (tnl_->get_numberOfTraversalSuccessors()) - 1); + } + omega::CG_outputRepr *extract() const; + omega::CG_outputRepr *original() const; + IR_Control *clone() const; +}; + +struct IR_roseIf: public IR_If { + SgNode *ti_; + + IR_roseIf(const IR_Code *ir, SgNode *ti) { + ir_ = ir; + ti_ = ti; + } + ~IR_roseIf() { + } + omega::CG_outputRepr *condition() const; + IR_Block *then_body() const; + IR_Block *else_body() const; + IR_Block *convert(); + IR_Control *clone() const; +}; + +class IR_roseCode_Global_Init { +private: + static IR_roseCode_Global_Init *pinstance; +public: + SgProject* project; + static IR_roseCode_Global_Init *Instance(char** argv); +}; + +class IR_roseCode: public IR_Code { +protected: + SgSourceFile* file; + SgGlobal *root; + SgGlobal *firstScope; + SgSymbolTable* symtab_; + SgSymbolTable* symtab2_; + SgSymbolTable* symtab3_; + SgDeclarationStatementPtrList::iterator p; + SgFunctionDeclaration *func; + bool is_fortran_; + int i_; + StaticSingleAssignment *ssa_for_scalar; + ssa_unfiltered_cfg::SSA_UnfilteredCfg *main_ssa; + VariableRenaming *varRenaming_for_scalar; +public: + IR_roseCode(const char *filename, const char* proc_name); + ~IR_roseCode(); + + IR_ScalarSymbol *CreateScalarSymbol(const IR_Symbol *sym, int memory_type = + 0); + IR_ArraySymbol *CreateArraySymbol(const IR_Symbol *sym, + std::vector<omega::CG_outputRepr *> &size, int memory_type = 0); + IR_ScalarRef *CreateScalarRef(const IR_ScalarSymbol *sym); + IR_ArrayRef *CreateArrayRef(const IR_ArraySymbol *sym, + std::vector<omega::CG_outputRepr *> &index); + int ArrayIndexStartAt() { + if (is_fortran_) + return 1; + 
else + return 0; + } + + void populateLists(SgNode* tnl_1, SgStatementPtrList* list_1, + SgStatementPtrList& output_list_1); + void populateScalars(const omega::CG_outputRepr *repr1, + std::map<SgVarRefExp*, IR_ScalarRef*> &read_scalars_1, + std::map<SgVarRefExp*, IR_ScalarRef*> &write_scalars_1, + std::set<std::string> &indices, std::vector<std::string> &index); + // std::set<std::string> &def_vars); + /*void findDefinitions(SgStatementPtrList &list_1, + std::set<VirtualCFG::CFGNode> &reaching_defs_1, + std::map<SgVarRefExp*, IR_ScalarRef*> &write_scalars_1, + std::set<std::string> &def_vars); + */ + /* void checkDependency(SgStatementPtrList &output_list_1, + std::vector<DependenceVector> &dvs1, + std::map<SgVarRefExp*, IR_ScalarRef*> &read_scalars_1, + std::map<SgVarRefExp*, IR_ScalarRef*> &write_scalars_1, + std::vector<std::string> &index, int i, int j); + void checkSelfDependency(SgStatementPtrList &output_list_1, + std::vector<DependenceVector> &dvs1, + std::map<SgVarRefExp*, IR_ScalarRef*> &read_scalars_1, + std::map<SgVarRefExp*, IR_ScalarRef*> &write_scalars_1, + std::vector<std::string> &index, int i, int j); + void checkWriteDependency(SgStatementPtrList &output_list_1, + std::vector<DependenceVector> &dvs1, + std::map<SgVarRefExp*, IR_ScalarRef*> &read_scalars_1, + std::map<SgVarRefExp*, IR_ScalarRef*> &write_scalars_1, + std::vector<std::string> &index, int i, int j); + */ + std::vector<IR_ArrayRef *> FindArrayRef( + const omega::CG_outputRepr *repr) const; + std::vector<IR_ScalarRef *> FindScalarRef( + const omega::CG_outputRepr *repr) const; + std::vector<IR_Control *> FindOneLevelControlStructure( + const IR_Block *block) const; + IR_Block *MergeNeighboringControlStructures( + const std::vector<IR_Control *> &controls) const; + IR_Block *GetCode() const; + void ReplaceCode(IR_Control *old, omega::CG_outputRepr *repr); + void ReplaceExpression(IR_Ref *old, omega::CG_outputRepr *repr); + + IR_OPERATION_TYPE QueryExpOperation(const 
omega::CG_outputRepr *repr) const; + IR_CONDITION_TYPE QueryBooleanExpOperation( + const omega::CG_outputRepr *repr) const; + std::vector<omega::CG_outputRepr *> QueryExpOperand( + const omega::CG_outputRepr *repr) const; + IR_Ref *Repr2Ref(const omega::CG_outputRepr *) const; + /* std::pair<std::vector<DependenceVector>, std::vector<DependenceVector> > + FindScalarDeps(const omega::CG_outputRepr *repr1, + const omega::CG_outputRepr *repr2, std::vector<std::string> index, + int i, int j); + */ + void finalizeRose(); + friend class IR_roseArraySymbol; + friend class IR_roseArrayRef; +}; + +#endif diff --git a/ir_rose_utils.cc b/ir_rose_utils.cc new file mode 100644 index 0000000..fbce2f1 --- /dev/null +++ b/ir_rose_utils.cc @@ -0,0 +1,88 @@ +/***************************************************************************** + Copyright (C) 2008 University of Southern California + Copyright (C) 2009 University of Utah + All Rights Reserved. + + Purpose: + SUIF interface utilities. + + Notes: + + Update history: + 01/2006 created by Chun Chen +*****************************************************************************/ + +//#include <suif1.h> +//#include <useful.h> +//#include <vector> +//#include <algorithm> +//#include <code_gen/CG_suifRepr.h> +#include "ir_rose_utils.hh" + + + +std::vector<SgForStatement *> find_loops(SgNode *tnl) { + std::vector<SgForStatement *> result; + + //tree_node_list_iter iter(tnl); + + /*while (!iter.is_empty()) { + tree_node *tn = iter.step(); + if (tn->kind() == TREE_FOR) + result.push_back(static_cast<tree_for *>(tn)); + } + */ + + SgStatementPtrList& blockStatements = isSgBasicBlock(tnl)->get_statements(); + for(SgStatementPtrList::const_iterator j = blockStatements.begin(); j != blockStatements.end(); j++) + if(isSgForStatement(*j)) + result.push_back(isSgForStatement(*j)); + + return result; +} + +std::vector<SgForStatement *> find_deepest_loops(SgStatementPtrList& tnl) { + + std::vector<SgForStatement *> loops; + + + + 
for(SgStatementPtrList::const_iterator j = tnl.begin(); j != tnl.end(); j++) + { + std::vector<SgForStatement *> t = find_deepest_loops(isSgNode(*j)); + if (t.size() > loops.size()) + loops = t; + } + + + + return loops; + +} + + + + + + + + +std::vector<SgForStatement *> find_deepest_loops(SgNode *tn) { + if (isSgForStatement(tn)) { + std::vector<SgForStatement *> loops; + + SgForStatement *tnf = static_cast<SgForStatement*>(tn); + loops.insert(loops.end(), tnf); + std::vector<SgForStatement*> t = find_deepest_loops(isSgNode(tnf->get_loop_body())); + std::copy(t.begin(), t.end(), std::back_inserter(loops)); + + return loops; + } + else if (isSgBasicBlock(tn)) { + SgBasicBlock *tnb = static_cast<SgBasicBlock*>(tn); + return find_deepest_loops(tnb->get_statements()); + } + else + return std::vector<SgForStatement *>(); +} + diff --git a/ir_rose_utils.hh b/ir_rose_utils.hh new file mode 100644 index 0000000..503d7f4 --- /dev/null +++ b/ir_rose_utils.hh @@ -0,0 +1,18 @@ +#ifndef IR_ROSE_UTILS_HH +#define IR_ROSE_UTILS_HH +#include <vector> +#include "rose.h" +#include "sageBuilder.h" + + + +std::vector<SgForStatement *> find_deepest_loops(SgNode *tnl); +std::vector<SgForStatement *> find_loops(SgNode *tnl); + + + +SgNode* loop_body_at_level(SgNode* tnl, int level); +SgNode* loop_body_at_level(SgForStatement* loop, int level); +void swap_node_for_node_list(SgNode* tn, SgNode* new_tnl); + +#endif diff --git a/ir_suif.cc b/ir_suif.cc new file mode 100644 index 0000000..a0ea357 --- /dev/null +++ b/ir_suif.cc @@ -0,0 +1,1438 @@ +/***************************************************************************** + Copyright (C) 2009-2011 University of Utah + All Rights Reserved. + + Purpose: + CHiLL's SUIF interface. + + Notes: + Array supports mixed pointer and array type in a single declaration. + + History: + 02/23/2009 Created by Chun Chen. 
+*****************************************************************************/ + +#include <typeinfo> +#include <useful.h> +#include "ir_suif.hh" +#include "ir_suif_utils.hh" +#include "chill_error.hh" + +// ---------------------------------------------------------------------------- +// Class: IR_suifScalarSymbol +// ---------------------------------------------------------------------------- + +std::string IR_suifScalarSymbol::name() const { + return vs_->name(); +} + + +int IR_suifScalarSymbol::size() const { + return vs_->type()->size(); +} + + +bool IR_suifScalarSymbol::operator==(const IR_Symbol &that) const { + if (typeid(*this) != typeid(that)) + return false; + + const IR_suifScalarSymbol *l_that = static_cast<const IR_suifScalarSymbol *>(&that); + return this->vs_ == l_that->vs_; +} + +IR_Symbol *IR_suifScalarSymbol::clone() const { + return new IR_suifScalarSymbol(ir_, vs_); +} + +// ---------------------------------------------------------------------------- +// Class: IR_suifArraySymbol +// ---------------------------------------------------------------------------- + +std::string IR_suifArraySymbol::name() const { + return vs_->name(); +} + + +int IR_suifArraySymbol::elem_size() const { + type_node *tn = vs_->type(); + if (tn->is_modifier()) + tn = static_cast<modifier_type *>(tn)->base(); + + while (tn->is_array()) + tn = static_cast<array_type *>(tn)->elem_type(); + + return tn->size(); +} + + +int IR_suifArraySymbol::n_dim() const { + type_node *tn = vs_->type(); + if (tn->is_modifier()) + tn = static_cast<modifier_type *>(tn)->base(); + + int n = 0; + while (true) { + if (tn->is_array()) { + n++; + tn = static_cast<array_type *>(tn)->elem_type(); + } + else if (tn->is_ptr()) { + n++; + tn = static_cast<ptr_type *>(tn)->ref_type(); + } + else + break; + } + + return n - indirect_; +} + + +omega::CG_outputRepr *IR_suifArraySymbol::size(int dim) const { + type_node *tn = vs_->type(); + if (tn->is_modifier()) + tn = static_cast<modifier_type 
*>(tn)->base(); + + for (int i = 0; i < dim; i++) { + if (tn->is_array()) + tn = static_cast<array_type *>(tn)->elem_type(); + else if (tn->is_ptr()) + tn = static_cast<ptr_type *>(tn)->ref_type(); + else + throw ir_error("array parsing error"); + } + if (tn->is_ptr()) + return new omega::CG_suifRepr(operand()); + else if (!tn->is_array()) + throw ir_error("array parsing error"); + + array_bound ub = static_cast<array_type *>(tn)->upper_bound(); + int c = 1; + omega::CG_outputRepr *ub_repr = NULL; + if (ub.is_constant()) + c += ub.constant(); + else if (ub.is_variable()) { + var_sym *vs = ub.variable(); + + if (static_cast<const IR_suifCode *>(ir_)->init_code_ != NULL) { + tree_node_list *tnl = static_cast<omega::CG_suifRepr *>(static_cast<const IR_suifCode *>(ir_)->init_code_)->GetCode(); + tree_node_list_iter iter(tnl); + while(!iter.is_empty()) { + tree_node *tn = iter.step(); + if (tn->is_instr()) { + instruction *ins = static_cast<tree_instr *>(tn)->instr(); + operand dst = ins->dst_op(); + if (dst.is_symbol() && dst.symbol() == vs) { + operand op; + if (ins->opcode() == io_cpy) + op = ins->src_op(0).clone(); + else + op = operand(ins->clone()); + + ub_repr = new omega::CG_suifRepr(op); + break; + } + } + } + } + if (ub_repr == NULL) + ub_repr = new omega::CG_suifRepr(operand(vs)); + } + else + throw ir_error("array parsing error"); + + array_bound lb = static_cast<array_type *>(tn)->lower_bound(); + omega::CG_outputRepr *lb_repr = NULL; + if (lb.is_constant()) + c -= lb.constant(); + else if (lb.is_variable()) { + var_sym *vs = ub.variable(); + + tree_node_list *tnl = static_cast<omega::CG_suifRepr *>(static_cast<const IR_suifCode *>(ir_)->init_code_)->GetCode(); + tree_node_list_iter iter(tnl); + while(!iter.is_empty()) { + tree_node *tn = iter.step(); + if (tn->is_instr()) { + instruction *ins = static_cast<tree_instr *>(tn)->instr(); + operand dst = ins->dst_op(); + if (dst.is_symbol() && dst.symbol() == vs) { + operand op; + if (ins->opcode() == io_cpy) + 
op = ins->src_op(0).clone(); + else + op = operand(ins->clone()); + + lb_repr = new omega::CG_suifRepr(op); + break; + } + } + } + if (lb_repr == NULL) + lb_repr = new omega::CG_suifRepr(operand(vs)); + } + else + throw ir_error("array parsing error"); + + omega::CG_outputRepr *repr = ir_->builder()->CreateMinus(ub_repr, lb_repr); + if (c != 0) + repr = ir_->builder()->CreatePlus(repr, ir_->builder()->CreateInt(c)); + + return repr; +} + + +IR_ARRAY_LAYOUT_TYPE IR_suifArraySymbol::layout_type() const { + if (static_cast<const IR_suifCode *>(ir_)->is_fortran_) + return IR_ARRAY_LAYOUT_COLUMN_MAJOR; + else + return IR_ARRAY_LAYOUT_ROW_MAJOR; +} + + +bool IR_suifArraySymbol::operator==(const IR_Symbol &that) const { + if (typeid(*this) != typeid(that)) + return false; + + const IR_suifArraySymbol *l_that = static_cast<const IR_suifArraySymbol *>(&that); + return this->vs_ == l_that->vs_ && this->offset_ == l_that->offset_; +} + + +IR_Symbol *IR_suifArraySymbol::clone() const { + return new IR_suifArraySymbol(ir_, vs_, indirect_, offset_); +} + +// ---------------------------------------------------------------------------- +// Class: IR_suifConstantRef +// ---------------------------------------------------------------------------- + +bool IR_suifConstantRef::operator==(const IR_Ref &that) const { + if (typeid(*this) != typeid(that)) + return false; + + const IR_suifConstantRef *l_that = static_cast<const IR_suifConstantRef *>(&that); + + if (this->type_ != l_that->type_) + return false; + + if (this->type_ == IR_CONSTANT_INT) + return this->i_ == l_that->i_; + else + return this->f_ == l_that->f_; +} + + +omega::CG_outputRepr *IR_suifConstantRef::convert() { + if (type_ == IR_CONSTANT_INT) { + omega::CG_suifRepr *result = new omega::CG_suifRepr(operand(static_cast<int>(i_), type_s32)); + delete this; + return result; + } + else + throw ir_error("constant type not supported"); +} + + +IR_Ref *IR_suifConstantRef::clone() const { + if (type_ == IR_CONSTANT_INT) + return 
new IR_suifConstantRef(ir_, i_); + else if (type_ == IR_CONSTANT_FLOAT) + return new IR_suifConstantRef(ir_, f_); + else + throw ir_error("constant type not supported"); +} + + +// ---------------------------------------------------------------------------- +// Class: IR_suifScalarRef +// ---------------------------------------------------------------------------- + +bool IR_suifScalarRef::is_write() const { + if (ins_pos_ != NULL && op_pos_ == -1) + return true; + else + return false; +} + + +IR_ScalarSymbol *IR_suifScalarRef::symbol() const { + return new IR_suifScalarSymbol(ir_, vs_); +} + + +bool IR_suifScalarRef::operator==(const IR_Ref &that) const { + if (typeid(*this) != typeid(that)) + return false; + + const IR_suifScalarRef *l_that = static_cast<const IR_suifScalarRef *>(&that); + + if (this->ins_pos_ == NULL) + return this->vs_ == l_that->vs_; + else + return this->ins_pos_ == l_that->ins_pos_ && this->op_pos_ == l_that->op_pos_; +} + + +omega::CG_outputRepr *IR_suifScalarRef::convert() { + omega::CG_suifRepr *result = new omega::CG_suifRepr(operand(vs_)); + delete this; + return result; +} + + +IR_Ref * IR_suifScalarRef::clone() const { + if (ins_pos_ == NULL) + return new IR_suifScalarRef(ir_, vs_); + else + return new IR_suifScalarRef(ir_, ins_pos_, op_pos_); +} + + +// ---------------------------------------------------------------------------- +// Class: IR_suifArrayRef +// ---------------------------------------------------------------------------- + +bool IR_suifArrayRef::is_write() const { + return ::is_lhs(const_cast<in_array *>(ia_)); +} + + +omega::CG_outputRepr *IR_suifArrayRef::index(int dim) const { + operand op = find_array_index(ia_, n_dim(), dim, static_cast<const IR_suifCode *>(ir_)->is_fortran_); + return new omega::CG_suifRepr(op.clone()); +} + + +IR_ArraySymbol *IR_suifArrayRef::symbol() const { + in_array *current = ia_; + + // find the indirectness of the symbol, i.e., if it is (**A)[i,j] + int indirect = 0; + if 
(!static_cast<const IR_suifCode *>(ir_)->is_fortran_) { + operand op = ia_->base_op(); + while (op.is_instr()) { + instruction *ins = op.instr(); + if (ins->opcode() == io_lod) { + indirect++; + op = ins->src_op(0); + } + else + break; + } + if (op.is_symbol()) + indirect++; + } + + while (true) { + operand op = current->base_op(); + if (op.is_symbol()) { + return new IR_suifArraySymbol(ir_, op.symbol(), indirect); + } + else if (op.is_instr()) { + instruction *ins = op.instr(); + if (ins->opcode() == io_ldc) { + immed value = static_cast<in_ldc *>(ins)->value(); + if (value.is_symbol()) { + sym_node *the_sym = value.symbol(); + if (the_sym->is_var()) + return new IR_suifArraySymbol(ir_, static_cast<var_sym *>(the_sym), indirect); + else + break; + } + else + break; + } + else if (ins->opcode() == io_cvt) { + operand op = static_cast<in_rrr *>(ins)->src_op(); + if (op.is_symbol()) { + return new IR_suifArraySymbol(ir_, op.symbol(), indirect); + } + else if (op.is_instr()) { + instruction *ins = op.instr(); + if (ins->opcode() == io_lod) { + operand op = static_cast<in_rrr *>(ins)->src_op(); + if (op.is_symbol()) { + return new IR_suifArraySymbol(ir_, op.symbol(), indirect); + } + else if (op.is_instr()) { + instruction *ins = op.instr(); + if (ins->opcode() == io_array) { + current = static_cast<in_array *>(ins); + continue; + } + else if (ins->opcode() == io_add) { + operand op1 = ins->src_op(0); + operand op2 = ins->src_op(1); + if (!op1.is_symbol() || !op2.is_immed()) + throw ir_error("can't recognize array reference format"); + immed im = op2.immediate(); + if (!im.is_integer()) + throw ir_error("can't recognize array reference format"); + return new IR_suifArraySymbol(ir_, op1.symbol(), indirect, im.integer()); + } + else + break; + } + else + break; + } + else + break; + } + else + break; + } + else { + while (ins->opcode() == io_lod) { + operand op = ins->src_op(0); + if (op.is_instr()) + ins = op.instr(); + else if (op.is_symbol()) + return new 
IR_suifArraySymbol(ir_, op.symbol(), indirect); + else + break; + } + break; + } + } + else + break; + } + + fprintf(stderr, "Warning: null array symbol found, dependence graph bloated!\n"); + + return new IR_suifArraySymbol(ir_, NULL); +} + + +bool IR_suifArrayRef::operator==(const IR_Ref &that) const { + if (typeid(*this) != typeid(that)) + return false; + + const IR_suifArrayRef *l_that = static_cast<const IR_suifArrayRef *>(&that); + + return this->ia_ == l_that ->ia_; +} + + +omega::CG_outputRepr *IR_suifArrayRef::convert() { + omega::CG_suifRepr *result = new omega::CG_suifRepr(operand(this->ia_->clone())); + delete this; + return result; +} + + +IR_Ref *IR_suifArrayRef::clone() const { + return new IR_suifArrayRef(ir_, ia_); +} + + + +// ---------------------------------------------------------------------------- +// Class: IR_suifLoop +// ---------------------------------------------------------------------------- + +IR_ScalarSymbol *IR_suifLoop::index() const { + var_sym *vs = tf_->index(); + return new IR_suifScalarSymbol(ir_, vs); +} + +omega::CG_outputRepr *IR_suifLoop::lower_bound() const { + tree_node_list *tnl = tf_->lb_list(); + tree_node_list_iter iter(tnl); + if (iter.is_empty()) + return new omega::CG_suifRepr(operand()); + tree_node *tn = iter.step(); + if (!iter.is_empty()) + throw ir_error("cannot handle lower bound"); + if (tn->kind() != TREE_INSTR) + throw ir_error("cannot handle lower bound"); + instruction *ins = static_cast<tree_instr *>(tn)->instr(); + return new omega::CG_suifRepr(operand(ins)); +} + +omega::CG_outputRepr *IR_suifLoop::upper_bound() const { + tree_node_list *tnl = tf_->ub_list(); + tree_node_list_iter iter(tnl); + if (iter.is_empty()) + return new omega::CG_suifRepr(operand()); + tree_node *tn = iter.step(); + if (!iter.is_empty()) + throw ir_error("cannot handle lower bound"); + if (tn->kind() != TREE_INSTR) + throw ir_error("cannot handle lower bound"); + instruction *ins = static_cast<tree_instr *>(tn)->instr(); + 
return new omega::CG_suifRepr(operand(ins)); +} + +IR_CONDITION_TYPE IR_suifLoop::stop_cond() const { + if (tf_->test() == FOR_SLT || tf_->test() == FOR_ULT) + return IR_COND_LT; + else if (tf_->test() == FOR_SLTE || tf_->test() == FOR_ULTE) + return IR_COND_LE; + else if (tf_->test() == FOR_SGT || tf_->test() == FOR_UGT) + return IR_COND_GT; + else if (tf_->test() == FOR_SGTE || tf_->test() == FOR_UGTE) + return IR_COND_GE; + else + throw ir_error("loop stop condition unsupported"); +} + +IR_Block *IR_suifLoop::body() const { + tree_node_list *tnl = tf_->body(); + return new IR_suifBlock(ir_, tnl); +} + +int IR_suifLoop::step_size() const { + operand op = tf_->step_op(); + if (!op.is_null()) { + if (op.is_immed()) { + immed im = op.immediate(); + if (im.is_integer()) + return im.integer(); + else + throw ir_error("cannot handle non-integer stride"); + } + else + throw ir_error("cannot handle non-constant stride"); + } + else + return 1; +} + + +IR_Block *IR_suifLoop::convert() { + const IR_Code *ir = ir_; + tree_node_list *tnl = tf_->parent(); + tree_node_list_e *start, *end; + start = end = tf_->list_e(); + delete this; + return new IR_suifBlock(ir, tnl, start, end); +} + + +IR_Control *IR_suifLoop::clone() const { + return new IR_suifLoop(ir_, tf_); +} + +// ---------------------------------------------------------------------------- +// Class: IR_suifBlock +// ---------------------------------------------------------------------------- + +omega::CG_outputRepr *IR_suifBlock::extract() const { + tree_node_list *tnl = new tree_node_list; + tree_node_list_iter iter(tnl_); + while (!iter.is_empty()) { + tree_node *tn = iter.peek(); + if (tn->list_e() == start_) + break; + tn = iter.step(); + } + + while (!iter.is_empty()) { + tree_node *tn = iter.step(); + tnl->append(tn->clone()); + if (tn->list_e() == end_) + break; + } + + return new omega::CG_suifRepr(tnl); +} + +IR_Control *IR_suifBlock::clone() const { + return new IR_suifBlock(ir_, tnl_, start_, end_); +} + + 
+// ---------------------------------------------------------------------------- +// Class: IR_suifIf +// ---------------------------------------------------------------------------- +omega::CG_outputRepr *IR_suifIf::condition() const { + tree_node_list *tnl = ti_->header(); + tree_node_list_iter iter(tnl); + if (iter.is_empty()) + throw ir_error("unrecognized if structure"); + tree_node *tn = iter.step(); + if (!iter.is_empty()) + throw ir_error("unrecognized if structure"); + if (!tn->is_instr()) + throw ir_error("unrecognized if structure"); + instruction *ins = static_cast<tree_instr *>(tn)->instr(); + if (!ins->opcode() == io_bfalse) + throw ir_error("unrecognized if structure"); + operand op = ins->src_op(0); + return new omega::CG_suifRepr(op); +} + +IR_Block *IR_suifIf::then_body() const { + tree_node_list *tnl = ti_->then_part(); + if (tnl == NULL) + return NULL; + tree_node_list_iter iter(tnl); + if (iter.is_empty()) + return NULL; + + return new IR_suifBlock(ir_, tnl); +} + +IR_Block *IR_suifIf::else_body() const { + tree_node_list *tnl = ti_->else_part(); + if (tnl == NULL) + return NULL; + tree_node_list_iter iter(tnl); + if (iter.is_empty()) + return NULL; + + return new IR_suifBlock(ir_, tnl); +} + + +IR_Block *IR_suifIf::convert() { + const IR_Code *ir = ir_; + tree_node_list *tnl = ti_->parent(); + tree_node_list_e *start, *end; + start = end = ti_->list_e(); + delete this; + return new IR_suifBlock(ir, tnl, start, end); +} + + +IR_Control *IR_suifIf::clone() const { + return new IR_suifIf(ir_, ti_); +} + + +// ---------------------------------------------------------------------------- +// Class: IR_suifCode_Global_Init +// ---------------------------------------------------------------------------- + +IR_suifCode_Global_Init *IR_suifCode_Global_Init::pinstance = NULL; + + +IR_suifCode_Global_Init *IR_suifCode_Global_Init::Instance () { + if (pinstance == NULL) + pinstance = new IR_suifCode_Global_Init; + return pinstance; +} + + 
+IR_suifCode_Global_Init::IR_suifCode_Global_Init() { + LIBRARY(useful, init_useful, exit_useful); + LIBRARY(annotes, init_annotes, exit_annotes); + + int argc = 1; + char *argv[1]; + argv[0] = "chill"; + init_suif(argc, argv); +} + + +// ---------------------------------------------------------------------------- +// Class: IR_suifCode_Global_Cleanup +// ---------------------------------------------------------------------------- + +IR_suifCode_Global_Cleanup::~IR_suifCode_Global_Cleanup() { + delete IR_suifCode_Global_Init::Instance(); + exit_suif1(); +} + + +namespace { + IR_suifCode_Global_Cleanup suifcode_global_cleanup_instance; +} + +// ---------------------------------------------------------------------------- +// Class: IR_suifCode +// ---------------------------------------------------------------------------- + +IR_suifCode::IR_suifCode(const char *filename, int proc_num): IR_Code() { + IR_suifCode_Global_Init::Instance(); + + std::string new_filename(filename); + int pos = new_filename.find_last_of('.'); + new_filename = new_filename.substr(0, pos) + ".lxf"; + fileset->add_file(const_cast<char *>(filename), const_cast<char *>(new_filename.c_str())); + fileset->reset_iter(); + fse_ = fileset->next_file(); + fse_->reset_proc_iter(); + + int cur_proc = 0; + while ((psym_ = fse_->next_proc()) && cur_proc < proc_num) + ++cur_proc; + if (cur_proc != proc_num) { + throw ir_error("procedure number cannot be found"); + } + + if (psym_->src_lang() == src_fortran) + is_fortran_ = true; + else + is_fortran_ = false; + + if (!psym_->is_in_memory()) + psym_->read_proc(TRUE, is_fortran_); + push_clue(psym_->block()); + + symtab_ = psym_->block()->proc_syms(); + ocg_ = new omega::CG_suifBuilder(symtab_); +} + + +IR_suifCode::~IR_suifCode() { + tree_node_list *tnl = psym_->block()->body(); + + if (init_code_ != NULL) + tnl->insert_before(static_cast<omega::CG_suifRepr *>(init_code_)->GetCode(), tnl->head()); + if (cleanup_code_ != NULL) + 
tnl->insert_after(static_cast<omega::CG_suifRepr *>(cleanup_code_)->GetCode(), tnl->tail()); + + pop_clue(psym_->block()); + if (!psym_->is_written()) + psym_->write_proc(fse_); + psym_->flush_proc(); +} + + +IR_ScalarSymbol *IR_suifCode::CreateScalarSymbol(const IR_Symbol *sym, int) { + if (typeid(*sym) == typeid(IR_suifScalarSymbol)) { + type_node *tn = static_cast<const IR_suifScalarSymbol *>(sym)->vs_->type(); + while (tn->is_modifier()) + tn = static_cast<modifier_type *>(tn)->base(); + var_sym *vs = symtab_->new_unique_var(tn); + return new IR_suifScalarSymbol(this, vs); + } + else if (typeid(*sym) == typeid(IR_suifArraySymbol)) { + type_node *tn = static_cast<const IR_suifArraySymbol *>(sym)->vs_->type(); + while (tn->is_modifier()) + tn = static_cast<modifier_type *>(tn)->base(); + while (tn->is_array() || tn->is_ptr()) { + if (tn->is_array()) + tn = static_cast<array_type *>(tn)->elem_type(); + else if (tn->is_ptr()) + tn = static_cast<ptr_type *>(tn)->ref_type(); + } + while (tn->is_modifier()) + tn = static_cast<modifier_type *>(tn)->base(); + var_sym *vs = symtab_->new_unique_var(tn); + return new IR_suifScalarSymbol(this, vs); + } + else + throw std::bad_typeid(); +} + + +IR_ArraySymbol *IR_suifCode::CreateArraySymbol(const IR_Symbol *sym, std::vector<omega::CG_outputRepr *> &size, int) { + type_node *tn; + + if (typeid(*sym) == typeid(IR_suifScalarSymbol)) { + tn = static_cast<const IR_suifScalarSymbol *>(sym)->vs_->type(); + } + else if (typeid(*sym) == typeid(IR_suifArraySymbol)) { + tn = static_cast<const IR_suifArraySymbol *>(sym)->vs_->type(); + if (tn->is_modifier()) + tn = static_cast<modifier_type *>(tn)->base(); + while (tn->is_array() || tn->is_ptr()) { + if (tn->is_array()) + tn = static_cast<array_type *>(tn)->elem_type(); + else if (tn->is_ptr()) + tn = static_cast<ptr_type *>(tn)->ref_type(); + } + } + else + throw std::bad_typeid(); + + if (is_fortran_) + for (int i = 0; i < size.size(); i++) { + var_sym *temporary = 
symtab_->new_unique_var(type_s32); + init_code_ = ocg_->StmtListAppend(init_code_, ocg_->CreateAssignment(0, new omega::CG_suifRepr(operand(temporary)), size[i])); + + tn = new array_type(tn, array_bound(1), array_bound(temporary)); + symtab_->add_type(tn); + } + else + for (int i = size.size()-1; i >= 0; i--) { + var_sym *temporary = symtab_->new_unique_var(type_s32); + init_code_ = ocg_->StmtListAppend(init_code_, ocg_->CreateAssignment(0, new omega::CG_suifRepr(operand(temporary)), size[i])); + + tn = new array_type(tn, array_bound(1), array_bound(temporary)); + symtab_->add_type(tn); + } + + static int suif_array_counter = 1; + std::string s = std::string("_P") + omega::to_string(suif_array_counter++); + var_sym *vs = new var_sym(tn, const_cast<char *>(s.c_str())); + vs->add_to_table(symtab_); + + return new IR_suifArraySymbol(this, vs); +} + + +IR_ScalarRef *IR_suifCode::CreateScalarRef(const IR_ScalarSymbol *sym) { + return new IR_suifScalarRef(this, static_cast<const IR_suifScalarSymbol *>(sym)->vs_); +} + + +IR_ArrayRef *IR_suifCode::CreateArrayRef(const IR_ArraySymbol *sym, std::vector<omega::CG_outputRepr *> &index) { + if (sym->n_dim() != index.size()) + throw std::invalid_argument("incorrect array symbol dimensionality"); + + const IR_suifArraySymbol *l_sym = static_cast<const IR_suifArraySymbol *>(sym); + + var_sym *vs = l_sym->vs_; + type_node *tn1 = vs->type(); + if (tn1->is_modifier()) + tn1 = static_cast<modifier_type *>(tn1)->base(); + + type_node *tn2 = tn1; + while (tn2->is_array() || tn2->is_ptr()) { + if (tn2->is_array()) + tn2 = static_cast<array_type *>(tn2)->elem_type(); + else if (tn2->is_ptr()) + tn2 = static_cast<ptr_type *>(tn2)->ref_type(); + } + + instruction *base_ins; + if (tn1->is_ptr()) { + base_symtab *cur_symtab; + + cur_symtab = symtab_; + type_node *found_array_tn = NULL; + while (cur_symtab != NULL) { + type_node_list_iter iter(cur_symtab->types()); + while (!iter.is_empty()) { + type_node *tn = iter.step(); + if 
(!tn->is_array()) + continue; + if (static_cast<array_type *>(tn)->elem_type() == static_cast<ptr_type *>(tn1)->ref_type()) { + array_bound b = static_cast<array_type *>(tn)->upper_bound(); + if (b.is_unknown()) { + found_array_tn = tn; + break; + } + } + } + if (found_array_tn == NULL) + cur_symtab = cur_symtab->parent(); + else + break; + } + + cur_symtab = symtab_; + type_node *found_ptr_array_tn = NULL; + while (cur_symtab != NULL) { + type_node_list_iter iter(cur_symtab->types()); + while (!iter.is_empty()) { + type_node *tn = iter.step(); + if (!tn->is_ptr()) + continue; + if (static_cast<ptr_type *>(tn)->ref_type() == found_array_tn) { + found_ptr_array_tn = tn; + break; + } + } + if (found_ptr_array_tn == NULL) + cur_symtab = cur_symtab->parent(); + else + break; + } + + if (found_ptr_array_tn == NULL) + throw ir_error("can't find the type for the to-be-created array"); + base_ins = new in_rrr(io_cvt, found_ptr_array_tn, operand(), operand(vs)); + } + else { + base_ins = new in_ldc(tn1->ptr_to(), operand(), immed(vs)); + } + + in_array *ia = new in_array(tn2->ptr_to(), operand(), operand(base_ins), tn2->size(), l_sym->n_dim()); + + for (int i = 0; i < index.size(); i++) { + int t; + if (is_fortran_) + t = index.size() - i - 1; + else + t = i; + + omega::CG_suifRepr *bound = static_cast<omega::CG_suifRepr *>(l_sym->size(t)); + ia->set_bound(t, bound->GetExpression()); + delete bound; + omega::CG_suifRepr *idx = static_cast<omega::CG_suifRepr *>(index[i]); + ia->set_index(t, idx->GetExpression()); + delete idx; + } + + return new IR_suifArrayRef(this, ia); +} + + +std::vector<IR_ArrayRef *> IR_suifCode::FindArrayRef(const omega::CG_outputRepr *repr) const { + std::vector<IR_ArrayRef *> arrays; + + tree_node_list *tnl = static_cast<const omega::CG_suifRepr *>(repr)->GetCode(); + if (tnl != NULL) { + tree_node_list_iter iter(tnl); + while (!iter.is_empty()) { + tree_node *tn = iter.step(); + switch (tn->kind()) { + case TREE_FOR: { + tree_for *tnf = 
static_cast<tree_for *>(tn); + omega::CG_suifRepr *r = new omega::CG_suifRepr(tnf->body()); + std::vector<IR_ArrayRef *> a = FindArrayRef(r); + delete r; + std::copy(a.begin(), a.end(), back_inserter(arrays)); + break; + } + case TREE_IF: { + tree_if *tni = static_cast<tree_if *>(tn); + omega::CG_suifRepr *r = new omega::CG_suifRepr(tni->header()); + std::vector<IR_ArrayRef *> a = FindArrayRef(r); + delete r; + std::copy(a.begin(), a.end(), back_inserter(arrays)); + r = new omega::CG_suifRepr(tni->then_part()); + a = FindArrayRef(r); + delete r; + std::copy(a.begin(), a.end(), back_inserter(arrays)); + r = new omega::CG_suifRepr(tni->else_part()); + a = FindArrayRef(r); + delete r; + std::copy(a.begin(), a.end(), back_inserter(arrays)); + break; + } + case TREE_BLOCK: { + omega::CG_suifRepr *r = new omega::CG_suifRepr(static_cast<tree_block *>(tn)->body()); + std::vector<IR_ArrayRef *> a = FindArrayRef(r); + delete r; + std::copy(a.begin(), a.end(), back_inserter(arrays)); + break; + } + case TREE_INSTR: { + omega::CG_suifRepr *r = new omega::CG_suifRepr(operand(static_cast<tree_instr *>(tn)->instr())); + std::vector<IR_ArrayRef *> a = FindArrayRef(r); + delete r; + std::copy(a.begin(), a.end(), back_inserter(arrays)); + break; + } + default: + throw ir_error("control structure not supported"); + } + } + } + else { + operand op = static_cast<const omega::CG_suifRepr *>(repr)->GetExpression(); + if (op.is_instr()) { + instruction *ins = op.instr(); + switch (ins->opcode()) { + case io_array: { + IR_suifArrayRef *ref = new IR_suifArrayRef(this, static_cast<in_array *>(ins)); + for (int i = 0; i < ref->n_dim(); i++) { + omega::CG_suifRepr *r = new omega::CG_suifRepr(find_array_index(ref->ia_, ref->n_dim(), i, is_fortran_)); + std::vector<IR_ArrayRef *> a = FindArrayRef(r); + delete r; + std::copy(a.begin(), a.end(), back_inserter(arrays)); + } + arrays.push_back(ref); + break; + } + case io_str: + case io_memcpy: { + omega::CG_suifRepr *r1 = new 
omega::CG_suifRepr(ins->src_op(1)); + std::vector<IR_ArrayRef *> a1 = FindArrayRef(r1); + delete r1; + std::copy(a1.begin(), a1.end(), back_inserter(arrays)); + omega::CG_suifRepr *r2 = new omega::CG_suifRepr(ins->src_op(0)); + std::vector<IR_ArrayRef *> a2 = FindArrayRef(r2); + delete r2; + std::copy(a2.begin(), a2.end(), back_inserter(arrays)); + break; + } + default: + for (int i = 0; i < ins->num_srcs(); i++) { + omega::CG_suifRepr *r = new omega::CG_suifRepr(ins->src_op(i)); + std::vector<IR_ArrayRef *> a = FindArrayRef(r); + delete r; + std::copy(a.begin(), a.end(), back_inserter(arrays)); + } + } + } + } + + return arrays; +} + + +std::vector<IR_ScalarRef *> IR_suifCode::FindScalarRef(const omega::CG_outputRepr *repr) const { + std::vector<IR_ScalarRef *> scalars; + + tree_node_list *tnl = static_cast<const omega::CG_suifRepr *>(repr)->GetCode(); + if (tnl != NULL) { + tree_node_list_iter iter(tnl); + while (!iter.is_empty()) { + tree_node *tn = iter.step(); + switch (tn->kind()) { + case TREE_FOR: { + tree_for *tnf = static_cast<tree_for *>(tn); + omega::CG_suifRepr *r = new omega::CG_suifRepr(tnf->body()); + std::vector<IR_ScalarRef *> a = FindScalarRef(r); + delete r; + std::copy(a.begin(), a.end(), back_inserter(scalars)); + break; + } + case TREE_IF: { + tree_if *tni = static_cast<tree_if *>(tn); + omega::CG_suifRepr *r = new omega::CG_suifRepr(tni->header()); + std::vector<IR_ScalarRef *> a = FindScalarRef(r); + delete r; + std::copy(a.begin(), a.end(), back_inserter(scalars)); + r = new omega::CG_suifRepr(tni->then_part()); + a = FindScalarRef(r); + delete r; + std::copy(a.begin(), a.end(), back_inserter(scalars)); + r = new omega::CG_suifRepr(tni->else_part()); + a = FindScalarRef(r); + delete r; + std::copy(a.begin(), a.end(), back_inserter(scalars)); + break; + } + case TREE_BLOCK: { + omega::CG_suifRepr *r = new omega::CG_suifRepr(static_cast<tree_block *>(tn)->body()); + std::vector<IR_ScalarRef *> a = FindScalarRef(r); + delete r; + 
std::copy(a.begin(), a.end(), back_inserter(scalars)); + break; + } + case TREE_INSTR: { + omega::CG_suifRepr *r = new omega::CG_suifRepr(operand(static_cast<tree_instr *>(tn)->instr())); + std::vector<IR_ScalarRef *> a = FindScalarRef(r); + delete r; + std::copy(a.begin(), a.end(), back_inserter(scalars)); + break; + } + default: + throw ir_error("control structure not supported"); + } + } + } + else { + operand op = static_cast<const omega::CG_suifRepr *>(repr)->GetExpression(); + if (op.is_instr()) { + instruction *ins = op.instr(); + for (int i = 0; i < ins->num_srcs(); i++) { + operand op = ins->src_op(i); + if (op.is_symbol()) + scalars.push_back(new IR_suifScalarRef(this, ins, i)); + else if (op.is_instr()) { + omega::CG_suifRepr *r = new omega::CG_suifRepr(op); + std::vector<IR_ScalarRef *> a = FindScalarRef(r); + delete r; + std::copy(a.begin(), a.end(), back_inserter(scalars)); + } + } + + operand op = ins->dst_op(); + if (op.is_symbol()) + scalars.push_back(new IR_suifScalarRef(this, ins, -1)); + } + else if (op.is_symbol()) + scalars.push_back(new IR_suifScalarRef(this, op.symbol())); + } + + return scalars; +} + + +std::vector<IR_Control *> IR_suifCode::FindOneLevelControlStructure(const IR_Block *block) const { + std::vector<IR_Control *> controls; + + IR_suifBlock *l_block = static_cast<IR_suifBlock *>(const_cast<IR_Block *>(block)); + tree_node_list_iter iter(l_block->tnl_); + while(!iter.is_empty()) { + tree_node *tn = iter.peek(); + if (tn->list_e() == l_block->start_) + break; + iter.step(); + } + tree_node_list_e *start = NULL; + tree_node_list_e *prev = NULL; + while (!iter.is_empty()) { + tree_node *tn = iter.step(); + if (tn->kind() == TREE_FOR) { + if (start != NULL) { + controls.push_back(new IR_suifBlock(this, l_block->tnl_, start, prev)); + start = NULL; + } + controls.push_back(new IR_suifLoop(this, static_cast<tree_for *>(tn))); + } + else if (tn->kind() == TREE_IF) { + if (start != NULL) { + controls.push_back(new IR_suifBlock(this, 
l_block->tnl_, start, prev)); + start = NULL; + } + controls.push_back(new IR_suifIf(this, static_cast<tree_if *>(tn))); + } + else if (start == NULL && !is_null_statement(tn)) { + start = tn->list_e(); + } + prev = tn->list_e(); + if (prev == l_block->end_) + break; + } + + if (start != NULL && start != l_block->start_) + controls.push_back(new IR_suifBlock(this, l_block->tnl_, start, prev)); + + return controls; +} + + +IR_Block *IR_suifCode::MergeNeighboringControlStructures(const std::vector<IR_Control *> &controls) const { + if (controls.size() == 0) + return NULL; + + tree_node_list *tnl = NULL; + tree_node_list_e *start, *end; + for (int i = 0; i < controls.size(); i++) { + switch (controls[i]->type()) { + case IR_CONTROL_LOOP: { + tree_for *tf = static_cast<IR_suifLoop *>(controls[i])->tf_; + if (tnl == NULL) { + tnl = tf->parent(); + start = end = tf->list_e(); + } + else { + if (tnl != tf->parent()) + throw ir_error("controls to merge not at the same level"); + end = tf->list_e(); + } + break; + } + case IR_CONTROL_BLOCK: { + if (tnl == NULL) { + tnl = static_cast<IR_suifBlock *>(controls[0])->tnl_; + start = static_cast<IR_suifBlock *>(controls[0])->start_; + end = static_cast<IR_suifBlock *>(controls[0])->end_; + } + else { + if (tnl != static_cast<IR_suifBlock *>(controls[0])->tnl_) + throw ir_error("controls to merge not at the same level"); + end = static_cast<IR_suifBlock *>(controls[0])->end_; + } + break; + } + default: + throw ir_error("unrecognized control to merge"); + } + } + + return new IR_suifBlock(controls[0]->ir_, tnl, start, end); +} + + +IR_Block *IR_suifCode::GetCode() const { + return new IR_suifBlock(this, psym_->block()->body()); +} + + +void IR_suifCode::ReplaceCode(IR_Control *old, omega::CG_outputRepr *repr) { + tree_node_list *tnl = static_cast<omega::CG_suifRepr *>(repr)->GetCode(); + + switch (old->type()) { + case IR_CONTROL_LOOP: { + tree_for *tf_old = static_cast<IR_suifLoop *>(old)->tf_; + tree_node_list *tnl_old = 
tf_old->parent(); + + tnl_old->insert_before(tnl, tf_old->list_e()); + tnl_old->remove(tf_old->list_e()); + delete tf_old; + + break; + } + case IR_CONTROL_BLOCK: { + IR_suifBlock *sb = static_cast<IR_suifBlock *>(old); + tree_node_list_iter iter(sb->tnl_); + bool need_deleting = false; + while (!iter.is_empty()) { + tree_node *tn = iter.step(); + tree_node_list_e *pos = tn->list_e(); + if (pos == sb->start_) { + sb->tnl_->insert_before(tnl, pos); + need_deleting = true; + } + if (need_deleting) { + sb->tnl_->remove(pos); + delete tn; + } + if (pos == sb->end_) + break; + } + + break; + } + default: + throw ir_error("control structure to be replaced not supported"); + } + + delete old; + delete repr; +} + + +void IR_suifCode::ReplaceExpression(IR_Ref *old, omega::CG_outputRepr *repr) { + operand op = static_cast<omega::CG_suifRepr *>(repr)->GetExpression(); + + if (typeid(*old) == typeid(IR_suifArrayRef)) { + in_array *ia_orig = static_cast<IR_suifArrayRef *>(old)->ia_; + + if (op.is_instr()) { + instruction *ia_repl = op.instr(); + if (ia_repl->opcode() == io_array) { + if (ia_orig->elem_type()->is_struct()) { + static_cast<in_array *>(ia_repl)->set_offset(ia_orig->offset()); + struct_type *tn = static_cast<struct_type *>(ia_orig->elem_type()); + int left; + type_node *field_tn = tn->field_type(tn->find_field_by_offset(ia_orig->offset(), left)); + static_cast<in_array *>(ia_repl)->set_result_type(field_tn->ptr_to()); + } + replace_instruction(ia_orig, ia_repl); + delete ia_orig; + } + else { + instruction *parent_instr = ia_orig->dst_op().instr(); + if (parent_instr->opcode() == io_str) { + throw ir_error("replace left hand arrary reference not supported yet"); + } + else if (parent_instr->opcode() == io_lod) { + instruction *instr = parent_instr->dst_op().instr(); + if (instr->dst_op() == operand(parent_instr)) { + parent_instr->remove(); + instr->set_dst(op); + } + else { + for (int i = 0; i < instr->num_srcs(); i++) + if (instr->src_op(i) == 
operand(parent_instr)) { + parent_instr->remove(); + instr->set_src_op(i, op); + break; + } + } + + delete parent_instr; + } + else + throw ir_error("array reference to be replaced does not appear in any instruction"); + } + } + else if (op.is_symbol()) { + var_sym *vs = op.symbol(); + instruction *parent_instr = ia_orig->dst_op().instr(); + if (parent_instr->opcode() == io_str) { + tree_node *tn = parent_instr->parent(); + operand op = parent_instr->src_op(1).clone(); + instruction *new_instr = new in_rrr(io_cpy, vs->type(), operand(vs), op); + tree_node_list *tnl = tn->parent(); + tnl->insert_before(new tree_instr(new_instr), tn->list_e()); + tnl->remove(tn->list_e()); + + delete tn; + } + else if (parent_instr->opcode() == io_lod) { + instruction *instr = parent_instr->dst_op().instr(); + if (instr->dst_op() == operand(parent_instr)) { + parent_instr->remove(); + instr->set_dst(operand(vs)); + } + else { + for (int i = 0; i < instr->num_srcs(); i++) + if (instr->src_op(i) == operand(parent_instr)) { + parent_instr->remove(); + instr->set_src_op(i, operand(vs)); + break; + } + } + + delete parent_instr; + } + else + throw ir_error("array reference to be replaced does not appear in any instruction"); + } + else + throw ir_error("can't handle replacement expression"); + } + else + throw ir_error("replacing a scalar variable not implemented"); + + delete old; + delete repr; +} + + + +IR_OPERATION_TYPE IR_suifCode::QueryExpOperation(const omega::CG_outputRepr *repr) const { + operand op = static_cast<const omega::CG_suifRepr *>(repr)->GetExpression(); + + if (op.is_immed()) + return IR_OP_CONSTANT; + else if (op.is_symbol()) + return IR_OP_VARIABLE; + else if (op.is_instr()) { + instruction *ins = op.instr(); + switch (ins->opcode()) { + case io_cpy: + return IR_OP_ASSIGNMENT; + case io_add: + return IR_OP_PLUS; + case io_sub: + return IR_OP_MINUS; + case io_mul: + return IR_OP_MULTIPLY; + case io_div: + return IR_OP_DIVIDE; + case io_neg: + return IR_OP_NEGATIVE; + 
case io_min: + return IR_OP_MIN; + case io_max: + return IR_OP_MAX; + case io_cvt: + return IR_OP_POSITIVE; + default: + return IR_OP_UNKNOWN; + } + } + else if (op.is_null()) + return IR_OP_NULL; + else + return IR_OP_UNKNOWN; +} + + +IR_CONDITION_TYPE IR_suifCode::QueryBooleanExpOperation(const omega::CG_outputRepr *repr) const { + operand op = static_cast<const omega::CG_suifRepr *>(repr)->GetExpression(); + if (op.is_instr()) { + instruction *ins = op.instr(); + switch (ins->opcode()) { + case io_seq: + return IR_COND_EQ; + case io_sne: + return IR_COND_NE; + case io_sl: + return IR_COND_LT; + case io_sle: + return IR_COND_LE; + default: + return IR_COND_UNKNOWN; + } + } + else + return IR_COND_UNKNOWN; +} + + +std::vector<omega::CG_outputRepr *> IR_suifCode::QueryExpOperand(const omega::CG_outputRepr *repr) const { + std::vector<omega::CG_outputRepr *> v; + + operand op = static_cast<const omega::CG_suifRepr *>(repr)->GetExpression(); + if (op.is_immed() || op.is_symbol()) { + omega::CG_suifRepr *repr = new omega::CG_suifRepr(op); + v.push_back(repr); + } + else if (op.is_instr()) { + instruction *ins = op.instr(); + omega::CG_suifRepr *repr; + operand op1, op2; + switch (ins->opcode()) { + case io_cpy: + case io_neg: + case io_cvt: + op1 = ins->src_op(0); + repr = new omega::CG_suifRepr(op1); + v.push_back(repr); + break; + case io_add: + case io_sub: + case io_mul: + case io_div: + case io_min: + case io_max: + op1 = ins->src_op(0); + repr = new omega::CG_suifRepr(op1); + v.push_back(repr); + op2 = ins->src_op(1); + repr = new omega::CG_suifRepr(op2); + v.push_back(repr); + break; + case io_seq: + case io_sne: + case io_sl: + case io_sle: + op1 = ins->src_op(0); + repr = new omega::CG_suifRepr(op1); + v.push_back(repr); + op2 = ins->src_op(1); + repr = new omega::CG_suifRepr(op2); + v.push_back(repr); + break; + default: + throw ir_error("operation not supported"); + } + } + else + throw ir_error("operand type not supported"); + + return v; +} + + +// 
IR_Constant *IR_suifCode::QueryExpConstant(const CG_outputRepr *repr) const { +// CG_suifRepr *l_repr = static_cast<CG_suifRepr *>(const_cast<CG_outputRepr *>(repr)); + +// operand op = l_repr->GetExpression(); +// if (op.is_immed()) { +// immed im = op.immediate(); + +// switch (im.kind()) { +// case im_int: +// return new IR_suifConstant(this, static_cast<coef_t>(im.integer())); +// case im_extended_int: +// return new IR_suifConstant(this, static_cast<coef_t>(im.long_int())); +// case im_float: +// return new IR_suifConstant(this, im.flt()); +// default: +// assert(-1); +// } +// } +// else +// assert(-1); +// } + + +// IR_ScalarRef *IR_suifCode::QueryExpVariable(const CG_outputRepr *repr) const { +// CG_suifRepr *l_repr = static_cast<CG_suifRepr *>(const_cast<CG_outputRepr *>(repr)); + +// operand op = l_repr->GetExpression(); +// if (op.is_symbol()) +// return new IR_suifScalarRef(this, op.symbol()); +// else +// assert(-1); +// } + + +IR_Ref *IR_suifCode::Repr2Ref(const omega::CG_outputRepr *repr) const { + operand op = static_cast<const omega::CG_suifRepr *>(repr)->GetExpression(); + if (op.is_immed()) { + immed im = op.immediate(); + + switch (im.kind()) { + case im_int: + return new IR_suifConstantRef(this, static_cast<omega::coef_t>(im.integer())); + case im_extended_int: + return new IR_suifConstantRef(this, static_cast<omega::coef_t>(im.long_int())); + case im_float: + return new IR_suifConstantRef(this, im.flt()); + default: + throw ir_error("immediate value not integer or floatint point"); + } + } + else if (op.is_symbol()) + return new IR_suifScalarRef(this, op.symbol()); + else + throw ir_error("unrecognized reference type"); +} diff --git a/ir_suif.hh b/ir_suif.hh new file mode 100644 index 0000000..9c3d82d --- /dev/null +++ b/ir_suif.hh @@ -0,0 +1,212 @@ +#ifndef IR_SUIF_HH +#define IR_SUIF_HH + +#include <map> +#include <code_gen/CG_suifRepr.h> +#include <code_gen/CG_suifBuilder.h> +#include "ir_code.hh" + +struct IR_suifScalarSymbol: public 
IR_ScalarSymbol { + var_sym *vs_; + + IR_suifScalarSymbol(const IR_Code *ir, var_sym *vs) { + ir_ = ir; + vs_ = vs; + } + std::string name() const; + int size() const; + bool operator==(const IR_Symbol &that) const; + IR_Symbol *clone() const; +}; + + +struct IR_suifArraySymbol: public IR_ArraySymbol { + var_sym *vs_; + int indirect_; + int offset_; + + IR_suifArraySymbol(const IR_Code *ir, var_sym *vs, int indirect = 0, int offset = 0) { + ir_ = ir; + vs_ = vs; + indirect_ = indirect; + offset_ = offset; + } + std::string name() const; + int elem_size() const; + int n_dim() const; + omega::CG_outputRepr *size(int dim) const; + bool operator==(const IR_Symbol &that) const; + IR_ARRAY_LAYOUT_TYPE layout_type() const; + IR_Symbol *clone() const; +}; + + +struct IR_suifConstantRef: public IR_ConstantRef { + union { + omega::coef_t i_; + double f_; + }; + + IR_suifConstantRef(const IR_Code *ir, omega::coef_t i) { + ir_ = ir; + type_ = IR_CONSTANT_INT; + i_ = i; + } + IR_suifConstantRef(const IR_Code *ir, double f) { + ir_ = ir; + type_ = IR_CONSTANT_FLOAT; + f_ = f; + } + omega::coef_t integer() const {assert(is_integer()); return i_;} + bool operator==(const IR_Ref &that) const; + omega::CG_outputRepr *convert(); + IR_Ref *clone() const; +}; + + +struct IR_suifScalarRef: public IR_ScalarRef { + instruction *ins_pos_; + int op_pos_; // -1 means destination operand, otherwise source operand + var_sym *vs_; + + IR_suifScalarRef(const IR_Code *ir, var_sym *sym) { + ir_ = ir; + ins_pos_ = NULL; + vs_ = sym; + } + IR_suifScalarRef(const IR_Code *ir, instruction *ins, int pos) { + ir_ = ir; + ins_pos_ = ins; + op_pos_ = pos; + operand op; + if (pos == -1) + op = ins->dst_op(); + else + op = ins->src_op(pos); + assert(op.is_symbol()); + vs_ = op.symbol(); + } + bool is_write() const; + IR_ScalarSymbol *symbol() const; + bool operator==(const IR_Ref &that) const; + omega::CG_outputRepr *convert(); + IR_Ref *clone() const; +}; + + +struct IR_suifArrayRef: public IR_ArrayRef { + 
in_array *ia_; + + IR_suifArrayRef(const IR_Code *ir, in_array *ia) { + ir_ = ir; + ia_ = ia; + } + bool is_write() const; + omega::CG_outputRepr *index(int dim) const; + IR_ArraySymbol *symbol() const; + bool operator==(const IR_Ref &that) const; + omega::CG_outputRepr *convert(); + IR_Ref *clone() const; +}; + + +struct IR_suifLoop: public IR_Loop { + tree_for *tf_; + + IR_suifLoop(const IR_Code *ir, tree_for *tf) { ir_ = ir; tf_ = tf; } + ~IR_suifLoop() {} + IR_ScalarSymbol *index() const; + omega::CG_outputRepr *lower_bound() const; + omega::CG_outputRepr *upper_bound() const; + IR_CONDITION_TYPE stop_cond() const; + IR_Block *body() const; + int step_size() const; + IR_Block *convert(); + IR_Control *clone() const; +}; + + +struct IR_suifBlock: public IR_Block { + tree_node_list *tnl_; + tree_node_list_e *start_, *end_; + + IR_suifBlock(const IR_Code *ir, tree_node_list *tnl, tree_node_list_e *start, tree_node_list_e *end) { + ir_ = ir; tnl_ = tnl; start_ = start; end_ = end; + } + IR_suifBlock(const IR_Code *ir, tree_node_list *tnl) { + ir_ = ir; tnl_ = tnl; start_ = tnl_->head(); end_ = tnl_->tail(); + } + ~IR_suifBlock() {} + omega::CG_outputRepr *extract() const; + IR_Control *clone() const; +}; + + +struct IR_suifIf: public IR_If { + tree_if *ti_; + + IR_suifIf(const IR_Code *ir, tree_if *ti) { ir_ = ir; ti_ = ti; } + ~IR_suifIf() {} + omega::CG_outputRepr *condition() const; + IR_Block *then_body() const; + IR_Block *else_body() const; + IR_Block *convert(); + IR_Control *clone() const; +}; + + +// singleton class for global suif initialization +class IR_suifCode_Global_Init { +private: + static IR_suifCode_Global_Init *pinstance; +protected: + IR_suifCode_Global_Init(); + IR_suifCode_Global_Init(const IR_suifCode_Global_Init &); + IR_suifCode_Global_Init & operator= (const IR_suifCode_Global_Init &); +public: + static IR_suifCode_Global_Init *Instance(); + ~IR_suifCode_Global_Init() {} +}; + +// singleton class for global suif cleanup +class 
IR_suifCode_Global_Cleanup { +public: + IR_suifCode_Global_Cleanup() {} + ~IR_suifCode_Global_Cleanup(); +}; + +class IR_suifCode: public IR_Code{ +protected: + file_set_entry *fse_; + proc_sym *psym_; + proc_symtab *symtab_; + bool is_fortran_; + +public: + IR_suifCode(const char *filename, int proc_num); + ~IR_suifCode(); + + IR_ScalarSymbol *CreateScalarSymbol(const IR_Symbol *sym, int memory_type = 0); + IR_ArraySymbol *CreateArraySymbol(const IR_Symbol *sym, std::vector<omega::CG_outputRepr *> &size, int memory_type = 0); + IR_ScalarRef *CreateScalarRef(const IR_ScalarSymbol *sym); + IR_ArrayRef *CreateArrayRef(const IR_ArraySymbol *sym, std::vector<omega::CG_outputRepr *> &index); + int ArrayIndexStartAt() {if (is_fortran_) return 1; else return 0;} + + std::vector<IR_ArrayRef *> FindArrayRef(const omega::CG_outputRepr *repr) const; + std::vector<IR_ScalarRef *> FindScalarRef(const omega::CG_outputRepr *repr) const; + std::vector<IR_Control *> FindOneLevelControlStructure(const IR_Block *block) const; + IR_Block *MergeNeighboringControlStructures(const std::vector<IR_Control *> &controls) const; + IR_Block *GetCode() const; + void ReplaceCode(IR_Control *old, omega::CG_outputRepr *repr); + void ReplaceExpression(IR_Ref *old, omega::CG_outputRepr *repr); + + IR_OPERATION_TYPE QueryExpOperation(const omega::CG_outputRepr *repr) const; + IR_CONDITION_TYPE QueryBooleanExpOperation(const omega::CG_outputRepr *repr) const; + std::vector<omega::CG_outputRepr *> QueryExpOperand(const omega::CG_outputRepr *repr) const; + IR_Ref *Repr2Ref(const omega::CG_outputRepr *) const; + + friend class IR_suifArraySymbol; + friend class IR_suifArrayRef; +}; + +#endif diff --git a/ir_suif_utils.cc b/ir_suif_utils.cc new file mode 100644 index 0000000..f4e4edf --- /dev/null +++ b/ir_suif_utils.cc @@ -0,0 +1,477 @@ +/***************************************************************************** + Copyright (C) 2008 University of Southern California + Copyright (C) 2009 University of 
Utah + All Rights Reserved. + + Purpose: + SUIF interface utilities. + + Notes: + + Update history: + 01/2006 created by Chun Chen +*****************************************************************************/ + +#include <suif1.h> +#include <useful.h> +#include <vector> +#include <algorithm> +#include <code_gen/CG_suifRepr.h> +#include "ir_suif_utils.hh" + +// ---------------------------------------------------------------------------- +// Mandatory SUIF stuff +// ---------------------------------------------------------------------------- +char *prog_ver_string = "1.3.0.5-gccfix"; +char *prog_who_string = "automatically generated from chill"; +char *prog_suif_string = "suif"; + +// static file_set_entry *fse = NULL; +// static proc_sym *psym = NULL; + +// class SUIF_IR; + +// SUIF_IR *ir = NULL; + +// SUIF_IR::SUIF_IR(char *filename, int proc_num) { +// // LIBRARY(ipmath, init_ipmath, exit_ipmath); +// LIBRARY(useful, init_useful, exit_useful); +// LIBRARY(annotes, init_annotes, exit_annotes); + +// int argc = 3; +// char *argv[3]; +// argv[0] = "loop_xform"; +// argv[1] = strdup(filename); +// argv[2] = strdup(filename); +// char *pos = strrchr(argv[2], '.'); +// if (pos == NULL) +// strcat(argv[2], ".lxf"); +// else { +// *pos = '\0'; +// strcat(argv[2], ".lxf"); +// } +// init_suif(argc, argv); + +// fileset->add_file(argv[1], argv[2]); +// fileset->reset_iter(); +// _fse = fileset->next_file(); +// _fse->reset_proc_iter(); +// int cur_proc = 0; +// while ((_psym = _fse->next_proc()) && cur_proc < proc_num) +// ++cur_proc; +// if (cur_proc != proc_num) { +// fprintf(stderr, "procedure number %d couldn't be found\n", proc_num); +// exit(1); +// } +// if (!_psym->is_in_memory()) +// _psym->read_proc(TRUE, _psym->src_lang() == src_fortran); + +// push_clue(_psym->block()); +// } + + +// SUIF_IR::~SUIF_IR() { +// pop_clue(_psym->block()); +// if (!_psym->is_written()) +// _psym->write_proc(_fse); +// _psym->flush_proc(); + +// exit_suif1(); +// } + + +// tree_for 
*SUIF_IR::get_loop(int loop_num) { +// std::vector<tree_for *> loops = find_loops(_psym->block()->body()); +// if (loop_num >= loops.size()) { +// fprintf(stderr, "loop number %d couldn't be found\n", loop_num); +// exit(1); +// } +// return loops[loop_num]; +// } + + +// void SUIF_IR::commit(Loop *lp, int loop_num) { +// if (lp == NULL) +// return; + +// if (lp->init_code != NULL) { +// tree_node_list *init_tnl = static_cast<CG_suifRepr *>(lp->init_code->clone())->GetCode(); +// tree_node_list_iter iter(lp->symtab->block()->body()); +// iter.step(); +// lp->symtab->block()->body()->insert_before(init_tnl, iter.cur_elem()); +// } + +// tree_node_list *code = lp->getCode(); +// std::vector<tree_for *> loops = find_loops(_psym->block()->body()); +// tree_node_list *tnl = loops[loop_num]->parent(); +// tnl->insert_before(code, loops[loop_num]->list_e()); +// tnl->remove(loops[loop_num]->list_e()); +// } + + +// extern void start_suif(int &argc, char *argv[]) { +// // LIBRARY(ipmath, init_ipmath, exit_ipmath); +// LIBRARY(useful, init_useful, exit_useful); +// LIBRARY(annotes, init_annotes, exit_annotes); + +// init_suif(argc, argv); +// } + +// tree_for *init_loop(char *filename, int proc_num, int loop_num) { +// // LIBRARY(ipmath, init_ipmath, exit_ipmath); +// LIBRARY(useful, init_useful, exit_useful); +// LIBRARY(annotes, init_annotes, exit_annotes); + +// int argc = 3; +// char *argv[3]; +// argv[0] = "loop_xform"; +// argv[1] = filename; +// argv[2] = strdup(filename); +// char *pos = strrchr(argv[2], '.'); +// if (pos == NULL) +// strcat(argv[2], ".lxf"); +// else { +// *pos = '\0'; +// strcat(argv[2], ".lxf"); +// } +// printf("%s %s %s\n", argv[0], argv[1], argv[2]); +// init_suif(argc, argv); + +// fileset->add_file(argv[1], argv[2]); +// fileset->reset_iter(); +// fse = fileset->next_file(); +// fse->reset_proc_iter(); +// int cur_proc = 0; +// while ((psym = fse->next_proc()) && cur_proc < proc_num) +// ++cur_proc; +// if (cur_proc != proc_num) { +// 
fprintf(stderr, "procedure number %d couldn't be found\n", proc_num); +// exit(1); +// } + +// if (!psym->is_in_memory()) +// psym->read_proc(TRUE, psym->src_lang() == src_fortran); + +// push_clue(psym->block()); +// std::vector<tree_for *> loops = find_loops(psym->block()->body()); +// if (loop_num >= loops.size()) +// return NULL; +// return loops[loop_num]; +// } + + +// void finalize_loop() { + +// printf("finalize %d\n", fse); +// pop_clue(psym->block()); +// if (!psym->is_written()) +// psym->write_proc(fse); +// psym->flush_proc(); +// } + + + +// // ---------------------------------------------------------------------------- +// // Class: CG_suifArray +// // ---------------------------------------------------------------------------- +// CG_suifArray::CG_suifArray(in_array *ia_): ia(ia_) { +// var_sym *vs = get_sym_of_array(ia); +// name = String(vs->name()); + +// for (int i = 0; i < ia->dims(); i++) +// index.push_back(new CG_suifRepr(ia->index(i))); +// } + +// bool CG_suifArray::is_write() { +// return is_lhs(ia); +// } + + +// ---------------------------------------------------------------------------- +// Find array index in various situations. 
+// ---------------------------------------------------------------------------- +operand find_array_index(in_array *ia, int n, int dim, bool is_fortran) { + if (!is_fortran) + dim = n - dim - 1; + int level = n - dim -1; + + in_array *current = ia; + + while (true) { + int n = current->dims(); + if (level < n) { + return current->index(level); + } + else { + level = level - n; + operand op = current->base_op(); + assert(op.is_instr()); + instruction *ins = op.instr(); + if (ins->opcode() != io_cvt) + return operand(); + operand op2 = static_cast<in_rrr *>(ins)->src_op(); + assert(op2.is_instr()); + instruction *ins2 = op2.instr(); + assert(ins2->opcode() == io_lod); + operand op3 = static_cast<in_rrr *>(ins2)->src_op(); + assert(op3.is_instr()); + instruction *ins3 = op3.instr(); + assert(ins3->opcode() == io_array); + current = static_cast<in_array *>(ins3); + } + } +} + + + + +// ---------------------------------------------------------------------------- +// Check if a tree_node is doing nothing +// ---------------------------------------------------------------------------- +bool is_null_statement(tree_node *tn) { + if (tn->kind() != TREE_INSTR) + return false; + + instruction *ins = static_cast<tree_instr*>(tn)->instr(); + + if (ins->opcode() == io_mrk || ins->opcode() == io_nop) + return true; + else + return false; +} + +// ---------------------------------------------------------------------------- +// Miscellaneous loop functions +// ---------------------------------------------------------------------------- +std::vector<tree_for *> find_deepest_loops(tree_node *tn) { + if (tn->kind() == TREE_FOR) { + std::vector<tree_for *> loops; + + tree_for *tnf = static_cast<tree_for *>(tn); + loops.insert(loops.end(), tnf); + std::vector<tree_for *> t = find_deepest_loops(tnf->body()); + std::copy(t.begin(), t.end(), std::back_inserter(loops)); + + return loops; + } + else if (tn->kind() == TREE_BLOCK) { + tree_block *tnb = static_cast<tree_block *>(tn); + return 
find_deepest_loops(tnb->body()); + } + else + return std::vector<tree_for *>(); +} + +std::vector<tree_for *> find_deepest_loops(tree_node_list *tnl) { + std::vector<tree_for *> loops; + + tree_node_list_iter iter(tnl); + while (!iter.is_empty()) { + tree_node *tn = iter.step(); + + std::vector<tree_for *> t = find_deepest_loops(tn); + + if (t.size() > loops.size()) + loops = t; + } + + return loops; +} + +std::vector<tree_for *> find_loops(tree_node_list *tnl) { + std::vector<tree_for *> result; + + tree_node_list_iter iter(tnl); + while (!iter.is_empty()) { + tree_node *tn = iter.step(); + if (tn->kind() == TREE_FOR) + result.push_back(static_cast<tree_for *>(tn)); + } + + return result; +} + + +std::vector<tree_for *> find_outer_loops(tree_node *tn) { + std::vector<tree_for *> loops; + + while(tn) { + if(tn->kind() == TREE_FOR) + loops.insert(loops.begin(),static_cast<tree_for*>(tn)); + tn = (tn->parent())?tn->parent()->parent():NULL; + } + + return loops; +} + +std::vector<tree_for *> find_common_loops(tree_node *tn1, tree_node *tn2) { + std::vector<tree_for *> loops1 = find_outer_loops(tn1); + std::vector<tree_for *> loops2 = find_outer_loops(tn2); + + std::vector<tree_for *> loops; + + for (unsigned i = 0; i < std::min(loops1.size(), loops2.size()); i++) { + if (loops1[i] == loops2[i]) + loops.insert(loops.end(), loops1[i]); + else + break; + } + + return loops; +} + + +//----------------------------------------------------------------------------- +// Determine the lexical order between two instructions. 
+//----------------------------------------------------------------------------- +LexicalOrderType lexical_order(tree_node *tn1, tree_node *tn2) { + if (tn1 == tn2) + return LEX_MATCH; + + std::vector<tree_node *> tnv1; + std::vector<tree_node_list *> tnlv1; + while (tn1 != NULL && tn1->parent() != NULL) { + tnv1.insert(tnv1.begin(), tn1); + tnlv1.insert(tnlv1.begin(), tn1->parent()); + tn1 = tn1->parent()->parent(); + } + + std::vector<tree_node *> tnv2; + std::vector<tree_node_list *> tnlv2; + while (tn2 != NULL && tn2->parent() != NULL) { + tnv2.insert(tnv2.begin(), tn2); + tnlv2.insert(tnlv2.begin(), tn2->parent()); + tn2 = tn2->parent()->parent(); + } + + for (int i = 0; i < std::min(tnlv1.size(), tnlv2.size()); i++) { + if (tnlv1[i] == tnlv2[i] && tnv1[i] != tnv2[i]) { + tree_node_list_iter iter(tnlv1[i]); + + while (!iter.is_empty()) { + tree_node *tn = iter.step(); + + if (tn == tnv1[i]) + return LEX_BEFORE; + else if (tn == tnv2[i]) + return LEX_AFTER; + } + + break; + } + } + + return LEX_UNKNOWN; +} + + + +//----------------------------------------------------------------------------- +// Get the list of array instructions +//----------------------------------------------------------------------------- +std::vector<in_array *> find_arrays(instruction *ins) { + std::vector<in_array *> arrays; + if (ins->opcode() == io_array) { + arrays.insert(arrays.end(), static_cast<in_array *>(ins)); + } + else { + for (int i = 0; i < ins->num_srcs(); i++) { + operand op(ins->src_op(i)); + if (op.is_instr()) { + std::vector<in_array *> t = find_arrays(op.instr()); + std::copy(t.begin(), t.end(), back_inserter(arrays)); + } + } + } + return arrays; +} + +std::vector<in_array *> find_arrays(tree_node_list *tnl) { + std::vector<in_array *> arrays, t; + tree_node_list_iter iter(tnl); + + while (!iter.is_empty()) { + tree_node *tn = iter.step(); + + if (tn->kind() == TREE_FOR) { + tree_for *tnf = static_cast<tree_for *>(tn); + + t = find_arrays(tnf->body()); + 
std::copy(t.begin(), t.end(), back_inserter(arrays)); + } + else if (tn->kind() == TREE_IF) { + tree_if *tni = static_cast<tree_if *>(tn); + + t = find_arrays(tni->header()); + std::copy(t.begin(), t.end(), back_inserter(arrays)); + t = find_arrays(tni->then_part()); + std::copy(t.begin(), t.end(), back_inserter(arrays)); + t = find_arrays(tni->else_part()); + std::copy(t.begin(), t.end(), back_inserter(arrays)); + } + else if (tn->kind() == TREE_BLOCK) { + t = find_arrays(static_cast<tree_block *>(tn)->body()); + std::copy(t.begin(), t.end(), back_inserter(arrays)); + } + else if (tn->kind() == TREE_INSTR) { + t = find_arrays(static_cast<tree_instr *>(tn)->instr()); + std::copy(t.begin(), t.end(), back_inserter(arrays)); + } + } + + return arrays; +} + +// std::vector<CG_suifArray *> find_array_access(instruction *ins) { +// std::vector<CG_suifArray *> arrays; + +// if (ins->opcode() == io_array) { +// arrays.push_back(new CG_suifArray(static_cast<in_array *>(ins))); +// } +// else { +// for (int i = 0; i < ins->num_srcs(); i++) { +// operand op(ins->src_op(i)); +// if (op.is_instr()) { +// std::vector<CG_suifArray *> t = find_array_access(op.instr()); +// std::copy(t.begin(), t.end(), back_inserter(arrays)); +// } +// } +// } +// return arrays; +// } + +// std::vector<CG_suifArray *> find_array_access(tree_node_list *tnl) { +// std::vector<CG_suifArray *> arrays, t; +// tree_node_list_iter iter(tnl); + +// while (!iter.is_empty()) { +// tree_node *tn = iter.step(); + +// if (tn->kind() == TREE_FOR) { +// tree_for *tnf = static_cast<tree_for *>(tn); + +// t = find_array_access(tnf->body()); +// std::copy(t.begin(), t.end(), back_inserter(arrays)); +// } +// else if (tn->kind() == TREE_IF) { +// tree_if *tni = static_cast<tree_if *>(tn); + +// t = find_array_access(tni->header()); +// std::copy(t.begin(), t.end(), back_inserter(arrays)); +// t = find_array_access(tni->then_part()); +// std::copy(t.begin(), t.end(), back_inserter(arrays)); +// t = 
find_array_access(tni->else_part()); +// std::copy(t.begin(), t.end(), back_inserter(arrays)); +// } +// else if (tn->kind() == TREE_BLOCK) { +// t = find_array_access(static_cast<tree_block *>(tn)->body()); +// std::copy(t.begin(), t.end(), back_inserter(arrays)); +// } +// else if (tn->kind() == TREE_INSTR) { +// t = find_array_access(static_cast<tree_instr *>(tn)->instr()); +// std::copy(t.begin(), t.end(), back_inserter(arrays)); +// } +// } + +// return arrays; +// } diff --git a/ir_suif_utils.hh b/ir_suif_utils.hh new file mode 100644 index 0000000..55d5ce7 --- /dev/null +++ b/ir_suif_utils.hh @@ -0,0 +1,72 @@ +#ifndef IR_SUIF_UTILS_HH +#define IR_SUIF_UTILS_HH +#include <vector> +#include <suif1.h> +// #include "cctools.hh" +#include "omegatools.hh" +// #include "loop.hh" + +// c++ stuff: + +// template <class T> const T& min(const T &a, const T &b) { +// if ( a < b) +// return a; +// else +// return b; +// } + +// template <class T> T abs(const T &v) { +// if (v < static_cast<T>(0)) +// return -v; +// else +// return v; +// } + +// class CG_suifArray: public CG_inputArray { +// protected: +// in_array *ia; +// public: +// CG_suifArray(in_array *ia_); +// virtual bool is_write(); +// }; + + +// class SUIF_IR { +// public: +// file_set_entry *_fse; +// proc_sym *_psym; +// SUIF_IR(char *filename, int proc_num); +// ~SUIF_IR(); + +// tree_for *get_loop(int loop_num); +// void commit(Loop *lp, int loop_num); +// }; + +// extern SUIF_IR *ir; + +// suif stuff: + +// tree_for *init_loop(char *filename, int proc_num, int loop_num); +// void finalize_loop(); + + +operand find_array_index(in_array *ia, int n, int dim, bool is_fortran); +bool is_null_statement(tree_node *tn); +std::vector<tree_for *> find_deepest_loops(tree_node *tn); +std::vector<tree_for *> find_deepest_loops(tree_node_list *tnl); +std::vector<tree_for *> find_loops(tree_node_list *tnl); +std::vector<tree_for*> find_outer_loops(tree_node *tn); +std::vector<tree_for *> find_common_loops(tree_node 
*tn1, tree_node *tn2); +LexicalOrderType lexical_order(tree_node *tn1, tree_node *tn2); +std::vector<in_array *> find_arrays(instruction *ins); +std::vector<in_array *> find_arrays(tree_node_list *tnl); + +//protonu--adding a few functions used it cuda-chil +//these are defined in ir_cuda_suif_uitls.cc +tree_node_list* loop_body_at_level(tree_node_list* tnl, int level); +tree_node_list* loop_body_at_level(tree_for* loop, int level); +tree_node_list* swap_node_for_node_list(tree_node* tn, tree_node_list* new_tnl); +// std::vector<CG_suifArray *> find_arrays_access(instruction *ins); +// std::vector<CG_suifArray *> find_arrays_access(tree_node_list *tnl); + +#endif diff --git a/irtools.cc b/irtools.cc new file mode 100644 index 0000000..4ab6c85 --- /dev/null +++ b/irtools.cc @@ -0,0 +1,279 @@ +/***************************************************************************** + Copyright (C) 2010 University of Utah + All Rights Reserved. + + Purpose: + Useful tools to analyze code in compiler IR format. + + Notes: + + History: + 06/2010 Created by Chun Chen. +*****************************************************************************/ + +#include <iostream> +#include <code_gen/CG_outputBuilder.h> +#include "irtools.hh" +#include "omegatools.hh" +#include "chill_error.hh" + +using namespace omega; + +// Build IR tree from the source code. Block type node can only be +// leaf, i.e., there is no further structures inside a block allowed. 
+std::vector<ir_tree_node *> build_ir_tree(IR_Control *control, ir_tree_node *parent) { + std::vector<ir_tree_node *> result; + + switch (control->type()) { + case IR_CONTROL_BLOCK: { + std::vector<IR_Control *> controls = control->ir_->FindOneLevelControlStructure(static_cast<IR_Block *>(control)); + if (controls.size() == 0) { + ir_tree_node *node = new ir_tree_node; + node->content = control; + node->parent = parent; + node->payload = -1; + result.push_back(node); + } + else { + delete control; + for (int i = 0; i < controls.size(); i++) + switch (controls[i]->type()) { + case IR_CONTROL_BLOCK: { + std::vector<ir_tree_node *> t = build_ir_tree(controls[i], parent); + result.insert(result.end(), t.begin(), t.end()); + break; + } + case IR_CONTROL_LOOP: { + ir_tree_node *node = new ir_tree_node; + node->content = controls[i]; + node->parent = parent; + node->children = build_ir_tree(static_cast<IR_Loop *>(controls[i])->body(), node); + node->payload = -1; + result.push_back(node); + break; + } + case IR_CONTROL_IF: { + static int unique_if_identifier = 0; + + IR_Block *block = static_cast<IR_If *>(controls[i])->then_body(); + if (block != NULL) { + ir_tree_node *node = new ir_tree_node; + node->content = controls[i]; + node->parent = parent; + node->children = build_ir_tree(block, node); + node->payload = unique_if_identifier+1; + result.push_back(node); + } + + + block = static_cast<IR_If *>(controls[i])->else_body(); + if ( block != NULL) { + ir_tree_node *node = new ir_tree_node; + node->content = controls[i]->clone(); + node->parent = parent; + node->children = build_ir_tree(block, node); + node->payload = unique_if_identifier; + result.push_back(node); + } + + unique_if_identifier += 2; + break; + } + default: + ir_tree_node *node = new ir_tree_node; + node->content = controls[i]; + node->parent = parent; + node->payload = -1; + result.push_back(node); + break; + } + } + break; + } + case IR_CONTROL_LOOP: { + ir_tree_node *node = new ir_tree_node; + 
node->content = control; + node->parent = parent; + node->children = build_ir_tree(static_cast<const IR_Loop *>(control)->body(), node); + node->payload = -1; + result.push_back(node); + break; + } + default: + ir_tree_node *node = new ir_tree_node; + node->content = control; + node->parent = parent; + node->payload = -1; + result.push_back(node); + break; + } + + return result; +} + + +// Extract statements from IR tree. Statements returned are ordered in +// lexical order in the source code. +std::vector<ir_tree_node *> extract_ir_stmts(const std::vector<ir_tree_node *> &ir_tree) { + std::vector<ir_tree_node *> result; + for (int i = 0; i < ir_tree.size(); i++) + switch (ir_tree[i]->content->type()) { + case IR_CONTROL_BLOCK: + result.push_back(ir_tree[i]); + break; + case IR_CONTROL_LOOP: { + // clear loop payload from previous unsuccessful initialization process + ir_tree[i]->payload = -1; + + std::vector<ir_tree_node *> t = extract_ir_stmts(ir_tree[i]->children); + result.insert(result.end(), t.begin(), t.end()); + break; + } + case IR_CONTROL_IF: { + std::vector<ir_tree_node *> t = extract_ir_stmts(ir_tree[i]->children); + result.insert(result.end(), t.begin(), t.end()); + break; + } + default: + throw std::invalid_argument("invalid ir tree"); + } + + return result; +} + + +bool is_dependence_valid(ir_tree_node *src_node, ir_tree_node *dst_node, + const DependenceVector &dv, bool before) { + std::set<ir_tree_node *> loop_nodes; + ir_tree_node *itn = src_node; + + if (!dv.is_scalar_dependence) { + while (itn->parent != NULL) { + itn = itn->parent; + if (itn->content->type() == IR_CONTROL_LOOP) + loop_nodes.insert(itn); + } + + int last_dim = -1; + itn = dst_node; + while (itn->parent != NULL) { + itn = itn->parent; + if (itn->content->type() == IR_CONTROL_LOOP + && loop_nodes.find(itn) != loop_nodes.end() + && itn->payload > last_dim) + last_dim = itn->payload; + } + + if (last_dim == -1) + return true; + + for (int i = 0; i <= last_dim; i++) { + if 
(dv.lbounds[i] > 0) + return true; + else if (dv.lbounds[i] < 0) + return false; + } + + if (before) + return true; + else + return false; + } + + return true; + +} + + + +// Test data dependences between two statements. The first statement +// in parameter must be lexically before the second statement in +// parameter. Returned dependences are all lexicographically +// positive. The first vector in returned pair is dependences from the +// first statement to the second statement and the second vector in +// returned pair is in reverse order. +std::pair<std::vector<DependenceVector>, std::vector<DependenceVector> > test_data_dependences( + IR_Code *ir, const CG_outputRepr *repr1, const Relation &IS1, + const CG_outputRepr *repr2, const Relation &IS2, + std::vector<Free_Var_Decl*> &freevar, std::vector<std::string> index, + int i, int j) { + std::pair<std::vector<DependenceVector>, std::vector<DependenceVector> > result; + + if (repr1 == repr2) { + std::vector<IR_ArrayRef *> access = ir->FindArrayRef(repr1); + + for (int i = 0; i < access.size(); i++) { + IR_ArrayRef *a = access[i]; + IR_ArraySymbol *sym_a = a->symbol(); + for (int j = i; j < access.size(); j++) { + IR_ArrayRef *b = access[j]; + IR_ArraySymbol *sym_b = b->symbol(); + + if (*sym_a == *sym_b && (a->is_write() || b->is_write())) { + Relation r = arrays2relation(ir, freevar, a, IS1, b, IS2); + std::pair<std::vector<DependenceVector>, + std::vector<DependenceVector> > dv = + relation2dependences(a, b, r); + result.first.insert(result.first.end(), dv.first.begin(), + dv.first.end()); + result.second.insert(result.second.end(), dv.second.begin(), + dv.second.end()); + } + delete sym_b; + } + delete sym_a; + + } + + for (int i = 0; i < access.size(); i++) + delete access[i]; + } else { + std::vector<IR_ArrayRef *> access1 = ir->FindArrayRef(repr1); + std::vector<IR_ArrayRef *> access2 = ir->FindArrayRef(repr2); + + for (int i = 0; i < access1.size(); i++) { + IR_ArrayRef *a = access1[i]; + IR_ArraySymbol 
*sym_a = a->symbol(); + + for (int j = 0; j < access2.size(); j++) { + IR_ArrayRef *b = access2[j]; + IR_ArraySymbol *sym_b = b->symbol(); + if (*sym_a == *sym_b && (a->is_write() || b->is_write())) { + Relation r = arrays2relation(ir, freevar, a, IS1, b, IS2); + std::pair<std::vector<DependenceVector>, + std::vector<DependenceVector> > dv = + relation2dependences(a, b, r); + + result.first.insert(result.first.end(), dv.first.begin(), + dv.first.end()); + result.second.insert(result.second.end(), dv.second.begin(), + dv.second.end()); + } + delete sym_b; + } + delete sym_a; + } + + for (int i = 0; i < access1.size(); i++) + delete access1[i]; + for (int i = 0; i < access2.size(); i++) + delete access2[i]; + } + /*std::pair<std::vector<DependenceVector>, + std::vector<DependenceVector> > dv = + ir->FindScalarDeps(repr1, repr2, index, i, j); + + + result.first.insert(result.first.end(), dv.first.begin(), + dv.first.end()); + result.second.insert(result.second.end(), dv.second.begin(), + dv.second.end());*/ + /*result.first.insert(result.first.end(), dv.first.begin(), + dv.first.end()); + result.second.insert(result.second.end(), dv.second.begin(), + dv.second.end()); + */ + + return result; +} + diff --git a/irtools.hh b/irtools.hh new file mode 100644 index 0000000..8dc8e28 --- /dev/null +++ b/irtools.hh @@ -0,0 +1,40 @@ +#ifndef IRTOOLS_HH +#define IRTOOLS_HH + +#include <vector> +#include <omega.h> +#include <code_gen/CG_outputRepr.h> +#include "ir_code.hh" +#include "dep.hh" + +// IR tree is used to initialize a loop. For a loop node, payload is +// its mapped iteration space dimension. For a simple block node, +// payload is its mapped statement number. Normal if-else is splitted +// into two nodes where the one with odd payload represents then-part and +// the one with even payload represents else-part. 
+struct ir_tree_node { + IR_Control *content; + ir_tree_node *parent; + std::vector<ir_tree_node *> children; + int payload; + + ~ir_tree_node() { + for (int i = 0; i < children.size(); i++) + delete children[i]; + delete content; + } +}; + +std::vector<ir_tree_node *> build_ir_tree(IR_Control *control, + ir_tree_node *parent = NULL); +std::vector<ir_tree_node *> extract_ir_stmts( + const std::vector<ir_tree_node *> &ir_tree); +bool is_dependence_valid(ir_tree_node *src_node, ir_tree_node *dst_node, + const DependenceVector &dv, bool before); +std::pair<std::vector<DependenceVector>, std::vector<DependenceVector> > test_data_dependences( + IR_Code *ir, const omega::CG_outputRepr *repr1, + const omega::Relation &IS1, const omega::CG_outputRepr *repr2, + const omega::Relation &IS2, std::vector<omega::Free_Var_Decl*> &freevar, + std::vector<std::string> index, int i, int j); + +#endif @@ -0,0 +1,1869 @@ +/***************************************************************************** + Copyright (C) 2008 University of Southern California + Copyright (C) 2009-2010 University of Utah + All Rights Reserved. + + Purpose: + Core loop transformation functionality. + + Notes: + "level" (starting from 1) means loop level and it corresponds to "dim" + (starting from 0) in transformed iteration space [c_1,l_1,c_2,l_2,...., + c_n,l_n,c_(n+1)], e.g., l_2 is loop level 2 in generated code, dim 3 + in transformed iteration space, and variable 4 in Omega relation. + All c's are constant numbers only and they will not show up as actual loops. + Formula: + dim = 2*level - 1 + var = dim + 1 + + History: + 10/2005 Created by Chun Chen. 
+ 09/2009 Expand tile functionality, -chun + 10/2009 Initialize unfusible loop nest without bailing out, -chun +*****************************************************************************/ + +#include <limits.h> +#include <math.h> +#include <codegen.h> +#include <code_gen/CG_utils.h> +#include <iostream> +#include <algorithm> +#include <map> +#include "loop.hh" +#include "omegatools.hh" +#include "irtools.hh" +#include "chill_error.hh" +#include <string.h> +#include <list> +using namespace omega; + +const std::string Loop::tmp_loop_var_name_prefix = std::string("chill_t"); // Manu:: In fortran, first character of a variable name must be a letter, so this change +const std::string Loop::overflow_var_name_prefix = std::string("over"); + +//----------------------------------------------------------------------------- +// Class Loop +//----------------------------------------------------------------------------- +// --begin Anand: Added from CHiLL 0.2 + +bool Loop::isInitialized() const { + return stmt.size() != 0 && !stmt[0].xform.is_null(); +} + +//--end Anand: added from CHiLL 0.2 + +bool Loop::init_loop(std::vector<ir_tree_node *> &ir_tree, + std::vector<ir_tree_node *> &ir_stmt) { + ir_stmt = extract_ir_stmts(ir_tree); + stmt_nesting_level_.resize(ir_stmt.size()); + std::vector<int> stmt_nesting_level(ir_stmt.size()); + for (int i = 0; i < ir_stmt.size(); i++) { + ir_stmt[i]->payload = i; + int t = 0; + ir_tree_node *itn = ir_stmt[i]; + while (itn->parent != NULL) { + itn = itn->parent; + if (itn->content->type() == IR_CONTROL_LOOP) + t++; + } + stmt_nesting_level_[i] = t; + stmt_nesting_level[i] = t; + } + + stmt = std::vector<Statement>(ir_stmt.size()); + int n_dim = -1; + int max_loc; + //std::vector<std::string> index; + for (int i = 0; i < ir_stmt.size(); i++) { + int max_nesting_level = -1; + int loc; + for (int j = 0; j < ir_stmt.size(); j++) + if (stmt_nesting_level[j] > max_nesting_level) { + max_nesting_level = stmt_nesting_level[j]; + loc = j; + } + + 
// most deeply nested statement acting as a reference point + if (n_dim == -1) { + n_dim = max_nesting_level; + max_loc = loc; + + index = std::vector<std::string>(n_dim); + + ir_tree_node *itn = ir_stmt[loc]; + int cur_dim = n_dim - 1; + while (itn->parent != NULL) { + itn = itn->parent; + if (itn->content->type() == IR_CONTROL_LOOP) { + index[cur_dim] = + static_cast<IR_Loop *>(itn->content)->index()->name(); + itn->payload = cur_dim--; + } + } + } + + // align loops by names, temporary solution + ir_tree_node *itn = ir_stmt[loc]; + int depth = stmt_nesting_level_[loc] - 1; + /* while (itn->parent != NULL) { + itn = itn->parent; + if (itn->content->type() == IR_CONTROL_LOOP && itn->payload == -1) { + std::string name = static_cast<IR_Loop *>(itn->content)->index()->name(); + for (int j = 0; j < n_dim; j++) + if (index[j] == name) { + itn->payload = j; + break; + } + if (itn->payload == -1) + throw loop_error("no complex alignment yet"); + } + } + */ + for (int t = depth; t >= 0; t--) { + int y = t; + ir_tree_node *itn = ir_stmt[loc]; + + while ((itn->parent != NULL) && (y >= 0)) { + itn = itn->parent; + if (itn->content->type() == IR_CONTROL_LOOP) + y--; + } + + if (itn->content->type() == IR_CONTROL_LOOP && itn->payload == -1) { + CG_outputBuilder *ocg = ir->builder(); + + itn->payload = depth - t; + + CG_outputRepr *code = + static_cast<IR_Block *>(ir_stmt[loc]->content)->extract(); + + std::vector<CG_outputRepr *> index_expr; + std::vector<std::string> old_index; + CG_outputRepr *repl = ocg->CreateIdent(index[itn->payload]); + index_expr.push_back(repl); + old_index.push_back( + static_cast<IR_Loop *>(itn->content)->index()->name()); + code = ocg->CreateSubstitutedStmt(0, code, old_index, + index_expr); + + replace.insert(std::pair<int, CG_outputRepr*>(loc, code)); + //stmt[loc].code = code; + + } + } + + // set relation variable names + Relation r(n_dim); + F_And *f_root = r.add_and(); + itn = ir_stmt[loc]; + int temp_depth = depth; + while (itn->parent != 
NULL) { + + itn = itn->parent; + if (itn->content->type() == IR_CONTROL_LOOP) { + r.name_set_var(itn->payload + 1, index[temp_depth]); + + temp_depth--; + } + //static_cast<IR_Loop *>(itn->content)->index()->name()); + } + + /*while (itn->parent != NULL) { + itn = itn->parent; + if (itn->content->type() == IR_CONTROL_LOOP) + r.name_set_var(itn->payload+1, static_cast<IR_Loop *>(itn->content)->index()->name()); + }*/ + + // extract information from loop/if structures + std::vector<bool> processed(n_dim, false); + std::vector<std::string> vars_to_be_reversed; + itn = ir_stmt[loc]; + while (itn->parent != NULL) { + itn = itn->parent; + + switch (itn->content->type()) { + case IR_CONTROL_LOOP: { + IR_Loop *lp = static_cast<IR_Loop *>(itn->content); + Variable_ID v = r.set_var(itn->payload + 1); + int c; + + try { + c = lp->step_size(); + if (c > 0) { + CG_outputRepr *lb = lp->lower_bound(); + exp2formula(ir, r, f_root, freevar, lb, v, 's', + IR_COND_GE, true); + CG_outputRepr *ub = lp->upper_bound(); + IR_CONDITION_TYPE cond = lp->stop_cond(); + if (cond == IR_COND_LT || cond == IR_COND_LE) + exp2formula(ir, r, f_root, freevar, ub, v, 's', + cond, true); + else + throw ir_error("loop condition not supported"); + + } else if (c < 0) { + CG_outputBuilder *ocg = ir->builder(); + CG_outputRepr *lb = lp->lower_bound(); + lb = ocg->CreateMinus(NULL, lb); + exp2formula(ir, r, f_root, freevar, lb, v, 's', + IR_COND_GE, true); + CG_outputRepr *ub = lp->upper_bound(); + ub = ocg->CreateMinus(NULL, ub); + IR_CONDITION_TYPE cond = lp->stop_cond(); + if (cond == IR_COND_GE) + exp2formula(ir, r, f_root, freevar, ub, v, 's', + IR_COND_LE, true); + else if (cond == IR_COND_GT) + exp2formula(ir, r, f_root, freevar, ub, v, 's', + IR_COND_LT, true); + else + throw ir_error("loop condition not supported"); + + vars_to_be_reversed.push_back(lp->index()->name()); + } else + throw ir_error("loop step size zero"); + } catch (const ir_error &e) { + for (int i = 0; i < itn->children.size(); 
i++) + delete itn->children[i]; + itn->children = std::vector<ir_tree_node *>(); + itn->content = itn->content->convert(); + return false; + } + + if (abs(c) != 1) { + F_Exists *f_exists = f_root->add_exists(); + Variable_ID e = f_exists->declare(); + F_And *f_and = f_exists->add_and(); + Stride_Handle h = f_and->add_stride(abs(c)); + if (c > 0) + h.update_coef(e, 1); + else + h.update_coef(e, -1); + h.update_coef(v, -1); + CG_outputRepr *lb = lp->lower_bound(); + exp2formula(ir, r, f_and, freevar, lb, e, 's', IR_COND_EQ, + true); + } + + processed[itn->payload] = true; + break; + } + case IR_CONTROL_IF: { + CG_outputRepr *cond = + static_cast<IR_If *>(itn->content)->condition(); + try { + if (itn->payload % 2 == 1) + exp2constraint(ir, r, f_root, freevar, cond, true); + else { + F_Not *f_not = f_root->add_not(); + F_And *f_and = f_not->add_and(); + exp2constraint(ir, r, f_and, freevar, cond, true); + } + } catch (const ir_error &e) { + std::vector<ir_tree_node *> *t; + if (itn->parent == NULL) + t = &ir_tree; + else + t = &(itn->parent->children); + int id = itn->payload; + int i = t->size() - 1; + while (i >= 0) { + if ((*t)[i] == itn) { + for (int j = 0; j < itn->children.size(); j++) + delete itn->children[j]; + itn->children = std::vector<ir_tree_node *>(); + itn->content = itn->content->convert(); + } else if ((*t)[i]->payload >> 1 == id >> 1) { + delete (*t)[i]; + t->erase(t->begin() + i); + } + i--; + } + return false; + } + + break; + } + default: + for (int i = 0; i < itn->children.size(); i++) + delete itn->children[i]; + itn->children = std::vector<ir_tree_node *>(); + itn->content = itn->content->convert(); + return false; + } + } + + // add information for missing loops + for (int j = 0; j < n_dim; j++) + if (!processed[j]) { + ir_tree_node *itn = ir_stmt[max_loc]; + while (itn->parent != NULL) { + itn = itn->parent; + if (itn->content->type() == IR_CONTROL_LOOP + && itn->payload == j) + break; + } + + Variable_ID v = r.set_var(j + 1); + if (loc < 
max_loc) { + + CG_outputBuilder *ocg = ir->builder(); + + CG_outputRepr *lb = + static_cast<IR_Loop *>(itn->content)->lower_bound(); + + exp2formula(ir, r, f_root, freevar, lb, v, 's', IR_COND_EQ, + false); + + /* if (ir->QueryExpOperation( + static_cast<IR_Loop *>(itn->content)->lower_bound()) + == IR_OP_VARIABLE) { + IR_ScalarRef *ref = + static_cast<IR_ScalarRef *>(ir->Repr2Ref( + static_cast<IR_Loop *>(itn->content)->lower_bound())); + std::string name_ = ref->name(); + + for (int i = 0; i < index.size(); i++) + if (index[i] == name_) { + exp2formula(ir, r, f_root, freevar, lb, v, 's', + IR_COND_GE, false); + + CG_outputRepr *ub = + static_cast<IR_Loop *>(itn->content)->upper_bound(); + IR_CONDITION_TYPE cond = + static_cast<IR_Loop *>(itn->content)->stop_cond(); + if (cond == IR_COND_LT || cond == IR_COND_LE) + exp2formula(ir, r, f_root, freevar, ub, v, + 's', cond, false); + + + + } + + } + */ + + } else { // loc > max_loc + + CG_outputBuilder *ocg = ir->builder(); + CG_outputRepr *ub = + static_cast<IR_Loop *>(itn->content)->upper_bound(); + + exp2formula(ir, r, f_root, freevar, ub, v, 's', IR_COND_EQ, + false); + /*if (ir->QueryExpOperation( + static_cast<IR_Loop *>(itn->content)->upper_bound()) + == IR_OP_VARIABLE) { + IR_ScalarRef *ref = + static_cast<IR_ScalarRef *>(ir->Repr2Ref( + static_cast<IR_Loop *>(itn->content)->upper_bound())); + std::string name_ = ref->name(); + + for (int i = 0; i < index.size(); i++) + if (index[i] == name_) { + + CG_outputRepr *lb = + static_cast<IR_Loop *>(itn->content)->lower_bound(); + + exp2formula(ir, r, f_root, freevar, lb, v, 's', + IR_COND_GE, false); + + CG_outputRepr *ub = + static_cast<IR_Loop *>(itn->content)->upper_bound(); + IR_CONDITION_TYPE cond = + static_cast<IR_Loop *>(itn->content)->stop_cond(); + if (cond == IR_COND_LT || cond == IR_COND_LE) + exp2formula(ir, r, f_root, freevar, ub, v, + 's', cond, false); + + + } + } + */ + } + } + + r.setup_names(); + r.simplify(); + + // insert the statement + 
CG_outputBuilder *ocg = ir->builder(); + std::vector<CG_outputRepr *> reverse_expr; + for (int j = 1; j <= vars_to_be_reversed.size(); j++) { + CG_outputRepr *repl = ocg->CreateIdent(vars_to_be_reversed[j]); + repl = ocg->CreateMinus(NULL, repl); + reverse_expr.push_back(repl); + } + CG_outputRepr *code = + static_cast<IR_Block *>(ir_stmt[loc]->content)->extract(); + code = ocg->CreateSubstitutedStmt(0, code, vars_to_be_reversed, + reverse_expr); + stmt[loc].code = code; + stmt[loc].IS = r; + stmt[loc].loop_level = std::vector<LoopLevel>(n_dim); + stmt[loc].ir_stmt_node = ir_stmt[loc]; + for (int i = 0; i < n_dim; i++) { + stmt[loc].loop_level[i].type = LoopLevelOriginal; + stmt[loc].loop_level[i].payload = i; + stmt[loc].loop_level[i].parallel_level = 0; + } + + stmt_nesting_level[loc] = -1; + } + + return true; +} + +Loop::Loop(const IR_Control *control) { + + last_compute_cgr_ = NULL; + last_compute_cg_ = NULL; + + ir = const_cast<IR_Code *>(control->ir_); + init_code = NULL; + cleanup_code = NULL; + tmp_loop_var_name_counter = 1; + overflow_var_name_counter = 1; + known = Relation::True(0); + + ir_tree = build_ir_tree(control->clone(), NULL); + // std::vector<ir_tree_node *> ir_stmt; + + while (!init_loop(ir_tree, ir_stmt)) { + } + + + + for (int i = 0; i < stmt.size(); i++) { + std::map<int, CG_outputRepr*>::iterator it = replace.find(i); + + if (it != replace.end()) + stmt[i].code = it->second; + else + stmt[i].code = stmt[i].code; + } + + if (stmt.size() != 0) + dep = DependenceGraph(stmt[0].IS.n_set()); + else + dep = DependenceGraph(0); + // init the dependence graph + for (int i = 0; i < stmt.size(); i++) + dep.insert(); + + for (int i = 0; i < stmt.size(); i++) + for (int j = i; j < stmt.size(); j++) { + std::pair<std::vector<DependenceVector>, + std::vector<DependenceVector> > dv = test_data_dependences( + ir, stmt[i].code, stmt[i].IS, stmt[j].code, stmt[j].IS, + freevar, index, stmt_nesting_level_[i], + stmt_nesting_level_[j]); + + for (int k = 0; k < 
dv.first.size(); k++) { + if (is_dependence_valid(ir_stmt[i], ir_stmt[j], dv.first[k], + true)) + dep.connect(i, j, dv.first[k]); + else { + dep.connect(j, i, dv.first[k].reverse()); + } + + } + for (int k = 0; k < dv.second.size(); k++) + if (is_dependence_valid(ir_stmt[j], ir_stmt[i], dv.second[k], + false)) + dep.connect(j, i, dv.second[k]); + else { + dep.connect(i, j, dv.second[k].reverse()); + } + // std::pair<std::vector<DependenceVector>, + // std::vector<DependenceVector> > dv_ = test_data_dependences( + + } + + + + // init dumb transformation relations e.g. [i, j] -> [ 0, i, 0, j, 0] + for (int i = 0; i < stmt.size(); i++) { + int n = stmt[i].IS.n_set(); + stmt[i].xform = Relation(n, 2 * n + 1); + F_And *f_root = stmt[i].xform.add_and(); + + for (int j = 1; j <= n; j++) { + EQ_Handle h = f_root->add_EQ(); + h.update_coef(stmt[i].xform.output_var(2 * j), 1); + h.update_coef(stmt[i].xform.input_var(j), -1); + } + + for (int j = 1; j <= 2 * n + 1; j += 2) { + EQ_Handle h = f_root->add_EQ(); + h.update_coef(stmt[i].xform.output_var(j), 1); + } + stmt[i].xform.simplify(); + } + + if (stmt.size() != 0) + num_dep_dim = stmt[0].IS.n_set(); + else + num_dep_dim = 0; + // debug + /*for (int i = 0; i < stmt.size(); i++) { + std::cout << i << ": "; + //stmt[i].xform.print(); + stmt[i].IS.print(); + std::cout << std::endl; + + }*/ + //end debug +} + +Loop::~Loop() { + + delete last_compute_cgr_; + delete last_compute_cg_; + + for (int i = 0; i < stmt.size(); i++) + if (stmt[i].code != NULL) { + stmt[i].code->clear(); + delete stmt[i].code; + } + + for (int i = 0; i < ir_tree.size(); i++) + delete ir_tree[i]; + + if (init_code != NULL) { + init_code->clear(); + delete init_code; + } + if (cleanup_code != NULL) { + cleanup_code->clear(); + delete cleanup_code; + } +} + +int Loop::get_dep_dim_of(int stmt_num, int level) const { + if (stmt_num < 0 || stmt_num >= stmt.size()) + throw std::invalid_argument("invaid statement " + to_string(stmt_num)); + + if (level < 1 || 
level > stmt[stmt_num].loop_level.size()) + return -1; + + int trip_count = 0; + while (true) { + switch (stmt[stmt_num].loop_level[level - 1].type) { + case LoopLevelOriginal: + return stmt[stmt_num].loop_level[level - 1].payload; + case LoopLevelTile: + level = stmt[stmt_num].loop_level[level - 1].payload; + if (level < 1) + return -1; + if (level > stmt[stmt_num].loop_level.size()) + throw loop_error( + "incorrect loop level information for statement " + + to_string(stmt_num)); + break; + default: + throw loop_error( + "unknown loop level information for statement " + + to_string(stmt_num)); + } + trip_count++; + if (trip_count >= stmt[stmt_num].loop_level.size()) + throw loop_error( + "incorrect loop level information for statement " + + to_string(stmt_num)); + } +} + +int Loop::get_last_dep_dim_before(int stmt_num, int level) const { + if (stmt_num < 0 || stmt_num >= stmt.size()) + throw std::invalid_argument("invaid statement " + to_string(stmt_num)); + + if (level < 1) + return -1; + if (level > stmt[stmt_num].loop_level.size()) + level = stmt[stmt_num].loop_level.size() + 1; + + for (int i = level - 1; i >= 1; i--) + if (stmt[stmt_num].loop_level[i - 1].type == LoopLevelOriginal) + return stmt[stmt_num].loop_level[i - 1].payload; + + return -1; +} + +void Loop::print_internal_loop_structure() const { + for (int i = 0; i < stmt.size(); i++) { + std::vector<int> lex = getLexicalOrder(i); + std::cout << "s" << i + 1 << ": "; + for (int j = 0; j < stmt[i].loop_level.size(); j++) { + if (2 * j < lex.size()) + std::cout << lex[2 * j]; + switch (stmt[i].loop_level[j].type) { + case LoopLevelOriginal: + std::cout << "(dim:" << stmt[i].loop_level[j].payload << ")"; + break; + case LoopLevelTile: + std::cout << "(tile:" << stmt[i].loop_level[j].payload << ")"; + break; + default: + std::cout << "(unknown)"; + } + std::cout << ' '; + } + for (int j = 2 * stmt[i].loop_level.size(); j < lex.size(); j += 2) { + std::cout << lex[j]; + if (j != lex.size() - 1) + std::cout 
<< ' '; + } + std::cout << std::endl; + } +} + +CG_outputRepr *Loop::getCode(int effort) const { + const int m = stmt.size(); + if (m == 0) + return NULL; + const int n = stmt[0].xform.n_out(); + + if (last_compute_cg_ == NULL) { + std::vector<Relation> IS(m); + std::vector<Relation> xforms(m); + for (int i = 0; i < m; i++) { + IS[i] = stmt[i].IS; + xforms[i] = stmt[i].xform; + } + Relation known = Extend_Set(copy(this->known), n - this->known.n_set()); + + last_compute_cg_ = new CodeGen(xforms, IS, known); + delete last_compute_cgr_; + last_compute_cgr_ = NULL; + } + + if (last_compute_cgr_ == NULL || last_compute_effort_ != effort) { + delete last_compute_cgr_; + last_compute_cgr_ = last_compute_cg_->buildAST(effort); + last_compute_effort_ = effort; + } + + std::vector<CG_outputRepr *> stmts(m); + for (int i = 0; i < m; i++) + stmts[i] = stmt[i].code; + CG_outputBuilder *ocg = ir->builder(); + CG_outputRepr *repr = last_compute_cgr_->printRepr(ocg, stmts); + + if (init_code != NULL) + repr = ocg->StmtListAppend(init_code->clone(), repr); + if (cleanup_code != NULL) + repr = ocg->StmtListAppend(repr, cleanup_code->clone()); + + return repr; +} + +void Loop::printCode(int effort) const { + const int m = stmt.size(); + if (m == 0) + return; + const int n = stmt[0].xform.n_out(); + + if (last_compute_cg_ == NULL) { + std::vector<Relation> IS(m); + std::vector<Relation> xforms(m); + for (int i = 0; i < m; i++) { + IS[i] = stmt[i].IS; + xforms[i] = stmt[i].xform; + } + Relation known = Extend_Set(copy(this->known), n - this->known.n_set()); + + last_compute_cg_ = new CodeGen(xforms, IS, known); + delete last_compute_cgr_; + last_compute_cgr_ = NULL; + } + + if (last_compute_cgr_ == NULL || last_compute_effort_ != effort) { + delete last_compute_cgr_; + last_compute_cgr_ = last_compute_cg_->buildAST(effort); + last_compute_effort_ = effort; + } + + std::string repr = last_compute_cgr_->printString(); + std::cout << repr << std::endl; +} + +void 
Loop::printIterationSpace() const { + for (int i = 0; i < stmt.size(); i++) { + std::cout << "s" << i << ": "; + Relation r = getNewIS(i); + for (int j = 1; j <= r.n_inp(); j++) + r.name_input_var(j, CodeGen::loop_var_name_prefix + to_string(j)); + r.setup_names(); + r.print(); + } +} + +void Loop::printDependenceGraph() const { + if (dep.edgeCount() == 0) + std::cout << "no dependence exists" << std::endl; + else { + std::cout << "dependence graph:" << std::endl; + std::cout << dep; + } +} + +Relation Loop::getNewIS(int stmt_num) const { + Relation result; + + if (stmt[stmt_num].xform.is_null()) { + Relation known = Extend_Set(copy(this->known), + stmt[stmt_num].IS.n_set() - this->known.n_set()); + result = Intersection(copy(stmt[stmt_num].IS), known); + } else { + Relation known = Extend_Set(copy(this->known), + stmt[stmt_num].xform.n_out() - this->known.n_set()); + result = Intersection( + Range( + Restrict_Domain(copy(stmt[stmt_num].xform), + copy(stmt[stmt_num].IS))), known); + } + + result.simplify(2, 4); + + return result; +} + +std::vector<Relation> Loop::getNewIS() const { + const int m = stmt.size(); + + std::vector<Relation> new_IS(m); + for (int i = 0; i < m; i++) + new_IS[i] = getNewIS(i); + + return new_IS; +} + +void Loop::pragma(int stmt_num, int level, const std::string &pragmaText) { + // check sanity of parameters + if(stmt_num < 0) + throw std::invalid_argument("invalid statement " + to_string(stmt_num)); + + CG_outputBuilder *ocg = ir->builder(); + CG_outputRepr *code = stmt[stmt_num].code; + ocg->CreatePragmaAttribute(code, level, pragmaText); +} +/* +void Loop::prefetch(int stmt_num, int level, const std::string &arrName, const std::string &indexName, int offset, int hint) { + // check sanity of parameters + if(stmt_num < 0) + throw std::invalid_argument("invalid statement " + to_string(stmt_num)); + + CG_outputBuilder *ocg = ir->builder(); + CG_outputRepr *code = stmt[stmt_num].code; + ocg->CreatePrefetchAttribute(code, level, arrName, 
indexName, int offset, hint); +} +*/ + +void Loop::prefetch(int stmt_num, int level, const std::string &arrName, int hint) { + // check sanity of parameters + if(stmt_num < 0) + throw std::invalid_argument("invalid statement " + to_string(stmt_num)); + + CG_outputBuilder *ocg = ir->builder(); + CG_outputRepr *code = stmt[stmt_num].code; + ocg->CreatePrefetchAttribute(code, level, arrName, hint); +} + +std::vector<int> Loop::getLexicalOrder(int stmt_num) const { + assert(stmt_num < stmt.size()); + + const int n = stmt[stmt_num].xform.n_out(); + std::vector<int> lex(n, 0); + + for (int i = 0; i < n; i += 2) + lex[i] = get_const(stmt[stmt_num].xform, i, Output_Var); + + return lex; +} + +// find the sub loop nest specified by stmt_num and level, +// only iteration space satisfiable statements returned. +std::set<int> Loop::getSubLoopNest(int stmt_num, int level) const { + assert(stmt_num >= 0 && stmt_num < stmt.size()); + assert(level > 0 && level <= stmt[stmt_num].loop_level.size()); + + std::set<int> working; + for (int i = 0; i < stmt.size(); i++) + if (const_cast<Loop *>(this)->stmt[i].IS.is_upper_bound_satisfiable() + && stmt[i].loop_level.size() >= level) + working.insert(i); + + for (int i = 1; i <= level; i++) { + int a = getLexicalOrder(stmt_num, i); + for (std::set<int>::iterator j = working.begin(); j != working.end();) { + int b = getLexicalOrder(*j, i); + if (b != a) + working.erase(j++); + else + ++j; + } + } + + return working; +} + +int Loop::getLexicalOrder(int stmt_num, int level) const { + assert(stmt_num >= 0 && stmt_num < stmt.size()); + assert(level > 0 && level <= stmt[stmt_num].loop_level.size()+1); + + Relation &r = const_cast<Loop *>(this)->stmt[stmt_num].xform; + for (EQ_Iterator e(r.single_conjunct()->EQs()); e; e++) + if (abs((*e).get_coef(r.output_var(2 * level - 1))) == 1) { + bool is_const = true; + for (Constr_Vars_Iter cvi(*e); cvi; cvi++) + if (cvi.curr_var() != r.output_var(2 * level - 1)) { + is_const = false; + break; + } + if 
(is_const) { + int t = static_cast<int>((*e).get_const()); + return (*e).get_coef(r.output_var(2 * level - 1)) > 0 ? -t : t; + } + } + + throw loop_error( + "can't find lexical order for statement " + to_string(stmt_num) + + "'s loop level " + to_string(level)); +} + +std::set<int> Loop::getStatements(const std::vector<int> &lex, int dim) const { + const int m = stmt.size(); + + std::set<int> same_loops; + for (int i = 0; i < m; i++) { + if (dim < 0) + same_loops.insert(i); + else { + std::vector<int> a_lex = getLexicalOrder(i); + int j; + for (j = 0; j <= dim; j += 2) + if (lex[j] != a_lex[j]) + break; + if (j > dim) + same_loops.insert(i); + } + + } + + return same_loops; +} + +void Loop::shiftLexicalOrder(const std::vector<int> &lex, int dim, int amount) { + const int m = stmt.size(); + + if (amount == 0) + return; + + for (int i = 0; i < m; i++) { + std::vector<int> lex2 = getLexicalOrder(i); + + bool need_shift = true; + + for (int j = 0; j < dim; j++) + if (lex2[j] != lex[j]) { + need_shift = false; + break; + } + + if (!need_shift) + continue; + + if (amount > 0) { + if (lex2[dim] < lex[dim]) + continue; + } else if (amount < 0) { + if (lex2[dim] > lex[dim]) + continue; + } + + assign_const(stmt[i].xform, dim, lex2[dim] + amount); + } +} + +std::vector<std::set<int> > Loop::sort_by_same_loops(std::set<int> active, + int level) { + + std::set<int> not_nested_at_this_level; + std::map<ir_tree_node*, std::set<int> > sorted_by_loop; + std::map<int, std::set<int> > sorted_by_lex_order; + std::vector<std::set<int> > to_return; + bool lex_order_already_set = false; + for (std::set<int>::iterator it = active.begin(); it != active.end(); + it++) { + + if (stmt[*it].ir_stmt_node == NULL) + lex_order_already_set = true; + } + + if (lex_order_already_set) { + + for (std::set<int>::iterator it = active.begin(); it != active.end(); + it++) { + std::map<int, std::set<int> >::iterator it2 = + sorted_by_lex_order.find( + get_const(stmt[*it].xform, 2 * (level - 1), + 
Output_Var)); + + if (it2 != sorted_by_lex_order.end()) + it2->second.insert(*it); + else { + + std::set<int> to_insert; + + to_insert.insert(*it); + + sorted_by_lex_order.insert( + std::pair<int, std::set<int> >( + get_const(stmt[*it].xform, 2 * (level - 1), + Output_Var), to_insert)); + + } + + } + + for (std::map<int, std::set<int> >::iterator it2 = + sorted_by_lex_order.begin(); it2 != sorted_by_lex_order.end(); + it2++) + to_return.push_back(it2->second); + + } else { + + for (std::set<int>::iterator it = active.begin(); it != active.end(); + it++) { + + ir_tree_node* itn = stmt[*it].ir_stmt_node; + itn = itn->parent; + while ((itn != NULL) && (itn->payload != level - 1)) + itn = itn->parent; + + if (itn == NULL) + not_nested_at_this_level.insert(*it); + else { + std::map<ir_tree_node*, std::set<int> >::iterator it2 = + sorted_by_loop.find(itn); + + if (it2 != sorted_by_loop.end()) + it2->second.insert(*it); + else { + std::set<int> to_insert; + + to_insert.insert(*it); + + sorted_by_loop.insert( + std::pair<ir_tree_node*, std::set<int> >(itn, + to_insert)); + + } + + } + + } + if (not_nested_at_this_level.size() > 0) { + for (std::set<int>::iterator it = not_nested_at_this_level.begin(); + it != not_nested_at_this_level.end(); it++) { + std::set<int> temp; + temp.insert(*it); + to_return.push_back(temp); + + } + } + for (std::map<ir_tree_node*, std::set<int> >::iterator it2 = + sorted_by_loop.begin(); it2 != sorted_by_loop.end(); it2++) + to_return.push_back(it2->second); + } + return to_return; +} + +void update_successors(int n, int node_num[], int cant_fuse_with[], + Graph<std::set<int>, bool> &g, std::list<int> &work_list) { + + std::set<int> disconnect; + for (Graph<std::set<int>, bool>::EdgeList::iterator i = + g.vertex[n].second.begin(); i != g.vertex[n].second.end(); i++) { + int m = i->first; + + if (node_num[m] != -1) + throw loop_error("Graph input for fusion has cycles not a DAG!!"); + + std::vector<bool> check_ = g.getEdge(n, m); + + bool 
has_bad_edge_path = false; + for (int i = 0; i < check_.size(); i++) + if (!check_[i]) { + has_bad_edge_path = true; + break; + } + if (has_bad_edge_path) + cant_fuse_with[m] = std::max(cant_fuse_with[m], node_num[n]); + else + cant_fuse_with[m] = std::max(cant_fuse_with[m], cant_fuse_with[n]); + disconnect.insert(m); + } + + + for (std::set<int>::iterator i = disconnect.begin(); i != disconnect.end(); + i++) { + g.disconnect(n, *i); + + bool no_incoming_edges = true; + for (int j = 0; j < g.vertex.size(); j++) + if (j != *i) + if (g.hasEdge(j, *i)) { + no_incoming_edges = false; + break; + } + + + if (no_incoming_edges) + work_list.push_back(*i); + } + +} + +Graph<std::set<int>, bool> Loop::construct_induced_graph_at_level( + std::vector<std::set<int> > s, DependenceGraph dep, int dep_dim) { + Graph<std::set<int>, bool> g; + + for (int i = 0; i < s.size(); i++) + g.insert(s[i]); + + for (int i = 0; i < s.size(); i++) { + + for (int j = i + 1; j < s.size(); j++) { + bool has_true_edge_i_to_j = false; + bool has_true_edge_j_to_i = false; + bool is_connected_i_to_j = false; + bool is_connected_j_to_i = false; + for (std::set<int>::iterator ii = s[i].begin(); ii != s[i].end(); + ii++) { + + for (std::set<int>::iterator jj = s[j].begin(); + jj != s[j].end(); jj++) { + + std::vector<DependenceVector> dvs = dep.getEdge(*ii, *jj); + for (int k = 0; k < dvs.size(); k++) + if (dvs[k].is_control_dependence() + || (dvs[k].is_data_dependence() + && dvs[k].has_been_carried_at(dep_dim))) { + + if (dvs[k].is_data_dependence() + && dvs[k].has_negative_been_carried_at( + dep_dim)) { + //g.connect(i, j, false); + is_connected_i_to_j = true; + break; + } else { + //g.connect(i, j, true); + + has_true_edge_i_to_j = true; + //break + } + } + + //if (is_connected) + + // break; + // if (has_true_edge_i_to_j && !is_connected_i_to_j) + // g.connect(i, j, true); + dvs = dep.getEdge(*jj, *ii); + for (int k = 0; k < dvs.size(); k++) + if (dvs[k].is_control_dependence() + || 
(dvs[k].is_data_dependence() + && dvs[k].has_been_carried_at(dep_dim))) { + + if (is_connected_i_to_j || has_true_edge_i_to_j) + throw loop_error( + "Graph input for fusion has cycles not a DAG!!"); + + if (dvs[k].is_data_dependence() + && dvs[k].has_negative_been_carried_at( + dep_dim)) { + //g.connect(i, j, false); + is_connected_j_to_i = true; + break; + } else { + //g.connect(i, j, true); + + has_true_edge_j_to_i = true; + //break; + } + } + + // if (is_connected) + //break; + // if (is_connected) + //break; + } + + + //if (is_connected) + // break; + } + + + if (is_connected_i_to_j) + g.connect(i, j, false); + else if (has_true_edge_i_to_j) + g.connect(i, j, true); + + if (is_connected_j_to_i) + g.connect(j, i, false); + else if (has_true_edge_j_to_i) + g.connect(j, i, true); + + + } + } + return g; +} + +std::vector<std::set<int> > Loop::typed_fusion(Graph<std::set<int>, bool> g) { + + bool roots[g.vertex.size()]; + + for (int i = 0; i < g.vertex.size(); i++) + roots[i] = true; + + for (int i = 0; i < g.vertex.size(); i++) + for (int j = i + 1; j < g.vertex.size(); j++) { + + if (g.hasEdge(i, j)) + roots[j] = false; + + if (g.hasEdge(j, i)) + roots[i] = false; + + } + + std::list<int> work_list; + int cant_fuse_with[g.vertex.size()]; + std::vector<std::set<int> > s; + //Each Fused set's representative node + + int node_to_fused_nodes[g.vertex.size()]; + int node_num[g.vertex.size()]; + for (int i = 0; i < g.vertex.size(); i++) { + if (roots[i] == true) + work_list.push_back(i); + cant_fuse_with[i] = 0; + node_to_fused_nodes[i] = 0; + node_num[i] = -1; + } + // topological sort according to chun's permute algorithm + // std::vector<std::set<int> > s = g.topoSort(); + std::vector<std::set<int> > s2 = g.topoSort(); + if (work_list.empty() || (s2.size() != g.vertex.size())) { + + std::cout << s2.size() << "\t" << g.vertex.size() << std::endl; + throw loop_error("Input for fusion not a DAG!!"); + + + } + int fused_nodes_counter = 0; + while (!work_list.empty()) { 
+ int n = work_list.front(); + //int n_ = g.vertex[n].first; + work_list.pop_front(); + int node; + if (cant_fuse_with[n] == 0) + node = 0; + else + node = cant_fuse_with[n]; + + if ((fused_nodes_counter != 0) && (node != fused_nodes_counter)) { + int rep_node = node_to_fused_nodes[node]; + node_num[n] = node_num[rep_node]; + + try { + update_successors(n, node_num, cant_fuse_with, g, work_list); + } catch (const loop_error &e) { + + throw loop_error( + "statements cannot be fused together due to negative dependence"); + + + } + for (std::set<int>::iterator it = g.vertex[n].first.begin(); + it != g.vertex[n].first.end(); it++) + s[node].insert(*it); + } else { + //std::set<int> new_node; + //new_node.insert(n_); + s.push_back(g.vertex[n].first); + node_to_fused_nodes[node] = n; + node_num[n] = ++node; + try { + update_successors(n, node_num, cant_fuse_with, g, work_list); + } catch (const loop_error &e) { + + throw loop_error( + "statements cannot be fused together due to negative dependence"); + + + } + fused_nodes_counter++; + } + } + + return s; +} + +void Loop::setLexicalOrder(int dim, const std::set<int> &active, + int starting_order, std::vector<std::vector<std::string> > idxNames) { + if (active.size() == 0) + return; + + // check for sanity of parameters + if (dim < 0 || dim % 2 != 0) + throw std::invalid_argument( + "invalid constant loop level to set lexicographical order"); + std::vector<int> lex; + int ref_stmt_num; + for (std::set<int>::iterator i = active.begin(); i != active.end(); i++) { + if ((*i) < 0 || (*i) >= stmt.size()) + throw std::invalid_argument( + "invalid statement number " + to_string(*i)); + if (dim >= stmt[*i].xform.n_out()) + throw std::invalid_argument( + "invalid constant loop level to set lexicographical order"); + if (i == active.begin()) { + lex = getLexicalOrder(*i); + ref_stmt_num = *i; + } else { + std::vector<int> lex2 = getLexicalOrder(*i); + for (int j = 0; j < dim; j += 2) + if (lex[j] != lex2[j]) + throw 
std::invalid_argument( + "statements are not in the same sub loop nest"); + } + } + + // sepearate statements by current loop level types + int level = (dim + 2) / 2; + std::map<std::pair<LoopLevelType, int>, std::set<int> > active_by_level_type; + std::set<int> active_by_no_level; + for (std::set<int>::iterator i = active.begin(); i != active.end(); i++) { + if (level > stmt[*i].loop_level.size()) + active_by_no_level.insert(*i); + else + active_by_level_type[std::make_pair( + stmt[*i].loop_level[level - 1].type, + stmt[*i].loop_level[level - 1].payload)].insert(*i); + } + + // further separate statements due to control dependences + std::vector<std::set<int> > active_by_level_type_splitted; + for (std::map<std::pair<LoopLevelType, int>, std::set<int> >::iterator i = + active_by_level_type.begin(); i != active_by_level_type.end(); i++) + active_by_level_type_splitted.push_back(i->second); + for (std::set<int>::iterator i = active_by_no_level.begin(); + i != active_by_no_level.end(); i++) + for (int j = active_by_level_type_splitted.size() - 1; j >= 0; j--) { + std::set<int> controlled, not_controlled; + for (std::set<int>::iterator k = + active_by_level_type_splitted[j].begin(); + k != active_by_level_type_splitted[j].end(); k++) { + std::vector<DependenceVector> dvs = dep.getEdge(*i, *k); + bool is_controlled = false; + for (int kk = 0; kk < dvs.size(); kk++) + if (dvs[kk].type = DEP_CONTROL) { + is_controlled = true; + break; + } + if (is_controlled) + controlled.insert(*k); + else + not_controlled.insert(*k); + } + if (controlled.size() != 0 && not_controlled.size() != 0) { + active_by_level_type_splitted.erase( + active_by_level_type_splitted.begin() + j); + active_by_level_type_splitted.push_back(controlled); + active_by_level_type_splitted.push_back(not_controlled); + } + } + + // set lexical order separating loops with different loop types first + if (active_by_level_type_splitted.size() + active_by_no_level.size() > 1) { + int dep_dim = 
get_last_dep_dim_before(ref_stmt_num, level) + 1; + + Graph<std::set<int>, Empty> g; + for (std::vector<std::set<int> >::iterator i = + active_by_level_type_splitted.begin(); + i != active_by_level_type_splitted.end(); i++) + g.insert(*i); + for (std::set<int>::iterator i = active_by_no_level.begin(); + i != active_by_no_level.end(); i++) { + std::set<int> t; + t.insert(*i); + g.insert(t); + } + for (int i = 0; i < g.vertex.size(); i++) + for (int j = i + 1; j < g.vertex.size(); j++) { + bool connected = false; + for (std::set<int>::iterator ii = g.vertex[i].first.begin(); + ii != g.vertex[i].first.end(); ii++) { + for (std::set<int>::iterator jj = g.vertex[j].first.begin(); + jj != g.vertex[j].first.end(); jj++) { + std::vector<DependenceVector> dvs = dep.getEdge(*ii, + *jj); + for (int k = 0; k < dvs.size(); k++) + if (dvs[k].is_control_dependence() + || (dvs[k].is_data_dependence() + && !dvs[k].has_been_carried_before( + dep_dim))) { + g.connect(i, j); + connected = true; + break; + } + if (connected) + break; + } + if (connected) + break; + } + connected = false; + for (std::set<int>::iterator ii = g.vertex[i].first.begin(); + ii != g.vertex[i].first.end(); ii++) { + for (std::set<int>::iterator jj = g.vertex[j].first.begin(); + jj != g.vertex[j].first.end(); jj++) { + std::vector<DependenceVector> dvs = dep.getEdge(*jj, + *ii); + // find the sub loop nest specified by stmt_num and level, + // only iteration space satisfiable statements returned. 
+ for (int k = 0; k < dvs.size(); k++) + if (dvs[k].is_control_dependence() + || (dvs[k].is_data_dependence() + && !dvs[k].has_been_carried_before( + dep_dim))) { + g.connect(j, i); + connected = true; + break; + } + if (connected) + break; + } + if (connected) + break; + } + } + + std::vector<std::set<int> > s = g.topoSort(); + if (s.size() != g.vertex.size()) + throw loop_error( + "cannot separate statements with different loop types at loop level " + + to_string(level)); + + // assign lexical order + int order = starting_order; + for (int i = 0; i < s.size(); i++) { + std::set<int> &cur_scc = g.vertex[*(s[i].begin())].first; + int sz = cur_scc.size(); + if (sz == 1) { + int cur_stmt = *(cur_scc.begin()); + assign_const(stmt[cur_stmt].xform, dim, order); + for (int j = dim + 2; j < stmt[cur_stmt].xform.n_out(); j += 2) + assign_const(stmt[cur_stmt].xform, j, 0); + order++; + } else { + setLexicalOrder(dim, cur_scc, order, idxNames); + order += sz; + } + } + } + // set lexical order seperating single iteration statements and loops + else { + std::set<int> true_singles; + std::set<int> nonsingles; + std::map<coef_t, std::set<int> > fake_singles; + std::set<int> fake_singles_; + + // sort out statements that do not require loops + for (std::set<int>::iterator i = active.begin(); i != active.end(); + i++) { + Relation cur_IS = getNewIS(*i); + if (is_single_iteration(cur_IS, dim + 1)) { + bool is_all_single = true; + for (int j = dim + 3; j < stmt[*i].xform.n_out(); j += 2) + if (!is_single_iteration(cur_IS, j)) { + is_all_single = false; + break; + } + if (is_all_single) + true_singles.insert(*i); + else { + fake_singles_.insert(*i); + try { + fake_singles[get_const(cur_IS, dim + 1, Set_Var)].insert( + *i); + } catch (const std::exception &e) { + fake_singles[posInfinity].insert(*i); + } + } + } else + nonsingles.insert(*i); + } + + + // split nonsingles forcibly according to negative dependences present (loop unfusible) + int dep_dim = get_dep_dim_of(ref_stmt_num, 
level); + + if (dim < stmt[ref_stmt_num].xform.n_out() - 1) { + + bool dummy_level_found = false; + + std::vector<std::set<int> > s; + + s = sort_by_same_loops(active, level); + bool further_levels_exist = false; + + if (!idxNames.empty()) + if (level <= idxNames[ref_stmt_num].size()) + if (idxNames[ref_stmt_num][level - 1].length() == 0) { + // && s.size() == 1) { + int order1 = 0; + dummy_level_found = true; + + for (int i = level; i < idxNames[ref_stmt_num].size(); + i++) + if (idxNames[ref_stmt_num][i].length() > 0) + further_levels_exist = true; + + } + + //if (!dummy_level_found) { + + if (s.size() > 1) { + + Graph<std::set<int>, bool> g = construct_induced_graph_at_level( + s, dep, dep_dim); + s = typed_fusion(g); + } + int order = 0; + for (int i = 0; i < s.size(); i++) { + + for (std::set<int>::iterator it = s[i].begin(); + it != s[i].end(); it++) + assign_const(stmt[*it].xform, dim, order); + + if ((dim + 2) <= (stmt[ref_stmt_num].xform.n_out() - 1)) + setLexicalOrder(dim + 2, s[i], order, idxNames); + + order++; + } + //} + /* else { + + int order1 = 0; + int order = 0; + for (std::set<int>::iterator i = active.begin(); + i != active.end(); i++) { + if (!further_levels_exist) + assign_const(stmt[*i].xform, dim, order1++); + else + assign_const(stmt[*i].xform, dim, order1); + + } + + if ((dim + 2) <= (stmt[ref_stmt_num].xform.n_out() - 1) && further_levels_exist) + setLexicalOrder(dim + 2, active, order, idxNames); + } + */ + } else { + int dummy_order = 0; + for (std::set<int>::iterator i = active.begin(); i != active.end(); + i++) + assign_const(stmt[*i].xform, dim, dummy_order++); + } + /*for (int i = 0; i < g2.vertex.size(); i++) + for (int j = i+1; j < g2.vertex.size(); j++) { + std::vector<DependenceVector> dvs = dep.getEdge(g2.vertex[i].first, g2.vertex[j].first); + for (int k = 0; k < dvs.size(); k++) + if (dvs[k].is_control_dependence() || + (dvs[k].is_data_dependence() && dvs[k].has_negative_been_carried_at(dep_dim))) { + g2.connect(i, j); + 
break; + } + dvs = dep.getEdge(g2.vertex[j].first, g2.vertex[i].first); + for (int k = 0; k < dvs.size(); k++) + if (dvs[k].is_control_dependence() || + (dvs[k].is_data_dependence() && dvs[k].has_negative_been_carried_at(dep_dim))) { + g2.connect(j, i); + break; + } + } + + std::vector<std::set<int> > s2 = g2.packed_topoSort(); + + std::vector<std::set<int> > splitted_nonsingles; + for (int i = 0; i < s2.size(); i++) { + std::set<int> cur_scc; + for (std::set<int>::iterator j = s2[i].begin(); j != s2[i].end(); j++) + cur_scc.insert(g2.vertex[*j].first); + splitted_nonsingles.push_back(cur_scc); + } + */ + //convert to dependence graph for grouped statements + //dep_dim = get_last_dep_dim_before(ref_stmt_num, level) + 1; + /*int order = 0; + for (std::set<int>::iterator j = active.begin(); j != active.end(); + j++) { + std::set<int> continuous; + std::cout<< active.size()<<std::endl; + while (nonsingles.find(*j) != nonsingles.end() && j != active.end()) { + continuous.insert(*j); + j++; + } + + printf("continuous size is %d\n", continuous.size()); + + + + if (continuous.size() > 0) { + std::vector<std::set<int> > s = typed_fusion(continuous, dep, + dep_dim); + + for (int i = 0; i < s.size(); i++) { + for (std::set<int>::iterator l = s[i].begin(); + l != s[i].end(); l++) { + assign_const(stmt[*l].xform, dim + 2, order); + setLexicalOrder(dim + 2, s[i]); + } + order++; + } + } + + if (j != active.end()) { + assign_const(stmt[*j].xform, dim + 2, order); + + for (int k = dim + 4; k < stmt[*j].xform.n_out(); k += 2) + assign_const(stmt[*j].xform, k, 0); + order++; + } + + if( j == active.end()) + break; + } + */ + + + // assign lexical order + /*int order = starting_order; + for (int i = 0; i < s.size(); i++) { + // translate each SCC into original statements + std::set<int> cur_scc; + for (std::set<int>::iterator j = s[i].begin(); j != s[i].end(); j++) + copy(s[i].begin(), s[i].end(), + inserter(cur_scc, cur_scc.begin())); + + // now assign the constant + for 
(std::set<int>::iterator j = cur_scc.begin(); + j != cur_scc.end(); j++) + assign_const(stmt[*j].xform, dim, order); + + if (cur_scc.size() > 1) + setLexicalOrder(dim + 2, cur_scc); + else if (cur_scc.size() == 1) { + int cur_stmt = *(cur_scc.begin()); + for (int j = dim + 2; j < stmt[cur_stmt].xform.n_out(); j += 2) + assign_const(stmt[cur_stmt].xform, j, 0); + } + + if (cur_scc.size() > 0) + order++; + } + */ + } +} + +void Loop::apply_xform() { + std::set<int> active; + for (int i = 0; i < stmt.size(); i++) + active.insert(i); + apply_xform(active); +} + +void Loop::apply_xform(int stmt_num) { + std::set<int> active; + active.insert(stmt_num); + apply_xform(active); +} + +void Loop::apply_xform(std::set<int> &active) { + int max_n = 0; + + CG_outputBuilder *ocg = ir->builder(); + for (std::set<int>::iterator i = active.begin(); i != active.end(); i++) { + int n = stmt[*i].loop_level.size(); + if (n > max_n) + max_n = n; + + std::vector<int> lex = getLexicalOrder(*i); + + Relation mapping(2 * n + 1, n); + F_And *f_root = mapping.add_and(); + for (int j = 1; j <= n; j++) { + EQ_Handle h = f_root->add_EQ(); + h.update_coef(mapping.output_var(j), 1); + h.update_coef(mapping.input_var(2 * j), -1); + } + mapping = Composition(mapping, stmt[*i].xform); + mapping.simplify(); + + // match omega input/output variables to variable names in the code + for (int j = 1; j <= stmt[*i].IS.n_set(); j++) + mapping.name_input_var(j, stmt[*i].IS.set_var(j)->name()); + for (int j = 1; j <= n; j++) + mapping.name_output_var(j, + tmp_loop_var_name_prefix + + to_string(tmp_loop_var_name_counter + j - 1)); + mapping.setup_names(); + + Relation known = Extend_Set(copy(this->known), + mapping.n_out() - this->known.n_set()); + //stmt[*i].code = outputStatement(ocg, stmt[*i].code, 0, mapping, known, std::vector<CG_outputRepr *>(mapping.n_out(), NULL)); + std::vector<std::string> loop_vars; + for (int j = 1; j <= stmt[*i].IS.n_set(); j++) + loop_vars.push_back(stmt[*i].IS.set_var(j)->name()); 
+ std::vector<CG_outputRepr *> subs = output_substitutions(ocg, + Inverse(copy(mapping)), + std::vector<std::pair<CG_outputRepr *, int> >(mapping.n_out(), + std::make_pair(static_cast<CG_outputRepr *>(NULL), 0))); + stmt[*i].code = ocg->CreateSubstitutedStmt(0, stmt[*i].code, loop_vars, + subs); + stmt[*i].IS = Range(Restrict_Domain(mapping, stmt[*i].IS)); + stmt[*i].IS.simplify(); + + // replace original transformation relation with straight 1-1 mapping + mapping = Relation(n, 2 * n + 1); + f_root = mapping.add_and(); + for (int j = 1; j <= n; j++) { + EQ_Handle h = f_root->add_EQ(); + h.update_coef(mapping.output_var(2 * j), 1); + h.update_coef(mapping.input_var(j), -1); + } + for (int j = 1; j <= 2 * n + 1; j += 2) { + EQ_Handle h = f_root->add_EQ(); + h.update_coef(mapping.output_var(j), 1); + h.update_const(-lex[j - 1]); + } + stmt[*i].xform = mapping; + } + + tmp_loop_var_name_counter += max_n; +} + +void Loop::addKnown(const Relation &cond) { + + // invalidate saved codegen computation + delete last_compute_cgr_; + last_compute_cgr_ = NULL; + delete last_compute_cg_; + last_compute_cg_ = NULL; + + int n1 = this->known.n_set(); + + Relation r = copy(cond); + int n2 = r.n_set(); + + if (n1 < n2) + this->known = Extend_Set(this->known, n2 - n1); + else if (n1 > n2) + r = Extend_Set(r, n1 - n2); + + this->known = Intersection(this->known, r); +} + +void Loop::removeDependence(int stmt_num_from, int stmt_num_to) { + // check for sanity of parameters + if (stmt_num_from >= stmt.size()) + throw std::invalid_argument( + "invalid statement number " + to_string(stmt_num_from)); + if (stmt_num_to >= stmt.size()) + throw std::invalid_argument( + "invalid statement number " + to_string(stmt_num_to)); + + dep.disconnect(stmt_num_from, stmt_num_to); +} + +void Loop::dump() const { + for (int i = 0; i < stmt.size(); i++) { + std::vector<int> lex = getLexicalOrder(i); + std::cout << "s" << i + 1 << ": "; + for (int j = 0; j < stmt[i].loop_level.size(); j++) { + if (2 * j < 
lex.size()) + std::cout << lex[2 * j]; + switch (stmt[i].loop_level[j].type) { + case LoopLevelOriginal: + std::cout << "(dim:" << stmt[i].loop_level[j].payload << ")"; + break; + case LoopLevelTile: + std::cout << "(tile:" << stmt[i].loop_level[j].payload << ")"; + break; + default: + std::cout << "(unknown)"; + } + std::cout << ' '; + } + for (int j = 2 * stmt[i].loop_level.size(); j < lex.size(); j += 2) { + std::cout << lex[j]; + if (j != lex.size() - 1) + std::cout << ' '; + } + std::cout << std::endl; + } +} + +bool Loop::nonsingular(const std::vector<std::vector<int> > &T) { + if (stmt.size() == 0) + return true; + + // check for sanity of parameters + for (int i = 0; i < stmt.size(); i++) { + if (stmt[i].loop_level.size() != num_dep_dim) + throw std::invalid_argument( + "nonsingular loop transformations must be applied to original perfect loop nest"); + for (int j = 0; j < stmt[i].loop_level.size(); j++) + if (stmt[i].loop_level[j].type != LoopLevelOriginal) + throw std::invalid_argument( + "nonsingular loop transformations must be applied to original perfect loop nest"); + } + if (T.size() != num_dep_dim) + throw std::invalid_argument("invalid transformation matrix"); + for (int i = 0; i < stmt.size(); i++) + if (T[i].size() != num_dep_dim + 1 && T[i].size() != num_dep_dim) + throw std::invalid_argument("invalid transformation matrix"); + // invalidate saved codegen computation + delete last_compute_cgr_; + last_compute_cgr_ = NULL; + delete last_compute_cg_; + last_compute_cg_ = NULL; + // build relation from matrix + Relation mapping(2 * num_dep_dim + 1, 2 * num_dep_dim + 1); + F_And *f_root = mapping.add_and(); + for (int i = 0; i < num_dep_dim; i++) { + EQ_Handle h = f_root->add_EQ(); + h.update_coef(mapping.output_var(2 * (i + 1)), -1); + for (int j = 0; j < num_dep_dim; j++) + if (T[i][j] != 0) + h.update_coef(mapping.input_var(2 * (j + 1)), T[i][j]); + if (T[i].size() == num_dep_dim + 1) + h.update_const(T[i][num_dep_dim]); + } + for (int i = 1; i 
<= 2 * num_dep_dim + 1; i += 2) { + EQ_Handle h = f_root->add_EQ(); + h.update_coef(mapping.output_var(i), -1); + h.update_coef(mapping.input_var(i), 1); + } + + // update transformation relations + for (int i = 0; i < stmt.size(); i++) + stmt[i].xform = Composition(copy(mapping), stmt[i].xform); + + // update dependence graph + for (int i = 0; i < dep.vertex.size(); i++) + for (DependenceGraph::EdgeList::iterator j = + dep.vertex[i].second.begin(); j != dep.vertex[i].second.end(); + j++) { + std::vector<DependenceVector> dvs = j->second; + for (int k = 0; k < dvs.size(); k++) { + DependenceVector &dv = dvs[k]; + switch (dv.type) { + case DEP_W2R: + case DEP_R2W: + case DEP_W2W: + case DEP_R2R: { + std::vector<coef_t> lbounds(num_dep_dim), ubounds( + num_dep_dim); + for (int p = 0; p < num_dep_dim; p++) { + coef_t lb = 0; + coef_t ub = 0; + for (int q = 0; q < num_dep_dim; q++) { + if (T[p][q] > 0) { + if (lb == -posInfinity + || dv.lbounds[q] == -posInfinity) + lb = -posInfinity; + else + lb += T[p][q] * dv.lbounds[q]; + if (ub == posInfinity + || dv.ubounds[q] == posInfinity) + ub = posInfinity; + else + ub += T[p][q] * dv.ubounds[q]; + } else if (T[p][q] < 0) { + if (lb == -posInfinity + || dv.ubounds[q] == posInfinity) + lb = -posInfinity; + else + lb += T[p][q] * dv.ubounds[q]; + if (ub == posInfinity + || dv.lbounds[q] == -posInfinity) + ub = posInfinity; + else + ub += T[p][q] * dv.lbounds[q]; + } + } + if (T[p].size() == num_dep_dim + 1) { + if (lb != -posInfinity) + lb += T[p][num_dep_dim]; + if (ub != posInfinity) + ub += T[p][num_dep_dim]; + } + lbounds[p] = lb; + ubounds[p] = ub; + } + dv.lbounds = lbounds; + dv.ubounds = ubounds; + + break; + } + default: + ; + } + } + j->second = dvs; + } + + // set constant loop values + std::set<int> active; + for (int i = 0; i < stmt.size(); i++) + active.insert(i); + setLexicalOrder(0, active); + + return true; +} + + +bool Loop::is_dependence_valid_based_on_lex_order(int i, int j, + const DependenceVector &dv, 
bool before) { + std::vector<int> lex_i = getLexicalOrder(i); + std::vector<int> lex_j = getLexicalOrder(j); + int last_dim; + if (!dv.is_scalar_dependence) { + for (last_dim = 0; + last_dim < lex_i.size() && (lex_i[last_dim] == lex_j[last_dim]); + last_dim++) + ; + last_dim = last_dim / 2; + if (last_dim == 0) + return true; + + for (int i = 0; i < last_dim; i++) { + if (dv.lbounds[i] > 0) + return true; + else if (dv.lbounds[i] < 0) + return false; + } + } + if (before) + return true; + + return false; + +} + @@ -0,0 +1,168 @@ +#ifndef LOOP_HH +#define LOOP_HH + +#include <omega.h> +#include <codegen.h> +#include <code_gen/CG.h> +#include <vector> +#include <map> +#include <set> +#include "dep.hh" +#include "ir_code.hh" +#include "irtools.hh" + +class IR_Code; + +enum TilingMethodType { StridedTile, CountedTile }; +enum LoopLevelType { LoopLevelOriginal, LoopLevelTile, LoopLevelUnknown }; + + +// Describes properties of each loop level of a statement. "payload" +// for LoopLevelOriginal means iteration space dimension, for +// LoopLevelTile means tiled loop level. Special value -1 for +// LoopLevelTile means purely derived loop. For dependence dimension +// payloads, the values must be in an increasing order. +// "parallel_level" will be used by code generation to support +// multi-level parallelization (default 0 means sequential loop under +// the current parallelization level). +struct LoopLevel { + LoopLevelType type; + int payload; + int parallel_level; +}; + +struct Statement { + omega::CG_outputRepr *code; + omega::Relation IS; + omega::Relation xform; + std::vector<LoopLevel> loop_level; + ir_tree_node *ir_stmt_node; + //protonu--temporarily putting this back here + //omega::Tuple<int> nonSplitLevels; + //end--protonu. 
+}; + + +class Loop { +protected: + int tmp_loop_var_name_counter; + static const std::string tmp_loop_var_name_prefix; + int overflow_var_name_counter; + static const std::string overflow_var_name_prefix; + std::vector<int> stmt_nesting_level_; + std::vector<std::string> index; + std::map<int, omega::CG_outputRepr *> replace; + +public: + IR_Code *ir; + std::vector<omega::Free_Var_Decl*> freevar; + std::vector<Statement> stmt; + std::vector<ir_tree_node *> ir_stmt; + std::vector<ir_tree_node *> ir_tree; + DependenceGraph dep; + int num_dep_dim; + omega::Relation known; + omega::CG_outputRepr *init_code; + omega::CG_outputRepr *cleanup_code; + std::map<int, std::vector<omega::Free_Var_Decl *> > overflow; + + +protected: + mutable omega::CodeGen *last_compute_cg_; + mutable omega::CG_result *last_compute_cgr_; + mutable int last_compute_effort_; + +protected: + bool init_loop(std::vector<ir_tree_node *> &ir_tree, std::vector<ir_tree_node *> &ir_stmt); + int get_dep_dim_of(int stmt, int level) const; + int get_last_dep_dim_before(int stmt, int level) const; + std::vector<omega::Relation> getNewIS() const; + omega::Relation getNewIS(int stmt_num) const; + std::vector<int> getLexicalOrder(int stmt_num) const; + int getLexicalOrder(int stmt_num, int level) const; + std::set<int> getStatements(const std::vector<int> &lex, int dim) const; + void shiftLexicalOrder(const std::vector<int> &lex, int dim, int amount); + void setLexicalOrder(int dim, const std::set<int> &active, int starting_order = 0, std::vector< std::vector<std::string> >idxNames= std::vector< std::vector<std::string> >()); + void apply_xform(int stmt_num); + void apply_xform(std::set<int> &active); + void apply_xform(); + std::set<int> getSubLoopNest(int stmt_num, int level) const; + + +public: + Loop() { ir = NULL; tmp_loop_var_name_counter = 1; init_code = NULL; } + Loop(const IR_Control *control); + ~Loop(); + + omega::CG_outputRepr *getCode(int effort = 1) const; + void printCode(int effort = 1) const; 
+ void addKnown(const omega::Relation &cond); + void print_internal_loop_structure() const; + bool isInitialized() const; + int num_statement() const { return stmt.size(); } + void printIterationSpace() const; + void printDependenceGraph() const; + void removeDependence(int stmt_num_from, int stmt_num_to); + void dump() const; + + std::vector<std::set <int > > sort_by_same_loops(std::set<int > active, int level); + // + // legacy unimodular transformations for perfectly nested loops + // e.g. M*(i,j)^T = (i',j')^T or M*(i,j,1)^T = (i',j')^T + // + bool nonsingular(const std::vector<std::vector<int> > &M); + + // + // high-level loop transformations + // + void permute(const std::set<int> &active, const std::vector<int> &pi); + void permute(int stmt_num, int level, const std::vector<int> &pi); + void permute(const std::vector<int> &pi); + void original(); + + void tile(int stmt_num, int level, int tile_size, int outer_level = 1, TilingMethodType method = StridedTile, int alignment_offset = 0, int alignment_multiple = 1); + std::set<int> split(int stmt_num, int level, const omega::Relation &cond); + std::set<int> unroll(int stmt_num, int level, int unroll_amount, std::vector< std::vector<std::string> >idxNames= std::vector< std::vector<std::string> >(), int cleanup_split_level = 0); + + bool datacopy(const std::vector<std::pair<int, std::vector<int> > > &array_ref_nums, int level, bool allow_extra_read = false, int fastest_changing_dimension = -1, int padding_stride = 1, int padding_alignment = 4, int memory_type = 0); + bool datacopy(int stmt_num, int level, const std::string &array_name, bool allow_extra_read = false, int fastest_changing_dimension = -1, int padding_stride = 1, int padding_alignment = 4, int memory_type = 0); + bool datacopy_privatized(int stmt_num, int level, const std::string &array_name, const std::vector<int> &privatized_levels, bool allow_extra_read = false, int fastest_changing_dimension = -1, int padding_stride = 1, int padding_alignment = 
1, int memory_type = 0); + bool datacopy_privatized(const std::vector<std::pair<int, std::vector<int> > > &array_ref_nums, int level, const std::vector<int> &privatized_levels, bool allow_extra_read = false, int fastest_changing_dimension = -1, int padding_stride = 1, int padding_alignment = 1, int memory_type = 0); + bool datacopy_privatized(const std::vector<std::pair<int, std::vector<IR_ArrayRef *> > > &stmt_refs, int level, const std::vector<int> &privatized_levels, bool allow_extra_read, int fastest_changing_dimension, int padding_stride, int padding_alignment, int memory_type = 0); + //std::set<int> scalar_replacement_inner(int stmt_num); + + + + Graph<std::set<int>, bool> construct_induced_graph_at_level(std::vector<std::set<int> > s, DependenceGraph dep, int dep_dim); + std::vector<std::set<int> > typed_fusion(Graph<std::set<int>, bool> g); + void fuse(const std::set<int> &stmt_nums, int level); + void distribute(const std::set<int> &stmt_nums, int level); + void skew(const std::set<int> &stmt_nums, int level, const std::vector<int> &skew_amount); + void shift(const std::set<int> &stmt_nums, int level, int shift_amount); + void scale(const std::set<int> &stmt_nums, int level, int scale_amount); + void reverse(const std::set<int> &stmt_nums, int level); + void peel(int stmt_num, int level, int peel_amount = 1); + // + // more fancy loop transformations + // + void modular_shift(int stmt_num, int level, int shift_amount) {} + void diagonal_map(int stmt_num, const std::pair<int, int> &levels, int offset) {} + void modular_partition(int stmt_num, int level, int stride) {} + + // + // derived loop transformations + // + void shift_to(int stmt_num, int level, int absolute_position); + std::set<int> unroll_extra(int stmt_num, int level, int unroll_amount, int cleanup_split_level = 0); + bool is_dependence_valid_based_on_lex_order(int i, int j, + const DependenceVector &dv, bool before); + // + // other public operations + // + void pragma(int stmt_num, int level, 
const std::string &pragmaText); + void prefetch(int stmt_num, int level, const std::string &arrName, int hint); + //void prefetch(int stmt_num, int level, const std::string &arrName, const std::string &indexName, int offset, int hint); +}; + + +#endif diff --git a/loop_backup.cc b/loop_backup.cc new file mode 100644 index 0000000..b361ed4 --- /dev/null +++ b/loop_backup.cc @@ -0,0 +1,3311 @@ +/***************************************************************************** + Copyright (C) 2008 University of Southern California + Copyright (C) 2009-2010 University of Utah + All Rights Reserved. + + Purpose: + Core loop transformation functionality. + + Notes: + "level" (starting from 1) means loop level and it corresponds to "dim" + (starting from 0) in transformed iteration space [c_1,l_1,c_2,l_2,...., + c_n,l_n,c_(n+1)], e.g., l_2 is loop level 2 in generated code, dim 3 + in transformed iteration space, and variable 4 in Omega relation. + All c's are constant numbers only and they will not show up as actual loops. + Formula: + dim = 2*level - 1 + var = dim + 1 + + History: + 10/2005 Created by Chun Chen. 
+ 09/2009 Expand tile functionality, -chun + 10/2009 Initialize unfusible loop nest without bailing out, -chun +*****************************************************************************/ + +#include <limits.h> +#include <math.h> +#include <code_gen/code_gen.h> +#include <code_gen/CG_outputBuilder.h> +#include <code_gen/output_repr.h> +#include <iostream> +#include <map> +#include "loop.hh" +#include "omegatools.hh" +#include "irtools.hh" +#include "chill_error.hh" + +using namespace omega; + +const std::string Loop::tmp_loop_var_name_prefix = std::string("_t"); +const std::string Loop::overflow_var_name_prefix = std::string("over"); + +//----------------------------------------------------------------------------- +// Class Loop +//----------------------------------------------------------------------------- + +bool Loop::init_loop(std::vector<ir_tree_node *> &ir_tree, std::vector<ir_tree_node *> &ir_stmt) { + ir_stmt = extract_ir_stmts(ir_tree); + std::vector<int> stmt_nesting_level(ir_stmt.size()); + for (int i = 0; i < ir_stmt.size(); i++) { + ir_stmt[i]->payload = i; + int t = 0; + ir_tree_node *itn = ir_stmt[i]; + while (itn->parent != NULL) { + itn = itn->parent; + if (itn->content->type() == IR_CONTROL_LOOP) + t++; + } + stmt_nesting_level[i] = t; + } + + stmt = std::vector<Statement>(ir_stmt.size()); + int n_dim = -1; + int max_loc; + std::vector<std::string> index; + for (int i = 0; i < ir_stmt.size(); i++) { + int max_nesting_level = -1; + int loc; + for (int j = 0; j < ir_stmt.size(); j++) + if (stmt_nesting_level[j] > max_nesting_level) { + max_nesting_level = stmt_nesting_level[j]; + loc = j; + } + + // most deeply nested statement acting as a reference point + if (n_dim == -1) { + n_dim = max_nesting_level; + max_loc = loc; + + index = std::vector<std::string>(n_dim); + + ir_tree_node *itn = ir_stmt[loc]; + int cur_dim = n_dim-1; + while (itn->parent != NULL) { + itn = itn->parent; + if (itn->content->type() == IR_CONTROL_LOOP) { + index[cur_dim] 
= static_cast<IR_Loop *>(itn->content)->index()->name(); + itn->payload = cur_dim--; + } + } + } + + // align loops by names, temporary solution + ir_tree_node *itn = ir_stmt[loc]; + while (itn->parent != NULL) { + itn = itn->parent; + if (itn->content->type() == IR_CONTROL_LOOP && itn->payload == -1) { + std::string name = static_cast<IR_Loop *>(itn->content)->index()->name(); + for (int j = 0; j < n_dim; j++) + if (index[j] == name) { + itn->payload = j; + break; + } + if (itn->payload == -1) + throw loop_error("no complex alignment yet"); + } + } + + // set relation variable names + Relation r(n_dim); + F_And *f_root = r.add_and(); + itn = ir_stmt[loc]; + while (itn->parent != NULL) { + itn = itn->parent; + if (itn->content->type() == IR_CONTROL_LOOP) + r.name_set_var(itn->payload+1, static_cast<IR_Loop *>(itn->content)->index()->name()); + } + + // extract information from loop/if structures + std::vector<bool> processed(n_dim, false); + Tuple<std::string> vars_to_be_reversed; + itn = ir_stmt[loc]; + while (itn->parent != NULL) { + itn = itn->parent; + + switch (itn->content->type()) { + case IR_CONTROL_LOOP: { + IR_Loop *lp = static_cast<IR_Loop *>(itn->content); + Variable_ID v = r.set_var(itn->payload+1); + int c; + + try { + c = lp->step_size(); + if (c > 0) { + CG_outputRepr *lb = lp->lower_bound(); + exp2formula(ir, r, f_root, freevar, lb, v, 's', IR_COND_GE, true); + CG_outputRepr *ub = lp->upper_bound(); + IR_CONDITION_TYPE cond = lp->stop_cond(); + if (cond == IR_COND_LT || cond == IR_COND_LE) + exp2formula(ir, r, f_root, freevar, ub, v, 's', cond, true); + else + throw ir_error("loop condition not supported"); + + } + else if (c < 0) { + CG_outputBuilder *ocg = ir->builder(); + CG_outputRepr *lb = lp->lower_bound(); + lb = ocg->CreateMinus(NULL, lb); + exp2formula(ir, r, f_root, freevar, lb, v, 's', IR_COND_GE, true); + CG_outputRepr *ub = lp->upper_bound(); + ub = ocg->CreateMinus(NULL, ub); + IR_CONDITION_TYPE cond = lp->stop_cond(); + if (cond == 
IR_COND_GE) + exp2formula(ir, r, f_root, freevar, ub, v, 's', IR_COND_LE, true); + else if (cond == IR_COND_GT) + exp2formula(ir, r, f_root, freevar, ub, v, 's', IR_COND_LT, true); + else + throw ir_error("loop condition not supported"); + + vars_to_be_reversed.append(lp->index()->name()); + } + else + throw ir_error("loop step size zero"); + } + catch (const ir_error &e) { + for (int i = 0; i < itn->children.size(); i++) + delete itn->children[i]; + itn->children = std::vector<ir_tree_node *>(); + itn->content = itn->content->convert(); + return false; + } + + if (abs(c) != 1) { + F_Exists *f_exists = f_root->add_exists(); + Variable_ID e = f_exists->declare(); + F_And *f_and = f_exists->add_and(); + Stride_Handle h = f_and->add_stride(abs(c)); + if (c > 0) + h.update_coef(e, 1); + else + h.update_coef(e, -1); + h.update_coef(v, -1); + CG_outputRepr *lb = lp->lower_bound(); + exp2formula(ir, r, f_and, freevar, lb, e, 's', IR_COND_EQ, true); + } + + processed[itn->payload] = true; + break; + } + case IR_CONTROL_IF: { + CG_outputRepr *cond = static_cast<IR_If *>(itn->content)->condition(); + try { + if (itn->payload % 2 == 1) + exp2constraint(ir, r, f_root, freevar, cond, true); + else { + F_Not *f_not = f_root->add_not(); + F_And *f_and = f_not->add_and(); + exp2constraint(ir, r, f_and, freevar, cond, true); + } + } + catch (const ir_error &e) { + std::vector<ir_tree_node *> *t; + if (itn->parent == NULL) + t = &ir_tree; + else + t = &(itn->parent->children); + int id = itn->payload; + int i = t->size() - 1; + while (i >= 0) { + if ((*t)[i] == itn) { + for (int j = 0; j < itn->children.size(); j++) + delete itn->children[j]; + itn->children = std::vector<ir_tree_node *>(); + itn->content = itn->content->convert(); + } + else if ((*t)[i]->payload >> 1 == id >> 1) { + delete (*t)[i]; + t->erase(t->begin()+i); + } + i--; + } + return false; + } + + break; + } + default: + for (int i = 0; i < itn->children.size(); i++) + delete itn->children[i]; + itn->children = 
std::vector<ir_tree_node *>(); + itn->content = itn->content->convert(); + return false; + } + } + + // add information for missing loops + for (int j = 0; j < n_dim; j++) + if (!processed[j]) { + ir_tree_node *itn = ir_stmt[max_loc]; + while (itn->parent != NULL) { + itn = itn->parent; + if (itn->content->type() == IR_CONTROL_LOOP && itn->payload == j) + break; + } + + Variable_ID v = r.set_var(j+1); + if (loc < max_loc) { + CG_outputRepr *lb = static_cast<IR_Loop *>(itn->content)->lower_bound(); + exp2formula(ir, r, f_root, freevar, lb, v, 's', IR_COND_EQ, true); + } + else { // loc > max_loc + CG_outputRepr *ub = static_cast<IR_Loop *>(itn->content)->upper_bound(); + exp2formula(ir, r, f_root, freevar, ub, v, 's', IR_COND_EQ, true); + } + } + + r.setup_names(); + r.simplify(); + + // insert the statement + CG_outputBuilder *ocg = ir->builder(); + Tuple<CG_outputRepr *> reverse_expr; + for (int j = 1; j <= vars_to_be_reversed.size(); j++) { + CG_outputRepr *repl = ocg->CreateIdent(vars_to_be_reversed[j]); + repl = ocg->CreateMinus(NULL, repl); + reverse_expr.append(repl); + } + CG_outputRepr *code = static_cast<IR_Block *>(ir_stmt[loc]->content)->extract(); + code = ocg->CreatePlaceHolder(0, code, reverse_expr, vars_to_be_reversed); + stmt[loc].code = code; + stmt[loc].IS = r; + stmt[loc].loop_level = std::vector<LoopLevel>(n_dim); + for (int i = 0; i < n_dim; i++) { + stmt[loc].loop_level[i].type = LoopLevelOriginal; + stmt[loc].loop_level[i].payload = i; + stmt[loc].loop_level[i].parallel_level = 0; + } + + stmt_nesting_level[loc] = -1; + } + + return true; +} + + + +Loop::Loop(const IR_Control *control) { + ir = const_cast<IR_Code *>(control->ir_); + init_code = NULL; + cleanup_code = NULL; + tmp_loop_var_name_counter = 1; + overflow_var_name_counter = 1; + known = Relation::True(0); + + std::vector<ir_tree_node *> ir_tree = build_ir_tree(control->clone(), NULL); + std::vector<ir_tree_node *> ir_stmt; + + while (!init_loop(ir_tree, ir_stmt)) {} + + // init the 
dependence graph + for (int i = 0; i < stmt.size(); i++) + dep.insert(); + + for (int i = 0; i < stmt.size(); i++) + for (int j = i; j < stmt.size(); j++) { + std::pair<std::vector<DependenceVector>, std::vector<DependenceVector> > dv = test_data_dependences(ir, stmt[i].code, stmt[i].IS, stmt[j].code, stmt[j].IS, freevar); + + for (int k = 0; k < dv.first.size(); k++) + if (is_dependence_valid(ir_stmt[i], ir_stmt[j], dv.first[k], true)) + dep.connect(i, j, dv.first[k]); + else + dep.connect(j, i, dv.first[k].reverse()); + + for (int k = 0; k < dv.second.size(); k++) + if (is_dependence_valid(ir_stmt[j], ir_stmt[i], dv.second[k], false)) + dep.connect(j, i, dv.second[k]); + else + dep.connect(i, j, dv.second[k].reverse()); + } + + // cleanup the IR tree + for (int i = 0; i < ir_tree.size(); i++) + delete ir_tree[i]; + + // init dumb transformation relations e.g. [i, j] -> [ 0, i, 0, j, 0] + for (int i = 0; i < stmt.size(); i++) { + int n = stmt[i].IS.n_set(); + stmt[i].xform = Relation(n, 2*n+1); + F_And *f_root = stmt[i].xform.add_and(); + + for (int j = 1; j <= n; j++) { + EQ_Handle h = f_root->add_EQ(); + h.update_coef(stmt[i].xform.output_var(2*j), 1); + h.update_coef(stmt[i].xform.input_var(j), -1); + } + + for (int j = 1; j <= 2*n+1; j+=2) { + EQ_Handle h = f_root->add_EQ(); + h.update_coef(stmt[i].xform.output_var(j), 1); + } + stmt[i].xform.simplify(); + } + + if (stmt.size() != 0) + num_dep_dim = stmt[0].IS.n_set(); + else + num_dep_dim = 0; +} + + +Loop::~Loop() { + for (int i = 0; i < stmt.size(); i++) + if (stmt[i].code != NULL) { + stmt[i].code->clear(); + delete stmt[i].code; + } + if (init_code != NULL) { + init_code->clear(); + delete init_code; + } + if (cleanup_code != NULL) { + cleanup_code->clear(); + delete cleanup_code; + } +} + + +int Loop::get_dep_dim_of(int stmt_num, int level) const { + if (stmt_num < 0 || stmt_num >= stmt.size()) + throw std::invalid_argument("invaid statement " + to_string(stmt_num)); + + if (level < 1 || level > 
stmt[stmt_num].loop_level.size()) + return -1; + + int trip_count = 0; + while (true) { + switch (stmt[stmt_num].loop_level[level-1].type) { + case LoopLevelOriginal: + return stmt[stmt_num].loop_level[level-1].payload; + case LoopLevelTile: + level = stmt[stmt_num].loop_level[level-1].payload; + if (level < 1) + return -1; + if (level > stmt[stmt_num].loop_level.size()) + throw loop_error("incorrect loop level information for statement " + to_string(stmt_num)); + break; + default: + throw loop_error("unknown loop level information for statement " + to_string(stmt_num)); + } + trip_count++; + if (trip_count >= stmt[stmt_num].loop_level.size()) + throw loop_error("incorrect loop level information for statement " + to_string(stmt_num)); + } +} + + +int Loop::get_last_dep_dim_before(int stmt_num, int level) const { + if (stmt_num < 0 || stmt_num >= stmt.size()) + throw std::invalid_argument("invaid statement " + to_string(stmt_num)); + + if (level < 1) + return -1; + if (level > stmt[stmt_num].loop_level.size()) + level = stmt[stmt_num].loop_level.size() + 1; + + for (int i = level-1; i >= 1; i--) + if (stmt[stmt_num].loop_level[i-1].type == LoopLevelOriginal) + return stmt[stmt_num].loop_level[i-1].payload; + + return -1; +} + + +void Loop::print_internal_loop_structure() const { + for (int i = 0; i < stmt.size(); i++) { + std::vector<int> lex = getLexicalOrder(i); + std::cout << "s" << i+1 << ": "; + for (int j = 0; j < stmt[i].loop_level.size(); j++) { + if (2*j < lex.size()) + std::cout << lex[2*j]; + switch (stmt[i].loop_level[j].type) { + case LoopLevelOriginal: + std::cout << "(dim:" << stmt[i].loop_level[j].payload << ")"; + break; + case LoopLevelTile: + std::cout << "(tile:" << stmt[i].loop_level[j].payload << ")"; + break; + default: + std::cout << "(unknown)"; + } + std::cout << ' '; + } + for (int j = 2*stmt[i].loop_level.size(); j < lex.size(); j+=2) { + std::cout << lex[j]; + if (j != lex.size()-1) + std::cout << ' '; + } + std::cout << std::endl; + } 
+} + + +CG_outputRepr *Loop::getCode(int effort) const { + const int m = stmt.size(); + if (m == 0) + return NULL; + const int n = stmt[0].xform.n_out(); + + Tuple<CG_outputRepr *> ni(m); + Tuple<Relation> IS(m); + Tuple<Relation> xform(m); + for (int i = 0; i < m; i++) { + ni[i+1] = stmt[i].code; + IS[i+1] = stmt[i].IS; + xform[i+1] = stmt[i].xform; + } + + Relation known = Extend_Set(copy(this->known), n - this->known.n_set()); + CG_outputBuilder *ocg = ir->builder(); + CG_outputRepr *repr = MMGenerateCode(ocg, xform, IS, ni, known, effort); + + if (init_code != NULL) + repr = ocg->StmtListAppend(init_code->clone(), repr); + if (cleanup_code != NULL) + repr = ocg->StmtListAppend(repr, cleanup_code->clone()); + + return repr; +} + + +void Loop::printCode(int effort) const { + const int m = stmt.size(); + if (m == 0) + return; + const int n = stmt[0].xform.n_out(); + + Tuple<Relation> IS(m); + Tuple<Relation> xform(m); + for (int i = 0; i < m; i++) { + IS[i+1] = stmt[i].IS; + xform[i+1] = stmt[i].xform; + } + + Relation known = Extend_Set(copy(this->known), n - this->known.n_set()); + std::cout << MMGenerateCode(xform, IS, known, effort); +} + + +Relation Loop::getNewIS(int stmt_num) const { + Relation result; + + if (stmt[stmt_num].xform.is_null()) { + Relation known = Extend_Set(copy(this->known), stmt[stmt_num].IS.n_set() - this->known.n_set()); + result = Intersection(copy(stmt[stmt_num].IS), known); + } + else { + Relation known = Extend_Set(copy(this->known), stmt[stmt_num].xform.n_out() - this->known.n_set()); + result = Intersection(Range(Restrict_Domain(copy(stmt[stmt_num].xform), copy(stmt[stmt_num].IS))), known); + } + + result.simplify(2, 4); + + return result; +} + +std::vector<Relation> Loop::getNewIS() const { + const int m = stmt.size(); + + std::vector<Relation> new_IS(m); + for (int i = 0; i < m; i++) + new_IS[i] = getNewIS(i); + + return new_IS; +} + + +void Loop::permute(const std::vector<int> &pi) { + std::set<int> active; + for (int i = 0; i < 
stmt.size(); i++) + active.insert(i); + + permute(active, pi); +} + + +void Loop::original() { + std::set<int> active; + for (int i = 0; i < stmt.size(); i++) + active.insert(i); + setLexicalOrder(0, active); +} + + +void Loop::permute(const std::set<int> &active, const std::vector<int> &pi) { + if (active.size() == 0 || pi.size() == 0) + return; + + // check for sanity of parameters + int level = pi[0]; + for (int i = 1; i < pi.size(); i++) + if (pi[i] < level) + level = pi[i]; + if (level < 1) + throw std::invalid_argument("invalid permuation"); + std::vector<int> reverse_pi(pi.size(), 0); + for (int i = 0; i < pi.size(); i++) + if (pi[i] >= level+pi.size()) + throw std::invalid_argument("invalid permutation"); + else + reverse_pi[pi[i]-level] = i+level; + for (int i = 0; i < reverse_pi.size(); i++) + if (reverse_pi[i] == 0) + throw std::invalid_argument("invalid permuation"); + int ref_stmt_num; + std::vector<int> lex; + for (std::set<int>::iterator i = active.begin(); i != active.end(); i++) { + if (*i < 0 || *i >= stmt.size()) + throw std::invalid_argument("invalid statement " + to_string(*i)); + if (i == active.begin()) { + ref_stmt_num = *i; + lex = getLexicalOrder(*i); + } + else { + if (level+pi.size()-1 > stmt[*i].loop_level.size()) + throw std::invalid_argument("invalid permuation"); + std::vector<int> lex2 = getLexicalOrder(*i); + for (int j = 0; j < 2*level-3; j+=2) + if (lex[j] != lex2[j]) + throw std::invalid_argument("statements to permute must be in the same subloop"); + for (int j = 0; j < pi.size(); j++) + if (!(stmt[*i].loop_level[level+j-1].type == stmt[ref_stmt_num].loop_level[level+j-1].type && + stmt[*i].loop_level[level+j-1].payload == stmt[ref_stmt_num].loop_level[level+j-1].payload)) + throw std::invalid_argument("permuted loops must have the same loop level types"); + } + } + + // Update transformation relations + for (std::set<int>::iterator i = active.begin(); i != active.end(); i++) { + int n = stmt[*i].xform.n_out(); + Relation 
mapping(n, n); + F_And *f_root = mapping.add_and(); + for (int j = 1; j <= n; j+= 2) { + EQ_Handle h = f_root->add_EQ(); + h.update_coef(mapping.output_var(j), 1); + h.update_coef(mapping.input_var(j), -1); + } + for (int j = 0; j < pi.size(); j++) { + EQ_Handle h = f_root->add_EQ(); + h.update_coef(mapping.output_var(2*(level+j)), 1); + h.update_coef(mapping.input_var(2*pi[j]), -1); + } + for (int j = 1; j < level; j++) { + EQ_Handle h = f_root->add_EQ(); + h.update_coef(mapping.output_var(2*j), 1); + h.update_coef(mapping.input_var(2*j), -1); + } + for (int j = level+pi.size(); j <= stmt[*i].loop_level.size(); j++) { + EQ_Handle h = f_root->add_EQ(); + h.update_coef(mapping.output_var(2*j), 1); + h.update_coef(mapping.input_var(2*j), -1); + } + + stmt[*i].xform = Composition(mapping, stmt[*i].xform); + stmt[*i].xform.simplify(); + } + + // get the permuation for dependence vectors + std::vector<int> t; + for (int i = 0; i < pi.size(); i++) + if (stmt[ref_stmt_num].loop_level[pi[i]-1].type == LoopLevelOriginal) + t.push_back(stmt[ref_stmt_num].loop_level[pi[i]-1].payload); + int max_dep_dim = -1; + int min_dep_dim = num_dep_dim; + for (int i = 0; i < t.size(); i++) { + if (t[i] > max_dep_dim) + max_dep_dim = t[i]; + if (t[i] < min_dep_dim) + min_dep_dim = t[i]; + } + if (min_dep_dim > max_dep_dim) + return; + if (max_dep_dim - min_dep_dim + 1 != t.size()) + throw loop_error("cannot update the dependence graph after permuation"); + std::vector<int> dep_pi(num_dep_dim); + for (int i = 0; i < min_dep_dim; i++) + dep_pi[i] = i; + for (int i = min_dep_dim; i <= max_dep_dim; i++) + dep_pi[i] = t[i-min_dep_dim]; + for (int i = max_dep_dim+1; i < num_dep_dim; i++) + dep_pi[i] = i; + + // update the dependence graph + DependenceGraph g; + for (int i = 0; i < dep.vertex.size(); i++) + g.insert(); + for (int i = 0; i < dep.vertex.size(); i++) + for (DependenceGraph::EdgeList::iterator j = dep.vertex[i].second.begin(); j != dep.vertex[i].second.end(); j++) { + if 
((active.find(i) != active.end() && active.find(j->first) != active.end())) { + std::vector<DependenceVector> dv = j->second; + for (int k = 0; k < dv.size(); k++) { + switch (dv[k].type) { + case DEP_W2R: + case DEP_R2W: + case DEP_W2W: + case DEP_R2R: { + std::vector<coef_t> lbounds(num_dep_dim); + std::vector<coef_t> ubounds(num_dep_dim); + for (int d = 0; d < num_dep_dim; d++) { + lbounds[d] = dv[k].lbounds[dep_pi[d]]; + ubounds[d] = dv[k].ubounds[dep_pi[d]]; + } + dv[k].lbounds = lbounds; + dv[k].ubounds = ubounds; + break; + } + case DEP_CONTROL: { + break; + } + default: + throw loop_error("unknown dependence type"); + } + } + g.connect(i, j->first, dv); + } + else if (active.find(i) == active.end() && active.find(j->first) == active.end()) { + std::vector<DependenceVector> dv = j->second; + g.connect(i, j->first, dv); + } + else { + std::vector<DependenceVector> dv = j->second; + for (int k = 0; k < dv.size(); k++) + switch (dv[k].type) { + case DEP_W2R: + case DEP_R2W: + case DEP_W2W: + case DEP_R2R: { + for (int d = 0; d < num_dep_dim; d++) + if (dep_pi[d] != d) { + dv[k].lbounds[d] = -posInfinity; + dv[k].ubounds[d] = posInfinity; + } + break; + } + case DEP_CONTROL: + break; + default: + throw loop_error("unknown dependence type"); + } + g.connect(i, j->first, dv); + } + } + dep = g; + + // update loop level information + for (std::set<int>::iterator i = active.begin(); i != active.end(); i++) { + int cur_dep_dim = min_dep_dim; + std::vector<LoopLevel> new_loop_level(stmt[*i].loop_level.size()); + for (int j = 1; j <= stmt[*i].loop_level.size(); j++) + if (j >= level && j < level+pi.size()) { + switch (stmt[*i].loop_level[reverse_pi[j-level]-1].type) { + case LoopLevelOriginal: + new_loop_level[j-1].type = LoopLevelOriginal; + new_loop_level[j-1].payload = cur_dep_dim++; + new_loop_level[j-1].parallel_level = stmt[*i].loop_level[reverse_pi[j-level]-1].parallel_level; + break; + case LoopLevelTile: { + new_loop_level[j-1].type = LoopLevelTile; + int 
ref_level = stmt[*i].loop_level[reverse_pi[j-level]-1].payload; + if (ref_level >= level && ref_level < level+pi.size()) + new_loop_level[j-1].payload = reverse_pi[ref_level-level]; + else + new_loop_level[j-1].payload = ref_level; + new_loop_level[j-1].parallel_level = stmt[*i].loop_level[reverse_pi[j-level]-1].parallel_level; + break; + } + default: + throw loop_error("unknown loop level information for statement " + to_string(*i)); + } + } + else { + switch (stmt[*i].loop_level[j-1].type) { + case LoopLevelOriginal: + new_loop_level[j-1].type = LoopLevelOriginal; + new_loop_level[j-1].payload = stmt[*i].loop_level[j-1].payload; + new_loop_level[j-1].parallel_level = stmt[*i].loop_level[j-1].parallel_level; + break; + case LoopLevelTile: { + new_loop_level[j-1].type = LoopLevelTile; + int ref_level = stmt[*i].loop_level[j-1].payload; + if (ref_level >= level && ref_level < level+pi.size()) + new_loop_level[j-1].payload = reverse_pi[ref_level-level]; + else + new_loop_level[j-1].payload = ref_level; + new_loop_level[j-1].parallel_level = stmt[*i].loop_level[j-1].parallel_level; + break; + } + default: + throw loop_error("unknown loop level information for statement " + to_string(*i)); + } + } + stmt[*i].loop_level = new_loop_level; + } + + setLexicalOrder(2*level-2, active); +} + +std::set<int> Loop::split(int stmt_num, int level, const Relation &cond) { + // check for sanity of parameters + if (stmt_num < 0 || stmt_num >= stmt.size()) + throw std::invalid_argument("invalid statement " + to_string(stmt_num)); + if (level <= 0 || level > stmt[stmt_num].loop_level.size()) + throw std::invalid_argument("invalid loop level " + to_string(level)); + + std::set<int> result; + int dim = 2*level-1; + std::vector<int> lex = getLexicalOrder(stmt_num); + std::set<int> same_loop = getStatements(lex, dim-1); + + Relation cond2 = copy(cond); + cond2.simplify(); + cond2 = EQs_to_GEQs(cond2); + Conjunct *c = cond2.single_conjunct(); + int cur_lex = lex[dim-1]; + for (GEQ_Iterator 
gi(c->GEQs()); gi; gi++) { + int max_level = (*gi).max_tuple_pos(); + Relation single_cond(max_level); + single_cond.and_with_GEQ(*gi); + + // TODO: should decide where to place newly created statements with + // complementary split condition from dependence graph. + bool place_after; + if (max_level == 0) + place_after = true; + else if ((*gi).get_coef(cond2.set_var(max_level)) < 0) + place_after = true; + else + place_after = false; + + // make adjacent lexical number available for new statements + if (place_after) { + lex[dim-1] = cur_lex+1; + shiftLexicalOrder(lex, dim-1, 1); + } + else { + lex[dim-1] = cur_lex-1; + shiftLexicalOrder(lex, dim-1, -1); + } + + // original statements with split condition, + // new statements with complement of split condition + int old_num_stmt = stmt.size(); + std::map<int, int> what_stmt_num; + apply_xform(same_loop); + for (std::set<int>::iterator i = same_loop.begin(); i != same_loop.end(); i++) { + int n = stmt[*i].IS.n_set(); + Relation part1, part2; + if (max_level > n) { + part1 = copy(stmt[*i].IS); + part2 = Relation::False(0); + } + else { + part1 = Intersection(copy(stmt[*i].IS), Extend_Set(copy(single_cond), n-max_level)); + part2 = Intersection(copy(stmt[*i].IS), Extend_Set(Complement(copy(single_cond)), n-max_level)); + } + + stmt[*i].IS = part1; + + if (Intersection(copy(part2), Extend_Set(copy(this->known), n-this->known.n_set())).is_upper_bound_satisfiable()) { + Statement new_stmt; + new_stmt.code = stmt[*i].code->clone(); + new_stmt.IS = part2; + new_stmt.xform = copy(stmt[*i].xform); + if (place_after) + assign_const(new_stmt.xform, dim-1, cur_lex+1); + else + assign_const(new_stmt.xform, dim-1, cur_lex-1); + new_stmt.loop_level = stmt[*i].loop_level; + stmt.push_back(new_stmt); + dep.insert(); + what_stmt_num[*i] = stmt.size() - 1; + if (*i == stmt_num) + result.insert(stmt.size() - 1); + } + } + + // update dependence graph + int dep_dim = get_dep_dim_of(stmt_num, level); + for (int i = 0; i < old_num_stmt; 
i++) { + std::vector<std::pair<int, std::vector<DependenceVector> > > D; + + for (DependenceGraph::EdgeList::iterator j = dep.vertex[i].second.begin(); j != dep.vertex[i].second.end(); j++) { + if (same_loop.find(i) != same_loop.end()) { + if (same_loop.find(j->first) != same_loop.end()) { + if (what_stmt_num.find(i) != what_stmt_num.end() && what_stmt_num.find(j->first) != what_stmt_num.end()) + dep.connect(what_stmt_num[i], what_stmt_num[j->first], j->second); + if (place_after && what_stmt_num.find(j->first) != what_stmt_num.end()) { + std::vector<DependenceVector> dvs; + for (int k = 0; k < j->second.size(); k++) { + DependenceVector dv = j->second[k]; + if (dv.is_data_dependence() && dep_dim != -1) { + dv.lbounds[dep_dim] = -posInfinity; + dv.ubounds[dep_dim] = posInfinity; + } + dvs.push_back(dv); + } + if (dvs.size() > 0) + D.push_back(std::make_pair(what_stmt_num[j->first], dvs)); + } + else if (!place_after && what_stmt_num.find(i) != what_stmt_num.end()) { + std::vector<DependenceVector> dvs; + for (int k = 0; k < j->second.size(); k++) { + DependenceVector dv = j->second[k]; + if (dv.is_data_dependence() && dep_dim != -1) { + dv.lbounds[dep_dim] = -posInfinity; + dv.ubounds[dep_dim] = posInfinity; + } + dvs.push_back(dv); + } + if (dvs.size() > 0) + dep.connect(what_stmt_num[i], j->first, dvs); + + } + } + else { + if (what_stmt_num.find(i) != what_stmt_num.end()) + dep.connect(what_stmt_num[i], j->first, j->second); + } + } + else if (same_loop.find(j->first) != same_loop.end()) { + if (what_stmt_num.find(j->first) != what_stmt_num.end()) + D.push_back(std::make_pair(what_stmt_num[j->first], j->second)); + } + } + + for (int j = 0; j < D.size(); j++) + dep.connect(i, D[j].first, D[j].second); + } + } + + return result; +} + + + +void Loop::tile(int stmt_num, int level, int tile_size, int outer_level, TilingMethodType method, int alignment_offset, int alignment_multiple) { + // check for sanity of parameters + if (tile_size < 0) + throw 
std::invalid_argument("invalid tile size"); + if (alignment_multiple < 1 || alignment_offset < 0) + throw std::invalid_argument("invalid alignment for tile"); + if (stmt_num < 0 || stmt_num >= stmt.size()) + throw std::invalid_argument("invalid statement " + to_string(stmt_num)); + if (level <= 0) + throw std::invalid_argument("invalid loop level " + to_string(level)); + if (level > stmt[stmt_num].loop_level.size()) + throw std::invalid_argument("there is no loop level " + to_string(level) + " for statement " + to_string(stmt_num)); + if (outer_level <= 0 || outer_level > level) + throw std::invalid_argument("invalid tile controlling loop level " + to_string(outer_level)); + + int dim = 2*level-1; + int outer_dim = 2*outer_level-1; + std::vector<int> lex = getLexicalOrder(stmt_num); + std::set<int> same_tiled_loop = getStatements(lex, dim-1); + std::set<int> same_tile_controlling_loop = getStatements(lex, outer_dim-1); + + // special case for no tiling + if (tile_size == 0) { + for (std::set<int>::iterator i = same_tile_controlling_loop.begin(); i != same_tile_controlling_loop.end(); i++) { + Relation r(stmt[*i].xform.n_out(),stmt[*i].xform.n_out()+2); + F_And *f_root = r.add_and(); + for (int j = 1; j <= 2*outer_level-1; j++) { + EQ_Handle h = f_root->add_EQ(); + h.update_coef(r.input_var(j), 1); + h.update_coef(r.output_var(j), -1); + } + EQ_Handle h1 = f_root->add_EQ(); + h1.update_coef(r.output_var(2*outer_level), 1); + EQ_Handle h2 = f_root->add_EQ(); + h2.update_coef(r.output_var(2*outer_level+1), 1); + for (int j = 2*outer_level; j <= stmt[*i].xform.n_out(); j++) { + EQ_Handle h = f_root->add_EQ(); + h.update_coef(r.input_var(j), 1); + h.update_coef(r.output_var(j+2), -1); + } + + stmt[*i].xform = Composition(copy(r), stmt[*i].xform); + } + } + // normal tiling + else { + std::set<int> private_stmt; + for (std::set<int>::iterator i = same_tile_controlling_loop.begin(); i != same_tile_controlling_loop.end(); i++) { +// if (same_tiled_loop.find(*i) == 
same_tiled_loop.end() && !is_single_iteration(getNewIS(*i), dim)) +// same_tiled_loop.insert(*i); + + // should test dim's value directly but it is ok for now +// if (same_tiled_loop.find(*i) == same_tiled_loop.end() && get_const(stmt[*i].xform, dim+1, Output_Var) == posInfinity) + if (same_tiled_loop.find(*i) == same_tiled_loop.end() && overflow.find(*i) != overflow.end()) + private_stmt.insert(*i); + } + + + // extract the union of the iteration space to be considered + Relation hull; + { + Tuple<Relation> r_list; + Tuple<int> r_mask; + + for (std::set<int>::iterator i = same_tile_controlling_loop.begin(); i != same_tile_controlling_loop.end(); i++) + if (private_stmt.find(*i) == private_stmt.end()) { + Relation r = project_onto_levels(getNewIS(*i), dim+1, true); + for (int j = outer_dim; j < dim; j++) + r = Project(r, j+1, Set_Var); + for (int j = 0; j < outer_dim; j += 2) + r = Project(r, j+1, Set_Var); + r_list.append(r); + r_mask.append(1); + } + + hull = Hull(r_list, r_mask, 1, true); + } + + // extract the bound of the dimension to be tiled + Relation bound = get_loop_bound(hull, dim); + if (!bound.has_single_conjunct()) { + // further simplify the bound + hull = Approximate(hull); + bound = get_loop_bound(hull, dim); + + int i = outer_dim - 2; + while (!bound.has_single_conjunct() && i >= 0) { + hull = Project(hull, i+1, Set_Var); + bound = get_loop_bound(hull, dim); + i -= 2; + } + + if (!bound.has_single_conjunct()) + throw loop_error("cannot handle tile bounds"); + } + + // separate lower and upper bounds + std::vector<GEQ_Handle> lb_list, ub_list; + { + Conjunct *c = bound.query_DNF()->single_conjunct(); + for (GEQ_Iterator gi(c->GEQs()); gi; gi++) { + int coef = (*gi).get_coef(bound.set_var(dim+1)); + if (coef < 0) + ub_list.push_back(*gi); + else if (coef > 0) + lb_list.push_back(*gi); + } + } + if (lb_list.size() == 0) + throw loop_error("unable to calculate tile controlling loop lower bound"); + if (ub_list.size() == 0) + throw loop_error("unable 
to calculate tile controlling loop upper bound"); + + // find the simplest lower bound for StridedTile or simplest iteration count for CountedTile + int simplest_lb = 0, simplest_ub = 0; + if (method == StridedTile) { + int best_cost = INT_MAX; + for (int i = 0; i < lb_list.size(); i++) { + int cost = 0; + for (Constr_Vars_Iter ci(lb_list[i]); ci; ci++) { + switch ((*ci).var->kind()) { + case Input_Var: { + cost += 5; + break; + } + case Global_Var: { + cost += 2; + break; + } + default: + cost += 15; + break; + } + } + + if (cost < best_cost) { + best_cost = cost; + simplest_lb = i; + } + } + } + else if (method == CountedTile) { + std::map<Variable_ID, coef_t> s1, s2, s3; + int best_cost = INT_MAX; + for (int i = 0; i < lb_list.size(); i++) + for (int j = 0; j < ub_list.size(); j++) { + int cost = 0; + + for (Constr_Vars_Iter ci(lb_list[i]); ci; ci++) { + switch ((*ci).var->kind()) { + case Input_Var: { + s1[(*ci).var] += (*ci).coef; + break; + } + case Global_Var: { + s2[(*ci).var] += (*ci).coef; + break; + } + case Exists_Var: + case Wildcard_Var: { + s3[(*ci).var] += (*ci).coef; + break; + } + default: + cost = INT_MAX-2; + break; + } + } + + for (Constr_Vars_Iter ci(ub_list[j]); ci; ci++) { + switch ((*ci).var->kind()) { + case Input_Var: { + s1[(*ci).var] += (*ci).coef; + break; + } + case Global_Var: { + s2[(*ci).var] += (*ci).coef; + break; + } + case Exists_Var: + case Wildcard_Var: { + s3[(*ci).var] += (*ci).coef; + break; + } + default: + if (cost == INT_MAX-2) + cost = INT_MAX-1; + else + cost = INT_MAX-3; + break; + } + } + + if (cost == 0) { + for (std::map<Variable_ID, coef_t>::iterator k = s1.begin(); k != s1.end(); k++) + if ((*k).second != 0) + cost += 5; + for (std::map<Variable_ID, coef_t>::iterator k = s2.begin(); k != s2.end(); k++) + if ((*k).second != 0) + cost += 2; + for (std::map<Variable_ID, coef_t>::iterator k = s3.begin(); k != s3.end(); k++) + if ((*k).second != 0) + cost += 15; + } + + if (cost < best_cost) { + best_cost = cost; + 
simplest_lb = i; + simplest_ub = j; + } + } + } + + // prepare the new transformation relations + for (std::set<int>::iterator i = same_tile_controlling_loop.begin(); i != same_tile_controlling_loop.end(); i++) { + Relation r(stmt[*i].xform.n_out(), stmt[*i].xform.n_out()+2); + F_And *f_root = r.add_and(); + for (int j = 0; j < outer_dim-1; j++) { + EQ_Handle h = f_root->add_EQ(); + h.update_coef(r.output_var(j+1), 1); + h.update_coef(r.input_var(j+1), -1); + } + + for (int j = outer_dim-1; j < stmt[*i].xform.n_out(); j++) { + EQ_Handle h = f_root->add_EQ(); + h.update_coef(r.output_var(j+3), 1); + h.update_coef(r.input_var(j+1), -1); + } + + EQ_Handle h = f_root->add_EQ(); + h.update_coef(r.output_var(outer_dim), 1); + h.update_const(-lex[outer_dim-1]); + + stmt[*i].xform = Composition(r, stmt[*i].xform); + } + + // add tiling constraints. + for (std::set<int>::iterator i = same_tile_controlling_loop.begin(); i != same_tile_controlling_loop.end(); i++) { + F_And *f_super_root = stmt[*i].xform.and_with_and(); + F_Exists *f_exists = f_super_root->add_exists(); + F_And *f_root = f_exists->add_and(); + + // create a lower bound variable for easy formula creation later + Variable_ID aligned_lb; + { + Variable_ID lb = f_exists->declare(); + coef_t coef = lb_list[simplest_lb].get_coef(bound.set_var(dim+1)); + if (coef == 1) { // e.g. 
if i >= m+5, then LB = m+5 + EQ_Handle h = f_root->add_EQ(); + h.update_coef(lb, 1); + for (Constr_Vars_Iter ci(lb_list[simplest_lb]); ci; ci++) { + switch ((*ci).var->kind()) { + case Input_Var: { + int pos = (*ci).var->get_position(); + if (pos != dim + 1) + h.update_coef(stmt[*i].xform.output_var(pos), (*ci).coef); + break; + } + case Global_Var: { + Global_Var_ID g = (*ci).var->get_global_var(); + Variable_ID v; + if (g->arity() == 0) + v = stmt[*i].xform.get_local(g); + else + v = stmt[*i].xform.get_local(g, (*ci).var->function_of()); + h.update_coef(v, (*ci).coef); + break; + } + default: + throw loop_error("cannot handle tile bounds"); + } + } + h.update_const(lb_list[simplest_lb].get_const()); + } + else { // e.g. if 2i >= m+5, then m+5 <= 2*LB < m+5+2 + GEQ_Handle h1 = f_root->add_GEQ(); + GEQ_Handle h2 = f_root->add_GEQ(); + for (Constr_Vars_Iter ci(lb_list[simplest_lb]); ci; ci++) { + switch ((*ci).var->kind()) { + case Input_Var: { + int pos = (*ci).var->get_position(); + if (pos == dim + 1) { + h1.update_coef(lb, (*ci).coef); + h2.update_coef(lb, -(*ci).coef); + } + else { + h1.update_coef(stmt[*i].xform.output_var(pos), (*ci).coef); + h2.update_coef(stmt[*i].xform.output_var(pos), -(*ci).coef); + } + break; + } + case Global_Var: { + Global_Var_ID g = (*ci).var->get_global_var(); + Variable_ID v; + if (g->arity() == 0) + v = stmt[*i].xform.get_local(g); + else + v = stmt[*i].xform.get_local(g, (*ci).var->function_of()); + h1.update_coef(v, (*ci).coef); + h2.update_coef(v, -(*ci).coef); + break; + } + default: + throw loop_error("cannot handle tile bounds"); + } + } + h1.update_const(lb_list[simplest_lb].get_const()); + h2.update_const(-lb_list[simplest_lb].get_const()); + h2.update_const(coef-1); + } + + Variable_ID offset_lb; + if (alignment_offset == 0) + offset_lb = lb; + else { + EQ_Handle h = f_root->add_EQ(); + offset_lb = f_exists->declare(); + h.update_coef(offset_lb, 1); + h.update_coef(lb, -1); + h.update_const(alignment_offset); + } + + if 
(alignment_multiple == 1) { // trivial + aligned_lb = offset_lb; + } + else { // e.g. to align at 4, aligned_lb = 4*alpha && LB-4 < 4*alpha <= LB + aligned_lb = f_exists->declare(); + Variable_ID e = f_exists->declare(); + + EQ_Handle h = f_root->add_EQ(); + h.update_coef(aligned_lb, 1); + h.update_coef(e, -alignment_multiple); + + GEQ_Handle h1 = f_root->add_GEQ(); + GEQ_Handle h2 = f_root->add_GEQ(); + h1.update_coef(e, alignment_multiple); + h2.update_coef(e, -alignment_multiple); + h1.update_coef(offset_lb, -1); + h2.update_coef(offset_lb, 1); + h1.update_const(alignment_multiple-1); + } + } + + // create an upper bound variable for easy formula creation later + Variable_ID ub = f_exists->declare(); + { + coef_t coef = -ub_list[simplest_ub].get_coef(bound.set_var(dim+1)); + if (coef == 1) { // e.g. if i <= m+5, then UB = m+5 + EQ_Handle h = f_root->add_EQ(); + h.update_coef(ub, -1); + for (Constr_Vars_Iter ci(ub_list[simplest_ub]); ci; ci++) { + switch ((*ci).var->kind()) { + case Input_Var: { + int pos = (*ci).var->get_position(); + if (pos != dim + 1) + h.update_coef(stmt[*i].xform.output_var(pos), (*ci).coef); + break; + } + case Global_Var: { + Global_Var_ID g = (*ci).var->get_global_var(); + Variable_ID v; + if (g->arity() == 0) + v = stmt[*i].xform.get_local(g); + else + v = stmt[*i].xform.get_local(g, (*ci).var->function_of()); + h.update_coef(v, (*ci).coef); + break; + } + default: + throw loop_error("cannot handle tile bounds"); + } + } + h.update_const(ub_list[simplest_ub].get_const()); + } + else { // e.g. 
if 2i <= m+5, then m+5-2 < 2*UB <= m+5 + GEQ_Handle h1 = f_root->add_GEQ(); + GEQ_Handle h2 = f_root->add_GEQ(); + for (Constr_Vars_Iter ci(ub_list[simplest_ub]); ci; ci++) { + switch ((*ci).var->kind()) { + case Input_Var: { + int pos = (*ci).var->get_position(); + if (pos == dim + 1) { + h1.update_coef(ub, -(*ci).coef); + h2.update_coef(ub, (*ci).coef); + } + else { + h1.update_coef(stmt[*i].xform.output_var(pos), -(*ci).coef); + h2.update_coef(stmt[*i].xform.output_var(pos), (*ci).coef); + } + break; + } + case Global_Var: { + Global_Var_ID g = (*ci).var->get_global_var(); + Variable_ID v; + if (g->arity() == 0) + v = stmt[*i].xform.get_local(g); + else + v = stmt[*i].xform.get_local(g, (*ci).var->function_of()); + h1.update_coef(v, -(*ci).coef); + h2.update_coef(v, (*ci).coef); + break; + } + default: + throw loop_error("cannot handle tile bounds"); + } + } + h1.update_const(-ub_list[simplest_ub].get_const()); + h2.update_const(ub_list[simplest_ub].get_const()); + h1.update_const(coef-1); + } + } + + // insert tile controlling loop constraints + if (method == StridedTile) { // e.g. ii = LB + 32 * alpha && alpha >= 0 + Variable_ID e = f_exists->declare(); + GEQ_Handle h1 = f_root->add_GEQ(); + h1.update_coef(e, 1); + + EQ_Handle h2 = f_root->add_EQ(); + h2.update_coef(stmt[*i].xform.output_var(outer_dim+1), 1); + h2.update_coef(e, -tile_size); + h2.update_coef(aligned_lb, -1); + } + else if (method == CountedTile) { // e.g. 0 <= ii < ceiling((UB-LB+1)/32) + GEQ_Handle h1 = f_root->add_GEQ(); + h1.update_coef(stmt[*i].xform.output_var(outer_dim+1), 1); + + GEQ_Handle h2 = f_root->add_GEQ(); + h2.update_coef(stmt[*i].xform.output_var(outer_dim+1), -tile_size); + h2.update_coef(aligned_lb, -1); + h2.update_coef(ub, 1); + } + + // special care for private statements like overflow assignment + if (private_stmt.find(*i) != private_stmt.end()) { // e.g. 
ii <= UB + GEQ_Handle h = f_root->add_GEQ(); + h.update_coef(stmt[*i].xform.output_var(outer_dim+1), -1); + h.update_coef(ub, 1); + } + // if (private_stmt.find(*i) != private_stmt.end()) { + // if (stmt[*i].xform.n_out() > dim+3) { // e.g. ii <= UB && i = ii + // GEQ_Handle h = f_root->add_GEQ(); + // h.update_coef(stmt[*i].xform.output_var(outer_dim+1), -1); + // h.update_coef(ub, 1); + + // stmt[*i].xform = Project(stmt[*i].xform, dim+3, Output_Var); + // f_root = stmt[*i].xform.and_with_and(); + // EQ_Handle h1 = f_root->add_EQ(); + // h1.update_coef(stmt[*i].xform.output_var(dim+3), 1); + // h1.update_coef(stmt[*i].xform.output_var(outer_dim+1), -1); + // } + // else if (method == StridedTile) { // e.g. ii <= UB since i does not exist + // GEQ_Handle h = f_root->add_GEQ(); + // h.update_coef(stmt[*i].xform.output_var(outer_dim+1), -1); + // h.update_coef(ub, 1); + // } + // } + + // restrict original loop index inside the tile + else { + if (method == StridedTile) { // e.g. ii <= i < ii + tile_size + GEQ_Handle h1 = f_root->add_GEQ(); + h1.update_coef(stmt[*i].xform.output_var(dim+3), 1); + h1.update_coef(stmt[*i].xform.output_var(outer_dim+1), -1); + + GEQ_Handle h2 = f_root->add_GEQ(); + h2.update_coef(stmt[*i].xform.output_var(dim+3), -1); + h2.update_coef(stmt[*i].xform.output_var(outer_dim+1), 1); + h2.update_const(tile_size-1); + } + else if (method == CountedTile) { // e.g. 
LB+32*ii <= i < LB+32*ii+tile_size + GEQ_Handle h1 = f_root->add_GEQ(); + h1.update_coef(stmt[*i].xform.output_var(outer_dim+1), -tile_size); + h1.update_coef(stmt[*i].xform.output_var(dim+3), 1); + h1.update_coef(aligned_lb, -1); + + GEQ_Handle h2 = f_root->add_GEQ(); + h2.update_coef(stmt[*i].xform.output_var(outer_dim+1), tile_size); + h2.update_coef(stmt[*i].xform.output_var(dim+3), -1); + h2.update_const(tile_size-1); + h2.update_coef(aligned_lb, 1); + } + } + } + } + + // update loop level information + for (std::set<int>::iterator i = same_tile_controlling_loop.begin(); i != same_tile_controlling_loop.end(); i++) { + for (int j = 1; j <= stmt[*i].loop_level.size(); j++) + switch (stmt[*i].loop_level[j-1].type) { + case LoopLevelOriginal: + break; + case LoopLevelTile: + if (stmt[*i].loop_level[j-1].payload >= outer_level) + stmt[*i].loop_level[j-1].payload++; + break; + default: + throw loop_error("unknown loop level type for statement " + to_string(*i)); + } + + LoopLevel ll; + ll.type = LoopLevelTile; + ll.payload = level+1; + ll.parallel_level = 0; + stmt[*i].loop_level.insert(stmt[*i].loop_level.begin()+(outer_level-1), ll); + } +} + + + +std::set<int> Loop::unroll(int stmt_num, int level, int unroll_amount) { + // check for sanity of parameters + if (unroll_amount < 0) + throw std::invalid_argument("invalid unroll amount " + to_string(unroll_amount)); + if (stmt_num < 0 || stmt_num >= stmt.size()) + throw std::invalid_argument("invalid statement " + to_string(stmt_num)); + if (level <= 0 || level > stmt[stmt_num].loop_level.size()) + throw std::invalid_argument("invalid loop level " + to_string(level)); + + int dim = 2*level - 1; + std::vector<int> lex = getLexicalOrder(stmt_num); + std::set<int> same_loop = getStatements(lex, dim-1); + + // nothing to do + if (unroll_amount == 1) + return std::set<int>(); + + // extract the intersection of the iteration space to be considered + Relation hull = Relation::True(level); + apply_xform(same_loop); + for 
(std::set<int>::iterator i = same_loop.begin(); i != same_loop.end(); i++) { + if (stmt[*i].IS.is_upper_bound_satisfiable()) { + Relation mapping(stmt[*i].IS.n_set(), level); + F_And *f_root = mapping.add_and(); + for (int j = 1; j <= level; j++) { + EQ_Handle h = f_root->add_EQ(); + h.update_coef(mapping.input_var(j), 1); + h.update_coef(mapping.output_var(j), -1); + } + hull = Intersection(hull, Range(Restrict_Domain(mapping, copy(stmt[*i].IS)))); + hull.simplify(2, 4); + } + } + for (int i = 1; i <= level; i++) { + std::string name = tmp_loop_var_name_prefix + to_string(i); + hull.name_set_var(i, name); + } + hull.setup_names(); + + // extract the exact loop bound of the dimension to be unrolled + if (is_single_loop_iteration(hull, level, this->known)) + return std::set<int>(); + Relation bound = get_loop_bound(hull, level, this->known); + if (!bound.has_single_conjunct() || !bound.is_satisfiable() || bound.is_tautology()) + throw loop_error("unable to extract loop bound for unrolling"); + + // extract the loop stride + EQ_Handle stride_eq; + int stride = 1; + { + bool simple_stride = true; + int strides = countStrides(bound.query_DNF()->single_conjunct(), bound.set_var(level), stride_eq, simple_stride); + if (strides > 1) + throw loop_error("too many strides"); + else if (strides == 1) { + int sign = stride_eq.get_coef(bound.set_var(level)); + Constr_Vars_Iter it(stride_eq, true); + stride = abs((*it).coef/sign); + } + } + + // separate lower and upper bounds + std::vector<GEQ_Handle> lb_list, ub_list; + { + Conjunct *c = bound.query_DNF()->single_conjunct(); + for (GEQ_Iterator gi(c->GEQs()); gi; gi++) { + int coef = (*gi).get_coef(bound.set_var(level)); + if (coef < 0) + ub_list.push_back(*gi); + else if (coef > 0) + lb_list.push_back(*gi); + } + } + + // simplify overflow expression for each pair of upper and lower bounds + std::vector<std::vector<std::map<Variable_ID, int> > > overflow_table(lb_list.size(), std::vector<std::map<Variable_ID, int> 
>(ub_list.size(), std::map<Variable_ID, int>())); + bool is_overflow_simplifiable = true; + for (int i = 0; i < lb_list.size(); i++) { + if (!is_overflow_simplifiable) + break; + + for (int j = 0; j < ub_list.size(); j++) { + // lower bound or upper bound has non-unit coefficient, can't simplify + if (ub_list[j].get_coef(bound.set_var(level)) != -1 || lb_list[i].get_coef(bound.set_var(level)) != 1) { + is_overflow_simplifiable = false; + break; + } + + for (Constr_Vars_Iter ci(ub_list[j]); ci; ci++) { + switch((*ci).var->kind()) { + case Input_Var: + { + if ((*ci).var != bound.set_var(level)) + overflow_table[i][j][(*ci).var] += (*ci).coef; + + break; + } + case Global_Var: + { + Global_Var_ID g = (*ci).var->get_global_var(); + Variable_ID v; + if (g->arity() == 0) + v = bound.get_local(g); + else + v = bound.get_local(g, (*ci).var->function_of()); + overflow_table[i][j][(*ci).var] += (*ci).coef; + break; + } + default: + throw loop_error("failed to calculate overflow amount"); + } + } + overflow_table[i][j][NULL] += ub_list[j].get_const(); + + for (Constr_Vars_Iter ci(lb_list[i]); ci; ci++) { + switch((*ci).var->kind()) { + case Input_Var: + { + if ((*ci).var != bound.set_var(level)) { + overflow_table[i][j][(*ci).var] += (*ci).coef; + if (overflow_table[i][j][(*ci).var] == 0) + overflow_table[i][j].erase(overflow_table[i][j].find((*ci).var)); + } + break; + } + case Global_Var: + { + Global_Var_ID g = (*ci).var->get_global_var(); + Variable_ID v; + if (g->arity() == 0) + v = bound.get_local(g); + else + v = bound.get_local(g, (*ci).var->function_of()); + overflow_table[i][j][(*ci).var] += (*ci).coef; + if (overflow_table[i][j][(*ci).var] == 0) + overflow_table[i][j].erase(overflow_table[i][j].find((*ci).var)); + break; + } + default: + throw loop_error("failed to calculate overflow amount"); + } + } + overflow_table[i][j][NULL] += lb_list[i].get_const(); + + overflow_table[i][j][NULL] += stride; + if (unroll_amount == 0 || (overflow_table[i][j].size() == 1 && 
overflow_table[i][j][NULL]/stride < unroll_amount)) + unroll_amount = overflow_table[i][j][NULL]/stride; + } + } + + // loop iteration count can't be determined, bail out gracefully + if (unroll_amount == 0) + return std::set<int>(); + + // further simply overflow calculation using coefficients' modular + if (is_overflow_simplifiable) { + for (int i = 0; i < lb_list.size(); i++) + for (int j = 0; j < ub_list.size(); j++) + if (stride == 1) { + for (std::map<Variable_ID, int>::iterator k = overflow_table[i][j].begin(); k != overflow_table[i][j].end(); ) + if ((*k).first != NULL) { + int t = int_mod_hat((*k).second, unroll_amount); + if (t == 0) { + overflow_table[i][j].erase(k++); + } + else { + int t2 = hull.query_variable_mod((*k).first, unroll_amount); + if (t2 != INT_MAX) { + overflow_table[i][j][NULL] += t * t2; + overflow_table[i][j].erase(k++); + } + else { + (*k).second = t; + k++; + } + } + } + else + k++; + + overflow_table[i][j][NULL] = int_mod_hat(overflow_table[i][j][NULL], unroll_amount); + + // Since we don't have MODULO instruction in SUIF yet (only MOD), make all coef positive in the final formula + for (std::map<Variable_ID, int>::iterator k = overflow_table[i][j].begin(); k != overflow_table[i][j].end(); k++) + if ((*k).second < 0) + (*k).second += unroll_amount; + } + } + + + // build overflow statement + CG_outputBuilder *ocg = ir->builder(); + CG_outputRepr *overflow_code = NULL; + Relation cond_upper(level), cond_lower(level); + Relation overflow_constraint(0); + F_And *overflow_constraint_root = overflow_constraint.add_and(); + std::vector<Free_Var_Decl *> over_var_list; + if (is_overflow_simplifiable && lb_list.size() == 1) { + for (int i = 0; i < ub_list.size(); i++) { + if (overflow_table[0][i].size() == 1) { + // upper splitting condition + GEQ_Handle h = cond_upper.and_with_GEQ(ub_list[i]); + h.update_const(((overflow_table[0][i][NULL]/stride)%unroll_amount) * -stride); + } + else { + // upper splitting condition + std::string over_name 
= overflow_var_name_prefix + to_string(overflow_var_name_counter++); + Free_Var_Decl *over_free_var = new Free_Var_Decl(over_name); + over_var_list.push_back(over_free_var); + GEQ_Handle h = cond_upper.and_with_GEQ(ub_list[i]); + h.update_coef(cond_upper.get_local(over_free_var), -stride); + + // insert constraint 0 <= overflow < unroll_amount + Variable_ID v = overflow_constraint.get_local(over_free_var); + GEQ_Handle h1 = overflow_constraint_root->add_GEQ(); + h1.update_coef(v, 1); + GEQ_Handle h2 = overflow_constraint_root->add_GEQ(); + h2.update_coef(v, -1); + h2.update_const(unroll_amount-1); + + // create overflow assignment + bound.setup_names(); + CG_outputRepr *rhs = NULL; + for (std::map<Variable_ID, int>::iterator j = overflow_table[0][i].begin(); j != overflow_table[0][i].end(); j++) + if ((*j).first != NULL) { + CG_outputRepr *t = ocg->CreateIdent((*j).first->name()); + if ((*j).second != 1) + t = ocg->CreateTimes(ocg->CreateInt((*j).second), t); + rhs = ocg->CreatePlus(rhs, t); + } + else + if ((*j).second != 0) + rhs = ocg->CreatePlus(rhs, ocg->CreateInt((*j).second)); + + if (stride != 1) + rhs = ocg->CreateIntegerCeil(rhs, ocg->CreateInt(stride)); + rhs = ocg->CreateIntegerMod(rhs, ocg->CreateInt(unroll_amount)); + + CG_outputRepr *lhs = ocg->CreateIdent(over_name); + init_code = ocg->StmtListAppend(init_code, ocg->CreateAssignment(0, lhs, ocg->CreateInt(0))); + lhs = ocg->CreateIdent(over_name); + overflow_code = ocg->StmtListAppend(overflow_code, ocg->CreateAssignment(0, lhs, rhs)); + } + } + + // lower splitting condition + GEQ_Handle h = cond_lower.and_with_GEQ(lb_list[0]); + } + else if (is_overflow_simplifiable && ub_list.size() == 1) { + for (int i = 0; i < lb_list.size(); i++) { + + if (overflow_table[i][0].size() == 1) { + // lower splitting condition + GEQ_Handle h = cond_lower.and_with_GEQ(lb_list[i]); + h.update_const(overflow_table[i][0][NULL] * -stride); + } + else { + // lower splitting condition + std::string over_name = 
overflow_var_name_prefix + to_string(overflow_var_name_counter++); + Free_Var_Decl *over_free_var = new Free_Var_Decl(over_name); + over_var_list.push_back(over_free_var); + GEQ_Handle h = cond_lower.and_with_GEQ(lb_list[i]); + h.update_coef(cond_lower.get_local(over_free_var), -stride); + + // insert constraint 0 <= overflow < unroll_amount + Variable_ID v = overflow_constraint.get_local(over_free_var); + GEQ_Handle h1 = overflow_constraint_root->add_GEQ(); + h1.update_coef(v, 1); + GEQ_Handle h2 = overflow_constraint_root->add_GEQ(); + h2.update_coef(v, -1); + h2.update_const(unroll_amount-1); + + // create overflow assignment + bound.setup_names(); + CG_outputRepr *rhs = NULL; + for (std::map<Variable_ID, int>::iterator j = overflow_table[0][i].begin(); j != overflow_table[0][i].end(); j++) + if ((*j).first != NULL) { + CG_outputRepr *t = ocg->CreateIdent((*j).first->name()); + if ((*j).second != 1) + t = ocg->CreateTimes(ocg->CreateInt((*j).second), t); + rhs = ocg->CreatePlus(rhs, t); + } + else + if ((*j).second != 0) + rhs = ocg->CreatePlus(rhs, ocg->CreateInt((*j).second)); + + if (stride != 1) + rhs = ocg->CreateIntegerCeil(rhs, ocg->CreateInt(stride)); + rhs = ocg->CreateIntegerMod(rhs, ocg->CreateInt(unroll_amount)); + + CG_outputRepr *lhs = ocg->CreateIdent(over_name); + init_code = ocg->StmtListAppend(init_code, ocg->CreateAssignment(0, lhs, ocg->CreateInt(0))); + lhs = ocg->CreateIdent(over_name); + overflow_code = ocg->StmtListAppend(overflow_code, ocg->CreateAssignment(0, lhs, rhs)); + } + } + + // upper splitting condition + GEQ_Handle h = cond_upper.and_with_GEQ(ub_list[0]); + } + else { + std::string over_name = overflow_var_name_prefix + to_string(overflow_var_name_counter++); + Free_Var_Decl *over_free_var = new Free_Var_Decl(over_name); + over_var_list.push_back(over_free_var); + + Tuple<CG_outputRepr *> lb_repr_list, ub_repr_list; + for (int i = 0; i < lb_list.size(); i++) { + //lb_repr_list.append(outputLBasRepr(ocg, lb_list[i], bound, 
bound.set_var(dim+1), stride, stride_eq, Relation::True(bound.n_set()), std::vector<CG_outputRepr *>(bound.n_set(), NULL))); + lb_repr_list.append(outputLBasRepr(ocg, lb_list[i], bound, bound.set_var(dim+1), stride, stride_eq, Relation::True(bound.n_set()), std::vector<CG_outputRepr *>(bound.n_set()))); + GEQ_Handle h = cond_lower.and_with_GEQ(lb_list[i]); + } + for (int i = 0; i < ub_list.size(); i++) { + //ub_repr_list.append(outputUBasRepr(ocg, ub_list[i], bound, bound.set_var(dim+1), stride, stride_eq, std::vector<CG_outputRepr *>(bound.n_set(), NULL))); + ub_repr_list.append(outputUBasRepr(ocg, ub_list[i], bound, bound.set_var(dim+1), stride, stride_eq, std::vector<CG_outputRepr *>(bound.n_set()))); + GEQ_Handle h = cond_upper.and_with_GEQ(ub_list[i]); + h.update_coef(cond_upper.get_local(over_free_var), -stride); + } + + CG_outputRepr *lbRepr, *ubRepr; + if (lb_repr_list.size() > 1) + lbRepr = ocg->CreateInvoke("max", lb_repr_list); + else if (lb_repr_list.size() == 1) + lbRepr = lb_repr_list[1]; + + if (ub_repr_list.size() > 1) + ubRepr = ocg->CreateInvoke("min", ub_repr_list); + else if (ub_repr_list.size() == 1) + ubRepr = ub_repr_list[1]; + + // create overflow assignment + bound.setup_names(); + CG_outputRepr *rhs = ocg->CreatePlus(ocg->CreateMinus(ubRepr, lbRepr), ocg->CreateInt(1)); + if (stride != 1) + rhs = ocg->CreateIntegerDivide(rhs, ocg->CreateInt(stride)); + rhs = ocg->CreateIntegerMod(rhs, ocg->CreateInt(unroll_amount)); + CG_outputRepr *lhs = ocg->CreateIdent(over_name); + init_code = ocg->StmtListAppend(init_code, ocg->CreateAssignment(0, lhs, ocg->CreateInt(0))); + lhs = ocg->CreateIdent(over_name); + overflow_code = ocg->CreateAssignment(0, lhs, rhs); + + // insert constraint 0 <= overflow < unroll_amount + Variable_ID v = overflow_constraint.get_local(over_free_var); + GEQ_Handle h1 = overflow_constraint_root->add_GEQ(); + h1.update_coef(v, 1); + GEQ_Handle h2 = overflow_constraint_root->add_GEQ(); + h2.update_coef(v, -1); + 
h2.update_const(unroll_amount-1); + } + + // insert overflow statement + int overflow_stmt_num = -1; + if (overflow_code != NULL) { + // build iteration space for overflow statement + Relation mapping(level, level-1); + F_And *f_root = mapping.add_and(); + for (int i = 1; i < level; i++) { + EQ_Handle h = f_root->add_EQ(); + h.update_coef(mapping.output_var(i), 1); + h.update_coef(mapping.input_var(i), -1); + } + Relation overflow_IS = Range(Restrict_Domain(mapping, copy(hull))); + for (int i = 1; i < level; i++) + overflow_IS.name_set_var(i, hull.set_var(i)->name()); + overflow_IS.setup_names(); + + // build dumb transformation relation for overflow statement + Relation overflow_xform(level-1, 2*(level-1)+1); + f_root = overflow_xform.add_and(); + for (int i = 1; i <= level-1; i++) { + EQ_Handle h = f_root->add_EQ(); + h.update_coef(overflow_xform.output_var(2*i), 1); + h.update_coef(overflow_xform.input_var(i), -1); + + h = f_root->add_EQ(); + h.update_coef(overflow_xform.output_var(2*i-1), 1); + h.update_const(-lex[2*i-2]); + } + EQ_Handle h = f_root->add_EQ(); + h.update_coef(overflow_xform.output_var(2*(level-1)+1), 1); + h.update_const(-lex[2*(level-1)]); + + shiftLexicalOrder(lex, dim-1, 1); + Statement overflow_stmt; + overflow_stmt.code = overflow_code; + overflow_stmt.IS = overflow_IS; + overflow_stmt.xform = overflow_xform; + overflow_stmt.loop_level = std::vector<LoopLevel>(level-1); + for (int i = 0; i < level-1; i++) { + overflow_stmt.loop_level[i].type = stmt[stmt_num].loop_level[i].type; + if (stmt[stmt_num].loop_level[i].type == LoopLevelTile && + stmt[stmt_num].loop_level[i].payload >= level) + overflow_stmt.loop_level[i].payload = -1; + else + overflow_stmt.loop_level[i].payload = stmt[stmt_num].loop_level[i].payload; + overflow_stmt.loop_level[i].parallel_level = stmt[stmt_num].loop_level[i].parallel_level; + } + stmt.push_back(overflow_stmt); + dep.insert(); + overflow_stmt_num = stmt.size() - 1; + overflow[overflow_stmt_num] = over_var_list; + 
+ // update the global known information on overflow variable + this->known = Intersection(this->known, Extend_Set(copy(overflow_constraint), this->known.n_set()-overflow_constraint.n_set())); + + // update dependence graph + DependenceVector dv; + dv.type = DEP_CONTROL; + for (std::set<int>::iterator i = same_loop.begin(); i != same_loop.end(); i++) + dep.connect(overflow_stmt_num, *i, dv); + dv.type = DEP_W2W; + { + IR_ScalarSymbol *overflow_sym = NULL; + std::vector<IR_ScalarRef *> scalars = ir->FindScalarRef(overflow_code); + for (int i = scalars.size()-1; i >=0; i--) + if (scalars[i]->is_write()) { + overflow_sym = scalars[i]->symbol(); + break; + } + for (int i = scalars.size()-1; i >=0; i--) + delete scalars[i]; + dv.sym = overflow_sym; + } + dv.lbounds = std::vector<coef_t>(num_dep_dim, 0); + dv.ubounds = std::vector<coef_t>(num_dep_dim, 0); + int dep_dim = get_last_dep_dim_before(stmt_num, level); + for (int i = dep_dim + 1; i < num_dep_dim; i++) { + dv.lbounds[i] = -posInfinity; + dv.ubounds[i] = posInfinity; + } + for (int i = 0; i <= dep_dim; i++) { + if (i != 0) { + dv.lbounds[i-1] = 0; + dv.ubounds[i-1] = 0; + } + dv.lbounds[i] = 1; + dv.ubounds[i] = posInfinity; + dep.connect(overflow_stmt_num, overflow_stmt_num, dv); + } + } + + // split the loop so it can be fully unrolled + std::set<int> result = split(stmt_num, level, cond_upper); + std::set<int> result2 = split(stmt_num, level, cond_lower); + for (std::set<int>::iterator i = result2.begin(); i != result2.end(); i++) + result.insert(*i); + + // check if unrolled statements can be trivially lumped together as one statement + bool can_be_lumped = true; + if (can_be_lumped) { + for (std::set<int>::iterator i = same_loop.begin(); i != same_loop.end(); i++) + if (*i != stmt_num) { + if (stmt[*i].loop_level.size() != stmt[stmt_num].loop_level.size()) { + can_be_lumped = false; + break; + } + for (int j = 0; j < stmt[stmt_num].loop_level.size(); j++) + if (!(stmt[*i].loop_level[j].type == 
stmt[stmt_num].loop_level[j].type && + stmt[*i].loop_level[j].payload == stmt[stmt_num].loop_level[j].payload)) { + can_be_lumped = false; + break; + } + if (!can_be_lumped) + break; + std::vector<int> lex2 = getLexicalOrder(*i); + for (int j = 2*level; j < lex.size()-1; j+=2) + if (lex[j] != lex2[j]) { + can_be_lumped = false; + break; + } + if (!can_be_lumped) + break; + } + } + if (can_be_lumped) { + for (std::set<int>::iterator i = same_loop.begin(); i != same_loop.end(); i++) + if (is_inner_loop_depend_on_level(stmt[*i].IS, level, known)) { + can_be_lumped = false; + break; + } + } + if (can_be_lumped) { + for (std::set<int>::iterator i = same_loop.begin(); i != same_loop.end(); i++) + if (*i != stmt_num) { + if (!(Must_Be_Subset(copy(stmt[*i].IS), copy(stmt[stmt_num].IS)) && Must_Be_Subset(copy(stmt[stmt_num].IS), copy(stmt[*i].IS)))) { + can_be_lumped = false; + break; + } + } + } + if (can_be_lumped) { + for (std::set<int>::iterator i = same_loop.begin(); i != same_loop.end(); i++) { + for (DependenceGraph::EdgeList::iterator j = dep.vertex[*i].second.begin(); j != dep.vertex[*i].second.end(); j++) + if (same_loop.find(j->first) != same_loop.end()) { + for (int k = 0; k < j->second.size(); k++) + if (j->second[k].type == DEP_CONTROL || j->second[k].type == DEP_UNKNOWN) { + can_be_lumped = false; + break; + } + if (!can_be_lumped) + break; + } + if (!can_be_lumped) + break; + } + } + + + // add strides to original statements + // for (std::set<int>::iterator i = same_loop.begin(); i != same_loop.end(); i++) + // add_loop_stride(stmt[*i].IS, bound, level-1, unroll_amount * stride); + + + // std::vector<Free_Var_Decl *> depending_overflow_var; + // for (std::set<int>::iterator i = same_loop.begin(); i != same_loop.end(); i++) { + // add_loop_stride(stmt[*i].IS, bound, level-1, unroll_amount * stride); + // if (overflow.find(*i) != overflow.end()) { + // // TO DO: It should check whether overflow vaiable depends on + // // this loop index and by how much. 
This step is important if + // // you want to unroll loops in arbitrary order. + // depending_overflow_var.insert(depending_overflow_var.end(), overflow[*i].begin(), overflow[*i].end()); + + // continue; + // } + // } + + + +// std::map<int, std::vector<Statement> > pending; +// for (std::set<int>::iterator i = same_loop.begin(); i != same_loop.end(); i++) { +// add_loop_stride(stmt[*i].IS, bound, level-1, unroll_amount * stride); + +// if (overflow.find(*i) != overflow.end()) { +// // TO DO: It should check whether overflow vaiable depends on +// // this loop index and by how much. This step is important if +// // you want to unroll loops in arbitrary order. +// depending_overflow_var.insert(depending_overflow_var.end(), overflow[*i].begin(), overflow[*i].end()); + +// continue; +// } + +// // create copy for each unroll amount +// for (int j = 1; j < unroll_amount; j++) { +// Tuple<CG_outputRepr *> funcList; +// Tuple<std::string> loop_vars; +// loop_vars.append(stmt[*i].IS.set_var((dim+1)/2)->name()); +// funcList.append(ocg->CreatePlus(ocg->CreateIdent(stmt[*i].IS.set_var(level)->name()), ocg->CreateInt(j*stride))); +// CG_outputRepr *code = ocg->CreatePlaceHolder(0, stmt[*i].code->clone(), funcList, loop_vars); + +// // prepare the new statment to insert +// Statement unrolled_stmt; +// unrolled_stmt.IS = copy(stmt[*i].IS); +// // adjust_loop_bound(unrolled_stmt.IS, (dim-1)/2, j); +// unrolled_stmt.xform = copy(stmt[*i].xform); +// unrolled_stmt.code = code; +// unrolled_stmt.loop_level = stmt[*i].loop_level; +// pending[*i].push_back(unrolled_stmt); +// } +// } + +// // adjust iteration space due to loop bounds depending on this loop +// // index and affected overflow variables +// for (std::set<int>::iterator i = same_loop.begin(); i != same_loop.end(); i++) { +// for (int j = 0; j < pending[*i].size(); j++) { +// adjust_loop_bound(pending[*i][j].IS, (dim-1)/2, j+1, depending_overflow_var); +// //pending[*i][j].IS = Intersection(pending[*i][j].IS, 
Extend_Set(copy(this->known), pending[*i][j].IS.n_set() - this->known.n_set())); +// } +// } + + // insert unrolled statements + int old_num_stmt = stmt.size(); + if (!can_be_lumped) { + std::map<int, std::vector<int> > what_stmt_num; + + for (int j = 1; j < unroll_amount; j++) { + for (std::set<int>::iterator i = same_loop.begin(); i != same_loop.end(); i++) { + Statement new_stmt; + + Tuple<CG_outputRepr *> funcList; + Tuple<std::string> loop_vars; + loop_vars.append(stmt[*i].IS.set_var(level)->name()); + funcList.append(ocg->CreatePlus(ocg->CreateIdent(stmt[*i].IS.set_var(level)->name()), ocg->CreateInt(j*stride))); + new_stmt.code = ocg->CreatePlaceHolder(0, stmt[*i].code->clone(), funcList, loop_vars); + + new_stmt.IS = adjust_loop_bound(stmt[*i].IS, level, j * stride); + add_loop_stride(new_stmt.IS, bound, level-1, unroll_amount * stride); + + new_stmt.xform = copy(stmt[*i].xform); + new_stmt.loop_level = stmt[*i].loop_level; + stmt.push_back(new_stmt); + dep.insert(); + what_stmt_num[*i].push_back(stmt.size() - 1); + } + } + for (std::set<int>::iterator i = same_loop.begin(); i != same_loop.end(); i++) + add_loop_stride(stmt[*i].IS, bound, level-1, unroll_amount * stride); + + + // update dependence graph + if (stmt[stmt_num].loop_level[level-1].type == LoopLevelOriginal) { + int dep_dim = stmt[stmt_num].loop_level[level-1].payload; + int new_stride = unroll_amount * stride; + for (int i = 0; i < old_num_stmt; i++) { + std::vector<std::pair<int, DependenceVector> > D; + + for (DependenceGraph::EdgeList::iterator j = dep.vertex[i].second.begin(); j != dep.vertex[i].second.end(); ) { + if (same_loop.find(i) != same_loop.end()) { + if (same_loop.find(j->first) != same_loop.end()) { + for (int k = 0; k < j->second.size(); k++) { + DependenceVector dv = j->second[k]; + if (dv.type == DEP_CONTROL || dv.type == DEP_UNKNOWN) { + D.push_back(std::make_pair(j->first, dv)); + for (int kk = 0; kk < unroll_amount - 1; kk++) + if (what_stmt_num[i][kk] != -1 && 
what_stmt_num[j->first][kk] != -1) + dep.connect(what_stmt_num[i][kk], what_stmt_num[j->first][kk], dv); + } + else { + coef_t lb = dv.lbounds[dep_dim]; + coef_t ub = dv.ubounds[dep_dim]; + if (ub == lb && int_mod(lb, static_cast<coef_t>(new_stride)) == 0) { + D.push_back(std::make_pair(j->first, dv)); + for (int kk = 0; kk < unroll_amount - 1; kk++) + if (what_stmt_num[i][kk] != -1 && what_stmt_num[j->first][kk] != -1) + dep.connect(what_stmt_num[i][kk], what_stmt_num[j->first][kk], dv); + } + else if (lb == -posInfinity && ub == posInfinity) { + D.push_back(std::make_pair(j->first, dv)); + for (int kk = 0; kk < unroll_amount; kk++) + if (kk == 0) + D.push_back(std::make_pair(j->first, dv)); + else if (what_stmt_num[j->first][kk-1] != -1) + D.push_back(std::make_pair(what_stmt_num[j->first][kk-1], dv)); + for (int t = 0; t < unroll_amount - 1; t++) + if (what_stmt_num[i][t] != -1) + for (int kk = 0; kk < unroll_amount; kk++) + if (kk == 0) + dep.connect(what_stmt_num[i][t], j->first, dv); + else if (what_stmt_num[j->first][kk-1] != -1) + dep.connect(what_stmt_num[i][t], what_stmt_num[j->first][kk-1], dv); + } + else { + for (int kk = 0; kk < unroll_amount; kk++) { + if (lb != -posInfinity) { + if (kk * stride < int_mod(lb, static_cast<coef_t>(new_stride))) + dv.lbounds[dep_dim] = floor(static_cast<double>(lb)/new_stride) * new_stride + new_stride; + else + dv.lbounds[dep_dim] = floor(static_cast<double>(lb)/new_stride) * new_stride; + } + if (ub != posInfinity) { + if (kk * stride > int_mod(ub, static_cast<coef_t>(new_stride))) + dv.ubounds[dep_dim] = floor(static_cast<double>(ub)/new_stride) * new_stride - new_stride; + else + dv.ubounds[dep_dim] = floor(static_cast<double>(ub)/new_stride) * new_stride; + } + if (dv.ubounds[dep_dim] >= dv.lbounds[dep_dim]) { + if (kk == 0) + D.push_back(std::make_pair(j->first, dv)); + else if (what_stmt_num[j->first][kk-1] != -1) + D.push_back(std::make_pair(what_stmt_num[j->first][kk-1], dv)); + } + } + for (int t = 0; t < 
unroll_amount-1; t++) + if (what_stmt_num[i][t] != -1) + for (int kk = 0; kk < unroll_amount; kk++) { + if (lb != -posInfinity) { + if (kk * stride < int_mod(lb+t+1, static_cast<coef_t>(new_stride))) + dv.lbounds[dep_dim] = floor(static_cast<double>(lb+(t+1)*stride)/new_stride) * new_stride + new_stride; + else + dv.lbounds[dep_dim] = floor(static_cast<double>(lb+(t+1)*stride)/new_stride) * new_stride; + } + if (ub != posInfinity) { + if (kk * stride > int_mod(ub+t+1, static_cast<coef_t>(new_stride))) + dv.ubounds[dep_dim] = floor(static_cast<double>(ub+(t+1)*stride)/new_stride) * new_stride - new_stride; + else + dv.ubounds[dep_dim] = floor(static_cast<double>(ub+(t+1)*stride)/new_stride) * new_stride; + } + if (dv.ubounds[dep_dim] >= dv.lbounds[dep_dim]) { + if (kk == 0) + dep.connect(what_stmt_num[i][t], j->first, dv); + else if (what_stmt_num[j->first][kk-1] != -1) + dep.connect(what_stmt_num[i][t], what_stmt_num[j->first][kk-1], dv); + } + } + } + } + } + + dep.vertex[i].second.erase(j++); + } + else { + for (int kk = 0; kk < unroll_amount - 1; kk++) + if (what_stmt_num[i][kk] != -1) + dep.connect(what_stmt_num[i][kk], j->first, j->second); + + j++; + } + } + else { + if (same_loop.find(j->first) != same_loop.end()) + for (int k = 0; k < j->second.size(); k++) + for (int kk = 0; kk < unroll_amount - 1; kk++) + if (what_stmt_num[j->first][kk] != -1) + D.push_back(std::make_pair(what_stmt_num[j->first][kk], j->second[k])); + j++; + } + } + + for (int j = 0; j < D.size(); j++) + dep.connect(i, D[j].first, D[j].second); + } + } + + // reset lexical order for the unrolled loop body + std::set<int> new_same_loop; + for (std::map<int, std::vector<int> >::iterator i = what_stmt_num.begin(); i != what_stmt_num.end(); i++) { + new_same_loop.insert(i->first); + for (int j = 0; j < i->second.size(); j++) + new_same_loop.insert(i->second[j]); + } + setLexicalOrder(dim+1, new_same_loop); + } + else { + for (std::set<int>::iterator i = same_loop.begin(); i != 
same_loop.end(); i++) + add_loop_stride(stmt[*i].IS, bound, level-1, unroll_amount * stride); + + int max_level = stmt[stmt_num].loop_level.size(); + std::vector<std::pair<int, int> > stmt_order; + for (std::set<int>::iterator i = same_loop.begin(); i != same_loop.end(); i++) + stmt_order.push_back(std::make_pair(get_const(stmt[*i].xform, 2*max_level, Output_Var), *i)); + sort(stmt_order.begin(), stmt_order.end()); + + Statement new_stmt; + new_stmt.code = NULL; + for (int j = 1; j < unroll_amount; j++) + for (int i = 0; i < stmt_order.size(); i++) { + Tuple<CG_outputRepr *> funcList; + Tuple<std::string> loop_vars; + loop_vars.append(stmt[stmt_order[i].second].IS.set_var(level)->name()); + funcList.append(ocg->CreatePlus(ocg->CreateIdent(stmt[stmt_order[i].second].IS.set_var(level)->name()), ocg->CreateInt(j*stride))); + CG_outputRepr *code = ocg->CreatePlaceHolder(0, stmt[stmt_order[i].second].code->clone(), funcList, loop_vars); + new_stmt.code = ocg->StmtListAppend(new_stmt.code, code); + } + + new_stmt.IS = copy(stmt[stmt_num].IS); + new_stmt.xform = copy(stmt[stmt_num].xform); + assign_const(new_stmt.xform, 2*max_level, stmt_order[stmt_order.size()-1].first+1); + new_stmt.loop_level = stmt[stmt_num].loop_level; + stmt.push_back(new_stmt); + dep.insert(); + + // update dependence graph + if (stmt[stmt_num].loop_level[level-1].type == LoopLevelOriginal) { + int dep_dim = stmt[stmt_num].loop_level[level-1].payload; + int new_stride = unroll_amount * stride; + for (int i = 0; i < old_num_stmt; i++) { + std::vector<std::pair<int, std::vector<DependenceVector> > > D; + + for (DependenceGraph::EdgeList::iterator j = dep.vertex[i].second.begin(); j != dep.vertex[i].second.end(); ) { + if (same_loop.find(i) != same_loop.end()) { + if (same_loop.find(j->first) != same_loop.end()) { + std::vector<DependenceVector> dvs11, dvs12, dvs22, dvs21; + for (int k = 0; k < j->second.size(); k++) { + DependenceVector dv = j->second[k]; + if (dv.type == DEP_CONTROL || dv.type == 
DEP_UNKNOWN) { + if (i == j->first) { + dvs11.push_back(dv); + dvs22.push_back(dv); + } + else + throw loop_error("unrolled statements lumped together illegally"); + } + else { + coef_t lb = dv.lbounds[dep_dim]; + coef_t ub = dv.ubounds[dep_dim]; + if (ub == lb && int_mod(lb, static_cast<coef_t>(new_stride)) == 0) { + dvs11.push_back(dv); + dvs22.push_back(dv); + } + else { + if (lb != -posInfinity) + dv.lbounds[dep_dim] = ceil(static_cast<double>(lb)/new_stride) * new_stride; + if (ub != posInfinity) + dv.ubounds[dep_dim] = floor(static_cast<double>(ub)/new_stride) * new_stride; + if (dv.ubounds[dep_dim] >= dv.lbounds[dep_dim]) + dvs11.push_back(dv); + + if (lb != -posInfinity) + dv.lbounds[dep_dim] = ceil(static_cast<double>(lb)/new_stride) * new_stride; + if (ub != posInfinity) + dv.ubounds[dep_dim] = ceil(static_cast<double>(ub)/new_stride) * new_stride; + if (dv.ubounds[dep_dim] >= dv.lbounds[dep_dim]) + dvs21.push_back(dv); + + if (lb != -posInfinity) + dv.lbounds[dep_dim] = floor(static_cast<double>(lb)/new_stride) * new_stride; + if (ub != posInfinity) + dv.ubounds[dep_dim] = floor(static_cast<double>(ub-stride)/new_stride) * new_stride; + if (dv.ubounds[dep_dim] >= dv.lbounds[dep_dim]) + dvs12.push_back(dv); + + if (lb != -posInfinity) + dv.lbounds[dep_dim] = floor(static_cast<double>(lb)/new_stride) * new_stride; + if (ub != posInfinity) + dv.ubounds[dep_dim] = ceil(static_cast<double>(ub-stride)/new_stride) * new_stride; + if (dv.ubounds[dep_dim] >= dv.lbounds[dep_dim]) + dvs22.push_back(dv); + } + } + } + if (dvs11.size() > 0) + D.push_back(std::make_pair(i, dvs11)); + if (dvs22.size() > 0) + dep.connect(old_num_stmt, old_num_stmt, dvs22); + if (dvs12.size() > 0) + D.push_back(std::make_pair(old_num_stmt, dvs12)); + if (dvs21.size() > 0) + dep.connect(old_num_stmt, i, dvs21); + + dep.vertex[i].second.erase(j++); + } + else { + dep.connect(old_num_stmt, j->first, j->second); + j++; + } + } + else { + if (same_loop.find(j->first) != same_loop.end()) + 
D.push_back(std::make_pair(old_num_stmt, j->second)); + j++; + } + } + + for (int j = 0; j < D.size(); j++) + dep.connect(i, D[j].first, D[j].second); + } + } + } + + return result; +} + + +std::vector<int> Loop::getLexicalOrder(int stmt_num) const { + assert(stmt_num < stmt.size()); + + const int n = stmt[stmt_num].xform.n_out(); + std::vector<int> lex(n,0); + + for (int i = 0; i < n; i += 2) + lex[i] = get_const(stmt[stmt_num].xform, i, Output_Var); + + return lex; +} + +std::set<int> Loop::getStatements(const std::vector<int> &lex, int dim) const { + const int m = stmt.size(); + + std::set<int> same_loops; + for (int i = 0; i < m; i++) { + if (dim < 0) + same_loops.insert(i); + else { + std::vector<int> a_lex = getLexicalOrder(i); + int j; + for (j = 0; j <= dim; j+=2) + if (lex[j] != a_lex[j]) + break; + if (j > dim) + same_loops.insert(i); + } + } + + return same_loops; +} + + +void Loop::shiftLexicalOrder(const std::vector<int> &lex, int dim, int amount) { + const int m = stmt.size(); + + if (amount == 0) + return; + + for (int i = 0; i < m; i++) { + std::vector<int> lex2 = getLexicalOrder(i); + + bool need_shift = true; + + for (int j = 0; j < dim; j++) + if (lex2[j] != lex[j]) { + need_shift = false; + break; + } + + if (!need_shift) + continue; + + if (amount > 0) { + if (lex2[dim] < lex[dim]) + continue; + } + else if (amount < 0) { + if (lex2[dim] > lex[dim]) + continue; + } + + assign_const(stmt[i].xform, dim, lex2[dim] + amount); + } +} + + +void Loop::setLexicalOrder(int dim, const std::set<int> &active, int starting_order) { + if (active.size() == 0) + return; + + // check for sanity of parameters + if (dim < 0 || dim % 2 != 0) + throw std::invalid_argument("invalid constant loop level to set lexicographical order"); + std::vector<int> lex; + int ref_stmt_num; + for (std::set<int>::iterator i = active.begin(); i != active.end(); i++) { + if ((*i) < 0 || (*i) >= stmt.size()) + throw std::invalid_argument("invalid statement number " + to_string(*i)); + 
if (dim >= stmt[*i].xform.n_out()) + throw std::invalid_argument("invalid constant loop level to set lexicographical order"); + if (i == active.begin()) { + lex = getLexicalOrder(*i); + ref_stmt_num = *i; + } + else { + std::vector<int> lex2 = getLexicalOrder(*i); + for (int j = 0; j < dim; j+=2) + if (lex[j] != lex2[j]) + throw std::invalid_argument("statements are not in the same sub loop nest"); + } + } + + // sepearate statements by current loop level types + int level = (dim+2)/2; + std::map<std::pair<LoopLevelType, int>, std::set<int> > active_by_level_type; + std::set<int> active_by_no_level; + for (std::set<int>::iterator i = active.begin(); i != active.end(); i++) { + if (level > stmt[*i].loop_level.size()) + active_by_no_level.insert(*i); + else + active_by_level_type[std::make_pair(stmt[*i].loop_level[level-1].type, stmt[*i].loop_level[level-1].payload)].insert(*i); + } + + // further separate statements due to control dependences + std::vector<std::set<int> > active_by_level_type_splitted; + for (std::map<std::pair<LoopLevelType, int>, std::set<int> >::iterator i = active_by_level_type.begin(); i != active_by_level_type.end(); i++) + active_by_level_type_splitted.push_back(i->second); + for (std::set<int>::iterator i = active_by_no_level.begin(); i != active_by_no_level.end(); i++) + for (int j = active_by_level_type_splitted.size() - 1; j >= 0; j--) { + std::set<int> controlled, not_controlled; + for (std::set<int>::iterator k = active_by_level_type_splitted[j].begin(); k != active_by_level_type_splitted[j].end(); k++) { + std::vector<DependenceVector> dvs = dep.getEdge(*i, *k); + bool is_controlled = false; + for (int kk = 0; kk < dvs.size(); kk++) + if (dvs[kk].type = DEP_CONTROL) { + is_controlled = true; + break; + } + if (is_controlled) + controlled.insert(*k); + else + not_controlled.insert(*k); + } + if (controlled.size() != 0 && not_controlled.size() != 0) { + active_by_level_type_splitted.erase(active_by_level_type_splitted.begin() + j); + 
active_by_level_type_splitted.push_back(controlled); + active_by_level_type_splitted.push_back(not_controlled); + } + } + + // set lexical order separating loops with different loop types first + if (active_by_level_type_splitted.size() + active_by_no_level.size() > 1) { + int dep_dim = get_last_dep_dim_before(ref_stmt_num, level) + 1; + + Graph<std::set<int>, Empty> g; + for (std::vector<std::set<int> >::iterator i = active_by_level_type_splitted.begin(); i != active_by_level_type_splitted.end(); i++) + g.insert(*i); + for (std::set<int>::iterator i = active_by_no_level.begin(); i != active_by_no_level.end(); i++) { + std::set<int> t; + t.insert(*i); + g.insert(t); + } + for (int i = 0; i < g.vertex.size(); i++) + for (int j = i+1; j < g.vertex.size(); j++) { + bool connected = false; + for (std::set<int>::iterator ii = g.vertex[i].first.begin(); ii != g.vertex[i].first.end(); ii++) { + for (std::set<int>::iterator jj = g.vertex[j].first.begin(); jj != g.vertex[j].first.end(); jj++) { + std::vector<DependenceVector> dvs = dep.getEdge(*ii, *jj); + for (int k = 0; k < dvs.size(); k++) + if (dvs[k].is_control_dependence() || + (dvs[k].is_data_dependence() && !dvs[k].has_been_carried_before(dep_dim))) { + g.connect(i, j); + connected = true; + break; + } + if (connected) + break; + } + if (connected) + break; + } + connected = false; + for (std::set<int>::iterator ii = g.vertex[i].first.begin(); ii != g.vertex[i].first.end(); ii++) { + for (std::set<int>::iterator jj = g.vertex[j].first.begin(); jj != g.vertex[j].first.end(); jj++) { + std::vector<DependenceVector> dvs = dep.getEdge(*jj, *ii); + for (int k = 0; k < dvs.size(); k++) + if (dvs[k].is_control_dependence() || + (dvs[k].is_data_dependence() && !dvs[k].has_been_carried_before(dep_dim))) { + g.connect(j, i); + connected = true; + break; + } + if (connected) + break; + } + if (connected) + break; + } + } + + std::vector<std::set<int> > s = g.topoSort(); + if (s.size() != g.vertex.size()) + throw 
loop_error("cannot separate statements with different loop types at loop level " + to_string(level)); + + // assign lexical order + int order = starting_order; + for (int i = 0; i < s.size(); i++) { + std::set<int> &cur_scc = g.vertex[*(s[i].begin())].first; + int sz = cur_scc.size(); + if (sz == 1) { + int cur_stmt = *(cur_scc.begin()); + assign_const(stmt[cur_stmt].xform, dim, order); + for (int j = dim+2; j < stmt[cur_stmt].xform.n_out(); j+=2) + assign_const(stmt[cur_stmt].xform, j, 0); + order++; + } + else { + setLexicalOrder(dim, cur_scc, order); + order += sz; + } + } + } + // set lexical order seperating single iteration statements and loops + else { + std::set<int> true_singles; + std::set<int> nonsingles; + std::map<coef_t, std::set<int> > fake_singles; + + // sort out statements that do not require loops + for(std::set<int>::iterator i = active.begin(); i != active.end(); i++) { + Relation cur_IS = getNewIS(*i); + if (is_single_iteration(cur_IS, dim+1)) { + bool is_all_single = true; + for (int j = dim+3; j < stmt[*i].xform.n_out(); j+=2) + if (!is_single_iteration(cur_IS, j)) { + is_all_single = false; + break; + } + if (is_all_single) + true_singles.insert(*i); + else { + try { + fake_singles[get_const(cur_IS, dim+1, Set_Var)].insert(*i); + } + catch (const std::exception &e) { + fake_singles[posInfinity].insert(*i); + } + } + } + else + nonsingles.insert(*i); + } + + // split nonsingles forcibly according to negative dependences present (loop unfusible) + int dep_dim = get_dep_dim_of(ref_stmt_num, level); + Graph<int, Empty> g2; + for (std::set<int>::iterator i = nonsingles.begin(); i != nonsingles.end(); i++) + g2.insert(*i); + for (int i = 0; i < g2.vertex.size(); i++) + for (int j = i+1; j < g2.vertex.size(); j++) { + std::vector<DependenceVector> dvs = dep.getEdge(g2.vertex[i].first, g2.vertex[j].first); + for (int k = 0; k < dvs.size(); k++) + if (dvs[k].is_control_dependence() || + (dvs[k].is_data_dependence() && 
dvs[k].has_negative_been_carried_at(dep_dim))) { + g2.connect(i, j); + break; + } + dvs = dep.getEdge(g2.vertex[j].first, g2.vertex[i].first); + for (int k = 0; k < dvs.size(); k++) + if (dvs[k].is_control_dependence() || + (dvs[k].is_data_dependence() && dvs[k].has_negative_been_carried_at(dep_dim))) { + g2.connect(j, i); + break; + } + } + + std::vector<std::set<int> > s2 = g2.packed_topoSort(); + + std::vector<std::set<int> > splitted_nonsingles; + for (int i = 0; i < s2.size(); i++) { + std::set<int> cur_scc; + for (std::set<int>::iterator j = s2[i].begin(); j != s2[i].end(); j++) + cur_scc.insert(g2.vertex[*j].first); + splitted_nonsingles.push_back(cur_scc); + } + + // convert to dependence graph for grouped statements + dep_dim = get_last_dep_dim_before(ref_stmt_num, level) + 1; + Graph<std::set<int>, Empty> g; + for (std::set<int>::iterator i = true_singles.begin(); i != true_singles.end(); i++) { + std::set<int> t; + t.insert(*i); + g.insert(t); + } + for (int i = 0; i < splitted_nonsingles.size(); i++) { + g.insert(splitted_nonsingles[i]); + } + for (std::map<coef_t, std::set<int> >::iterator i = fake_singles.begin(); i != fake_singles.end(); i++) + g.insert((*i).second); + + for (int i = 0; i < g.vertex.size(); i++) + for (int j = i + 1; j < g.vertex.size(); j++) { + bool connected = false; + for (std::set<int>::iterator ii = g.vertex[i].first.begin(); ii != g.vertex[i].first.end(); ii++) { + for (std::set<int>::iterator jj = g.vertex[j].first.begin(); jj != g.vertex[j].first.end(); jj++) { + std::vector<DependenceVector> dvs = dep.getEdge(*ii, *jj); + for (int k = 0; k < dvs.size(); k++) + if (dvs[k].is_control_dependence() || + (dvs[k].is_data_dependence() && !dvs[k].has_been_carried_before(dep_dim))) { + g.connect(i, j); + connected = true; + break; + } + if (connected) + break; + } + if (connected) + break; + } + connected = false; + for (std::set<int>::iterator ii = g.vertex[i].first.begin(); ii != g.vertex[i].first.end(); ii++) { + for 
(std::set<int>::iterator jj = g.vertex[j].first.begin(); jj != g.vertex[j].first.end(); jj++) { + std::vector<DependenceVector> dvs = dep.getEdge(*jj, *ii); + for (int k = 0; k < dvs.size(); k++) + if (dvs[k].is_control_dependence() || + (dvs[k].is_data_dependence() && !dvs[k].has_been_carried_before(dep_dim))) { + g.connect(j, i); + connected = true; + break; + } + if (connected) + break; + } + if (connected) + break; + } + } + + // topological sort according to chun's permute algorithm + std::vector<std::set<int> > s = g.topoSort(); + + // assign lexical order + int order = starting_order; + for (int i = 0; i < s.size(); i++) { + // translate each SCC into original statements + std::set<int> cur_scc; + for (std::set<int>::iterator j = s[i].begin(); j != s[i].end(); j++) + copy(g.vertex[*j].first.begin(), g.vertex[*j].first.end(), inserter(cur_scc, cur_scc.begin())); + + // now assign the constant + for(std::set<int>::iterator j = cur_scc.begin(); j != cur_scc.end(); j++) + assign_const(stmt[*j].xform, dim, order); + + if (cur_scc.size() > 1) + setLexicalOrder(dim+2, cur_scc); + else if (cur_scc.size() == 1) { + int cur_stmt =*(cur_scc.begin()); + for (int j = dim+2; j < stmt[cur_stmt].xform.n_out(); j+=2) + assign_const(stmt[cur_stmt].xform, j, 0); + } + + if (cur_scc.size() > 0) + order++; + } + } +} + + +void Loop::apply_xform() { + std::set<int> active; + for (int i = 0; i < stmt.size(); i++) + active.insert(i); + apply_xform(active); +} + + +void Loop::apply_xform(int stmt_num) { + std::set<int> active; + active.insert(stmt_num); + apply_xform(active); +} + + +void Loop::apply_xform(std::set<int> &active) { + int max_n = 0; + + CG_outputBuilder *ocg = ir->builder(); + for (std::set<int>::iterator i = active.begin(); i != active.end(); i++) { + int n = stmt[*i].loop_level.size(); + if (n > max_n) + max_n = n; + + std::vector<int> lex = getLexicalOrder(*i); + + Relation mapping(2*n+1, n); + F_And *f_root = mapping.add_and(); + for (int j = 1; j <= n; j++) { + 
EQ_Handle h = f_root->add_EQ(); + h.update_coef(mapping.output_var(j), 1); + h.update_coef(mapping.input_var(2*j), -1); + } + mapping = Composition(mapping, stmt[*i].xform); + mapping.simplify(); + + // match omega input/output variables to variable names in the code + for (int j = 1; j <= stmt[*i].IS.n_set(); j++) + mapping.name_input_var(j, stmt[*i].IS.set_var(j)->name()); + for (int j = 1; j <= n; j++) + mapping.name_output_var(j, tmp_loop_var_name_prefix + to_string(tmp_loop_var_name_counter+j-1)); + mapping.setup_names(); + + Relation known = Extend_Set(copy(this->known), mapping.n_out() - this->known.n_set()); + //stmt[*i].code = outputStatement(ocg, stmt[*i].code, 0, mapping, known, std::vector<CG_outputRepr *>(mapping.n_out(), NULL)); + stmt[*i].code = outputStatement(ocg, stmt[*i].code, 0, mapping, known, std::vector<CG_outputRepr *>(mapping.n_out())); + stmt[*i].IS = Range(Restrict_Domain(mapping, stmt[*i].IS)); + stmt[*i].IS.simplify(); + + // replace original transformation relation with straight 1-1 mapping + mapping = Relation(n, 2*n+1); + f_root = mapping.add_and(); + for (int j = 1; j <= n; j++) { + EQ_Handle h = f_root->add_EQ(); + h.update_coef(mapping.output_var(2*j), 1); + h.update_coef(mapping.input_var(j), -1); + } + for (int j = 1; j <= 2*n+1; j+=2) { + EQ_Handle h = f_root->add_EQ(); + h.update_coef(mapping.output_var(j), 1); + h.update_const(-lex[j-1]); + } + stmt[*i].xform = mapping; + } + + tmp_loop_var_name_counter += max_n; +} + + +void Loop::addKnown(const Relation &cond) { + int n1 = this->known.n_set(); + + Relation r = copy(cond); + int n2 = r.n_set(); + + if (n1 < n2) + this->known = Extend_Set(this->known, n2-n1); + else if (n1 > n2) + r = Extend_Set(r, n1-n2); + + this->known = Intersection(this->known, r); +} + + +bool Loop::nonsingular(const std::vector<std::vector<int> > &T) { + if (stmt.size() == 0) + return true; + + // check for sanity of parameters + for (int i = 0; i < stmt.size(); i++) { + if (stmt[i].loop_level.size() 
!= num_dep_dim) + throw std::invalid_argument("nonsingular loop transformations must be applied to original perfect loop nest"); + for (int j = 0; j < stmt[i].loop_level.size(); j++) + if (stmt[i].loop_level[j].type != LoopLevelOriginal) + throw std::invalid_argument("nonsingular loop transformations must be applied to original perfect loop nest"); + } + if (T.size() != num_dep_dim) + throw std::invalid_argument("invalid transformation matrix"); + for (int i = 0; i < stmt.size(); i++) + if (T[i].size() != num_dep_dim + 1 && T[i].size() != num_dep_dim) + throw std::invalid_argument("invalid transformation matrix"); + + // build relation from matrix + Relation mapping(2*num_dep_dim+1, 2*num_dep_dim+1); + F_And *f_root = mapping.add_and(); + for (int i = 0; i < num_dep_dim; i++) { + EQ_Handle h = f_root->add_EQ(); + h.update_coef(mapping.output_var(2*(i+1)), -1); + for (int j = 0; j < num_dep_dim; j++) + if (T[i][j] != 0) + h.update_coef(mapping.input_var(2*(j+1)), T[i][j]); + if (T[i].size() == num_dep_dim+1) + h.update_const(T[i][num_dep_dim]); + } + for (int i = 1; i <= 2*num_dep_dim+1; i+=2) { + EQ_Handle h = f_root->add_EQ(); + h.update_coef(mapping.output_var(i), -1); + h.update_coef(mapping.input_var(i), 1); + } + + // update transformation relations + for (int i = 0; i < stmt.size(); i++) + stmt[i].xform = Composition(copy(mapping), stmt[i].xform); + + // update dependence graph + for (int i = 0; i < dep.vertex.size(); i++) + for (DependenceGraph::EdgeList::iterator j = dep.vertex[i].second.begin(); j != dep.vertex[i].second.end(); j++) { + std::vector<DependenceVector> dvs = j->second; + for (int k = 0; k < dvs.size(); k++) { + DependenceVector &dv = dvs[k]; + switch (dv.type) { + case DEP_W2R: + case DEP_R2W: + case DEP_W2W: + case DEP_R2R: { + std::vector<coef_t> lbounds(num_dep_dim), ubounds(num_dep_dim); + for (int p = 0; p < num_dep_dim; p++) { + coef_t lb = 0; + coef_t ub = 0; + for (int q = 0; q < num_dep_dim; q++) { + if (T[p][q] > 0) { + if (lb == 
-posInfinity || dv.lbounds[q] == -posInfinity) + lb = -posInfinity; + else + lb += T[p][q] * dv.lbounds[q]; + if (ub == posInfinity || dv.ubounds[q] == posInfinity) + ub = posInfinity; + else + ub += T[p][q] * dv.ubounds[q]; + } + else if (T[p][q] < 0) { + if (lb == -posInfinity || dv.ubounds[q] == posInfinity) + lb = -posInfinity; + else + lb += T[p][q] * dv.ubounds[q]; + if (ub == posInfinity || dv.lbounds[q] == -posInfinity) + ub = posInfinity; + else + ub += T[p][q] * dv.lbounds[q]; + } + } + if (T[p].size() == num_dep_dim+1) { + if (lb != -posInfinity) + lb += T[p][num_dep_dim]; + if (ub != posInfinity) + ub += T[p][num_dep_dim]; + } + lbounds[p] = lb; + ubounds[p] = ub; + } + dv.lbounds = lbounds; + dv.ubounds = ubounds; + + break; + } + default: + ; + } + } + j->second = dvs; + } + + // set constant loop values + std::set<int> active; + for (int i = 0; i < stmt.size(); i++) + active.insert(i); + setLexicalOrder(0, active); + + return true; +} + + +void Loop::skew(const std::set<int> &stmt_nums, int level, const std::vector<int> &skew_amount) { + if (stmt_nums.size() == 0) + return; + + // check for sanity of parameters + int ref_stmt_num = *(stmt_nums.begin()); + for (std::set<int>::const_iterator i = stmt_nums.begin(); i != stmt_nums.end(); i++) { + if (*i < 0 || *i >= stmt.size()) + throw std::invalid_argument("invalid statement number " + to_string(*i)); + if (level < 1 || level > stmt[*i].loop_level.size()) + throw std::invalid_argument("invalid loop level " + to_string(level)); + for (int j = stmt[*i].loop_level.size(); j < skew_amount.size(); j++) + if (skew_amount[j] != 0) + throw std::invalid_argument("invalid skewing formula"); + } + + // set trasformation relations + for (std::set<int>::const_iterator i = stmt_nums.begin(); i != stmt_nums.end(); i++) { + int n = stmt[*i].xform.n_out(); + Relation r(n,n); + F_And *f_root = r.add_and(); + for (int j = 1; j <= n; j++) + if (j != 2*level) { + EQ_Handle h = f_root->add_EQ(); + 
h.update_coef(r.input_var(j), 1); + h.update_coef(r.output_var(j), -1); + } + EQ_Handle h = f_root->add_EQ(); + h.update_coef(r.output_var(2*level), -1); + for (int j = 0; j < skew_amount.size(); j++) + if (skew_amount[j] != 0) + h.update_coef(r.input_var(2*(j+1)), skew_amount[j]); + + stmt[*i].xform = Composition(r, stmt[*i].xform); + stmt[*i].xform.simplify(); + } + + // update dependence graph + if (stmt[ref_stmt_num].loop_level[level-1].type == LoopLevelOriginal) { + int dep_dim = stmt[ref_stmt_num].loop_level[level-1].payload; + for (std::set<int>::const_iterator i = stmt_nums.begin(); i != stmt_nums.end(); i++) + for (DependenceGraph::EdgeList::iterator j = dep.vertex[*i].second.begin(); j != dep.vertex[*i].second.end(); j++) + if (stmt_nums.find(j->first) != stmt_nums.end()) { + // dependence between skewed statements + std::vector<DependenceVector> dvs = j->second; + for (int k = 0; k < dvs.size(); k++) { + DependenceVector &dv = dvs[k]; + if (dv.is_data_dependence()) { + coef_t lb = 0; + coef_t ub = 0; + for (int kk = 0; kk < skew_amount.size(); kk++) { + int cur_dep_dim = get_dep_dim_of(*i, kk+1); + if (skew_amount[kk] > 0) { + if (lb != -posInfinity && + stmt[*i].loop_level[kk].type == LoopLevelOriginal && + dv.lbounds[cur_dep_dim] != -posInfinity) + lb += skew_amount[kk] * dv.lbounds[cur_dep_dim]; + else { + if (cur_dep_dim != -1 && !(dv.lbounds[cur_dep_dim] == 0 && dv.ubounds[cur_dep_dim] == 0)) + lb = -posInfinity; + } + if (ub != posInfinity && + stmt[*i].loop_level[kk].type == LoopLevelOriginal && + dv.ubounds[cur_dep_dim] != posInfinity) + ub += skew_amount[kk] * dv.ubounds[cur_dep_dim]; + else { + if (cur_dep_dim != -1 && !(dv.lbounds[cur_dep_dim] == 0 && dv.ubounds[cur_dep_dim] == 0)) + ub = posInfinity; + } + } + else if (skew_amount[kk] < 0) { + if (lb != -posInfinity && + stmt[*i].loop_level[kk].type == LoopLevelOriginal && + dv.ubounds[cur_dep_dim] != posInfinity) + lb += skew_amount[kk] * dv.ubounds[cur_dep_dim]; + else { + if (cur_dep_dim 
!= -1 && !(dv.lbounds[cur_dep_dim] == 0 && dv.ubounds[cur_dep_dim] == 0)) + lb = -posInfinity; + } + if (ub != posInfinity && + stmt[*i].loop_level[kk].type == LoopLevelOriginal && + dv.lbounds[cur_dep_dim] != -posInfinity) + ub += skew_amount[kk] * dv.lbounds[cur_dep_dim]; + else { + if (cur_dep_dim != -1 && !(dv.lbounds[cur_dep_dim] == 0 && dv.ubounds[cur_dep_dim] == 0)) + ub = posInfinity; + } + } + } + dv.lbounds[dep_dim] = lb; + dv.ubounds[dep_dim] = ub; + } + } + j->second = dvs; + } + else { + // dependence from skewed statement to unskewed statement becomes jumbled, + // put distance value at skewed dimension to unknown + std::vector<DependenceVector> dvs = j->second; + for (int k = 0; k < dvs.size(); k++) { + DependenceVector &dv = dvs[k]; + if (dv.is_data_dependence()) { + dv.lbounds[dep_dim] = -posInfinity; + dv.ubounds[dep_dim] = posInfinity; + } + } + j->second = dvs; + } + for (int i = 0; i < dep.vertex.size(); i++) + if (stmt_nums.find(i) == stmt_nums.end()) + for (DependenceGraph::EdgeList::iterator j = dep.vertex[i].second.begin(); j != dep.vertex[i].second.end(); j++) + if (stmt_nums.find(j->first) != stmt_nums.end()) { + // dependence from unskewed statement to skewed statement becomes jumbled, + // put distance value at skewed dimension to unknown + std::vector<DependenceVector> dvs = j->second; + for (int k = 0; k < dvs.size(); k++) { + DependenceVector &dv = dvs[k]; + if (dv.is_data_dependence()) { + dv.lbounds[dep_dim] = -posInfinity; + dv.ubounds[dep_dim] = posInfinity; + } + } + j->second = dvs; + } + } +} + + +void Loop::shift(const std::set<int> &stmt_nums, int level, int shift_amount) { + if (stmt_nums.size() == 0) + return; + + // check for sanity of parameters + int ref_stmt_num = *(stmt_nums.begin()); + for (std::set<int>::const_iterator i = stmt_nums.begin(); i != stmt_nums.end(); i++) { + if (*i < 0 || *i >= stmt.size()) + throw std::invalid_argument("invalid statement number " + to_string(*i)); + if (level < 1 || level > 
stmt[*i].loop_level.size()) + throw std::invalid_argument("invalid loop level " + to_string(level)); + } + + // do nothing + if (shift_amount == 0) + return; + + // set trasformation relations + for (std::set<int>::const_iterator i = stmt_nums.begin(); i != stmt_nums.end(); i++) { + int n = stmt[*i].xform.n_out(); + + Relation r(n, n); + F_And *f_root = r.add_and(); + for (int j = 1; j <= n; j++) { + EQ_Handle h = f_root->add_EQ(); + h.update_coef(r.input_var(j), 1); + h.update_coef(r.output_var(j), -1); + if (j == 2*level) + h.update_const(shift_amount); + } + + stmt[*i].xform = Composition(r, stmt[*i].xform); + stmt[*i].xform.simplify(); + } + + // update dependence graph + if (stmt[ref_stmt_num].loop_level[level-1].type == LoopLevelOriginal) { + int dep_dim = stmt[ref_stmt_num].loop_level[level-1].payload; + for (std::set<int>::const_iterator i = stmt_nums.begin(); i != stmt_nums.end(); i++) + for (DependenceGraph::EdgeList::iterator j = dep.vertex[*i].second.begin(); j != dep.vertex[*i].second.end(); j++) + if (stmt_nums.find(j->first) == stmt_nums.end()) { + // dependence from shifted statement to unshifted statement + std::vector<DependenceVector> dvs = j->second; + for (int k = 0; k < dvs.size(); k++) { + DependenceVector &dv = dvs[k]; + if (dv.is_data_dependence()) { + if (dv.lbounds[dep_dim] != -posInfinity) + dv.lbounds[dep_dim] -= shift_amount; + if (dv.ubounds[dep_dim] != posInfinity) + dv.ubounds[dep_dim] -= shift_amount; + } + } + j->second = dvs; + } + for (int i = 0; i < dep.vertex.size(); i++) + if (stmt_nums.find(i) == stmt_nums.end()) + for (DependenceGraph::EdgeList::iterator j = dep.vertex[i].second.begin(); j != dep.vertex[i].second.end(); j++) + if (stmt_nums.find(j->first) != stmt_nums.end()) { + // dependence from unshifted statement to shifted statement + std::vector<DependenceVector> dvs = j->second; + for (int k = 0; k < dvs.size(); k++) { + DependenceVector &dv = dvs[k]; + if (dv.is_data_dependence()) { + if (dv.lbounds[dep_dim] != 
-posInfinity) + dv.lbounds[dep_dim] += shift_amount; + if (dv.ubounds[dep_dim] != posInfinity) + dv.ubounds[dep_dim] += shift_amount; + } + } + j->second = dvs; + } + } +} + + + +// bool Loop::fuse(const std::set<int> &stmt_nums, int level) { +// if (stmt_nums.size() == 0 || stmt_nums.size() == 1) +// return true; +// int dim = 2*level-1; + +// // check for sanity of parameters +// std::vector<int> ref_lex; +// for (std::set<int>::const_iterator i = stmt_nums.begin(); i != stmt_nums.end(); i++) { +// if (*i < 0 || *i >= stmt.size()) +// throw std::invalid_argument("invalid statement number " + to_string(*i)); +// if (level < 1 || level > (stmt[*i].xform.n_out()-1)/2) +// throw std::invalid_argument("invalid loop level " + to_string(level)); +// if (ref_lex.size() == 0) +// ref_lex = getLexicalOrder(*i); +// else { +// std::vector<int> lex = getLexicalOrder(*i); +// for (int j = 0; j < dim-1; j+=2) +// if (lex[j] != ref_lex[j]) +// throw std::invalid_argument("statements for fusion must be in the same level-" + to_string(level-1) + " subloop"); +// } +// } + +// // collect lexicographical order values from to-be-fused statements +// std::set<int> lex_values; +// for (std::set<int>::const_iterator i = stmt_nums.begin(); i != stmt_nums.end(); i++) { +// std::vector<int> lex = getLexicalOrder(*i); +// lex_values.insert(lex[dim-1]); +// } +// if (lex_values.size() == 1) +// return true; + +// // negative dependence would prevent fusion +// int dep_dim = xform_index[dim].first; +// for (std::set<int>::iterator i = lex_values.begin(); i != lex_values.end(); i++) { +// ref_lex[dim-1] = *i; +// std::set<int> a = getStatements(ref_lex, dim-1); +// std::set<int>::iterator j = i; +// j++; +// for (; j != lex_values.end(); j++) { +// ref_lex[dim-1] = *j; +// std::set<int> b = getStatements(ref_lex, dim-1); +// for (std::set<int>::iterator ii = a.begin(); ii != a.end(); ii++) +// for (std::set<int>::iterator jj = b.begin(); jj != b.end(); jj++) { +// 
std::vector<DependenceVector> dvs; +// dvs = dep.getEdge(*ii, *jj); +// for (int k = 0; k < dvs.size(); k++) +// if (dvs[k].isCarried(dep_dim) && dvs[k].hasNegative(dep_dim)) +// throw loop_error("loop error: statements " + to_string(*ii) + " and " + to_string(*jj) + " cannot be fused together due to negative dependence"); +// dvs = dep.getEdge(*jj, *ii); +// for (int k = 0; k < dvs.size(); k++) +// if (dvs[k].isCarried(dep_dim) && dvs[k].hasNegative(dep_dim)) +// throw loop_error("loop error: statements " + to_string(*jj) + " and " + to_string(*ii) + " cannot be fused together due to negative dependence"); +// } +// } +// } + +// // collect all other lexicographical order values from the subloop +// // enclosing these to-be-fused loops +// std::set<int> same_loop = getStatements(ref_lex, dim-3); +// std::set<int> other_lex_values; +// for (std::set<int>::iterator i = same_loop.begin(); i != same_loop.end(); i++) { +// std::vector<int> lex = getLexicalOrder(*i); +// if (lex_values.find(lex[dim-1]) == lex_values.end()) +// other_lex_values.insert(lex[dim-1]); +// } + +// // update to-be-fused loops due to dependence cycle +// Graph<std::set<int>, Empty> g; +// { +// std::set<int> t; +// for (std::set<int>::iterator i = lex_values.begin(); i != lex_values.end(); i++) { +// ref_lex[dim-1] = *i; +// std::set<int> t2 = getStatements(ref_lex, dim-1); +// std::set_union(t.begin(), t.end(), t2.begin(), t2.end(), inserter(t, t.begin())); +// } +// g.insert(t); +// } +// for (std::set<int>::iterator i = other_lex_values.begin(); i != other_lex_values.end(); i++) { +// ref_lex[dim-1] = *i; +// std::set<int> t = getStatements(ref_lex, dim-1); +// g.insert(t); +// } +// for (int i = 0; i < g.vertex.size(); i++) +// for (int j = i+1; j < g.vertex.size(); j++) +// for (std::set<int>::iterator ii = g.vertex[i].first.begin(); ii != g.vertex[i].first.end(); ii++) +// for (std::set<int>::iterator jj = g.vertex[j].first.begin(); jj != g.vertex[j].first.end(); jj++) { +// 
std::vector<DependenceVector> dvs; +// dvs = dep.getEdge(*ii, *jj); +// for (int k = 0; k < dvs.size(); k++) +// if (dvs[k].isCarried(dep_dim)) { +// g.connect(i, j); +// break; +// } +// dvs = dep.getEdge(*jj, *ii); +// for (int k = 0; k < dvs.size(); k++) +// if (dvs[k].isCarried(dep_dim)) { +// g.connect(j, i); +// break; +// } +// } +// std::vector<std::set<int> > s = g.topoSort(); +// int fused_lex_value = 0; +// for (int i = 0; i < s.size(); i++) +// if (s[i].find(0) != s[i].end()) { +// // now add additional lexicographical order values +// for (std::set<int>::iterator j = s[i].begin(); j != s[i].end(); j++) +// if (*j != 0) { +// int stmt = *(g.vertex[*j].first.begin()); +// std::vector<int> lex = getLexicalOrder(stmt); +// lex_values.insert(lex[dim-1]); +// } + +// if (s.size() > 1) { +// if (i == 0) { +// int min_lex_value; +// for (std::set<int>::iterator j = s[i+1].begin(); j != s[i+1].end(); j++) { +// int stmt = *(g.vertex[*j].first.begin()); +// std::vector<int> lex = getLexicalOrder(stmt); +// if (j == s[i+1].begin()) +// min_lex_value = lex[dim-1]; +// else if (lex[dim-1] < min_lex_value) +// min_lex_value = lex[dim-1]; +// } +// fused_lex_value = min_lex_value - 1; +// } +// else { +// int max_lex_value; +// for (std::set<int>::iterator j = s[i-1].begin(); j != s[i-1].end(); j++) { +// int stmt = *(g.vertex[*j].first.begin()); +// std::vector<int> lex = getLexicalOrder(stmt); +// if (j == s[i-1].begin()) +// max_lex_value = lex[dim-1]; +// else if (lex[dim-1] > max_lex_value) +// max_lex_value = lex[dim-1]; +// } +// fused_lex_value = max_lex_value + 1; +// } +// } + +// break; +// } + +// // sort the newly updated to-be-fused lexicographical order values +// std::vector<int> ordered_lex_values; +// for (std::set<int>::iterator i = lex_values.begin(); i != lex_values.end(); i++) +// ordered_lex_values.push_back(*i); +// std::sort(ordered_lex_values.begin(), ordered_lex_values.end()); + +// // make sure internal loops inside to-be-fused loops have 
the same +// // lexicographical order before and after fusion +// std::vector<std::pair<int, int> > inside_lex_range(ordered_lex_values.size()); +// for (int i = 0; i < ordered_lex_values.size(); i++) { +// ref_lex[dim-1] = ordered_lex_values[i]; +// std::set<int> the_stmts = getStatements(ref_lex, dim-1); +// std::set<int>::iterator j = the_stmts.begin(); +// std::vector<int> lex = getLexicalOrder(*j); +// int min_inside_lex_value = lex[dim+1]; +// int max_inside_lex_value = lex[dim+1]; +// j++; +// for (; j != the_stmts.end(); j++) { +// std::vector<int> lex = getLexicalOrder(*j); +// if (lex[dim+1] < min_inside_lex_value) +// min_inside_lex_value = lex[dim+1]; +// if (lex[dim+1] > max_inside_lex_value) +// max_inside_lex_value = lex[dim+1]; +// } +// inside_lex_range[i].first = min_inside_lex_value; +// inside_lex_range[i].second = max_inside_lex_value; +// } +// for (int i = 1; i < ordered_lex_values.size(); i++) +// if (inside_lex_range[i].first <= inside_lex_range[i-1].second) { +// int shift_lex_value = inside_lex_range[i-1].second - inside_lex_range[i].first + 1; +// ref_lex[dim-1] = ordered_lex_values[i]; +// ref_lex[dim+1] = inside_lex_range[i].first; +// shiftLexicalOrder(ref_lex, dim+1, shift_lex_value); +// inside_lex_range[i].first += shift_lex_value; +// inside_lex_range[i].second += shift_lex_value; +// } + +// // set lexicographical order for fused loops +// for (std::set<int>::iterator i = same_loop.begin(); i != same_loop.end(); i++) { +// std::vector<int> lex = getLexicalOrder(*i); +// if (lex_values.find(lex[dim-1]) != lex_values.end()) +// assign_const(stmt[*i].xform, dim-1, fused_lex_value); +// } + +// // no need to update dependence graph +// ; + +// return true; +// } + + +// bool Loop::distribute(const std::set<int> &stmt_nums, int level) { +// if (stmt_nums.size() == 0 || stmt_nums.size() == 1) +// return true; +// int dim = 2*level-1; + +// // check for sanity of parameters +// std::vector<int> ref_lex; +// for 
(std::set<int>::const_iterator i = stmt_nums.begin(); i != stmt_nums.end(); i++) { +// if (*i < 0 || *i >= stmt.size()) +// throw std::invalid_argument("invalid statement number " + to_string(*i)); +// if (level < 1 || level > (stmt[*i].xform.n_out()-1)/2) +// throw std::invalid_argument("invalid loop level " + to_string(level)); +// if (ref_lex.size() == 0) +// ref_lex = getLexicalOrder(*i); +// else { +// std::vector<int> lex = getLexicalOrder(*i); +// for (int j = 0; j <= dim-1; j+=2) +// if (lex[j] != ref_lex[j]) +// throw std::invalid_argument("statements for distribution must be in the same level-" + to_string(level) + " subloop"); +// } +// } + +// // find SCC in the to-be-distributed loop +// int dep_dim = xform_index[dim].first; +// std::set<int> same_loop = getStatements(ref_lex, dim-1); +// Graph<int, Empty> g; +// for (std::set<int>::iterator i = same_loop.begin(); i != same_loop.end(); i++) +// g.insert(*i); +// for (int i = 0; i < g.vertex.size(); i++) +// for (int j = i+1; j < g.vertex.size(); j++) { +// std::vector<DependenceVector> dvs; +// dvs = dep.getEdge(g.vertex[i].first, g.vertex[j].first); +// for (int k = 0; k < dvs.size(); k++) +// if (dvs[k].isCarried(dep_dim)) { +// g.connect(i, j); +// break; +// } +// dvs = dep.getEdge(g.vertex[j].first, g.vertex[i].first); +// for (int k = 0; k < dvs.size(); k++) +// if (dvs[k].isCarried(dep_dim)) { +// g.connect(j, i); +// break; +// } +// } +// std::vector<std::set<int> > s = g.topoSort(); + +// // find statements that cannot be distributed due to dependence cycle +// Graph<std::set<int>, Empty> g2; +// for (int i = 0; i < s.size(); i++) { +// std::set<int> t; +// for (std::set<int>::iterator j = s[i].begin(); j != s[i].end(); j++) +// if (stmt_nums.find(g.vertex[*j].first) != stmt_nums.end()) +// t.insert(g.vertex[*j].first); +// if (!t.empty()) +// g2.insert(t); +// } +// for (int i = 0; i < g2.vertex.size(); i++) +// for (int j = i+1; j < g2.vertex.size(); j++) +// for (std::set<int>::iterator ii 
= g2.vertex[i].first.begin(); ii != g2.vertex[i].first.end(); ii++) +// for (std::set<int>::iterator jj = g2.vertex[j].first.begin(); jj != g2.vertex[j].first.end(); jj++) { +// std::vector<DependenceVector> dvs; +// dvs = dep.getEdge(*ii, *jj); +// for (int k = 0; k < dvs.size(); k++) +// if (dvs[k].isCarried(dep_dim)) { +// g2.connect(i, j); +// break; +// } +// dvs = dep.getEdge(*jj, *ii); +// for (int k = 0; k < dvs.size(); k++) +// if (dvs[k].isCarried(dep_dim)) { +// g2.connect(j, i); +// break; +// } +// } +// std::vector<std::set<int> > s2 = g2.topoSort(); + +// // nothing to distribute +// if (s2.size() == 1) +// throw loop_error("loop error: no statement can be distributed due to dependence cycle"); + +// std::vector<std::set<int> > s3; +// for (int i = 0; i < s2.size(); i++) { +// std::set<int> t; +// for (std::set<int>::iterator j = s2[i].begin(); j != s2[i].end(); j++) +// std::set_union(t.begin(), t.end(), g2.vertex[*j].first.begin(), g2.vertex[*j].first.end(), inserter(t, t.begin())); +// s3.push_back(t); +// } + +// // associate other affected statements with the right distributed statements +// for (std::set<int>::iterator i = same_loop.begin(); i != same_loop.end(); i++) +// if (stmt_nums.find(*i) == stmt_nums.end()) { +// bool is_inserted = false; +// int potential_insertion_point = 0; +// for (int j = 0; j < s3.size(); j++) { +// for (std::set<int>::iterator k = s3[j].begin(); k != s3[j].end(); k++) { +// std::vector<DependenceVector> dvs; +// dvs = dep.getEdge(*i, *k); +// for (int kk = 0; kk < dvs.size(); kk++) +// if (dvs[kk].isCarried(dep_dim)) { +// s3[j].insert(*i); +// is_inserted = true; +// break; +// } +// dvs = dep.getEdge(*k, *i); +// for (int kk = 0; kk < dvs.size(); kk++) +// if (dvs[kk].isCarried(dep_dim)) +// potential_insertion_point = j; +// } +// if (is_inserted) +// break; +// } + +// if (!is_inserted) +// s3[potential_insertion_point].insert(*i); +// } + +// // set lexicographical order after distribution +// int order = 
ref_lex[dim-1]; +// shiftLexicalOrder(ref_lex, dim-1, s3.size()-1); +// for (std::vector<std::set<int> >::iterator i = s3.begin(); i != s3.end(); i++) { +// for (std::set<int>::iterator j = (*i).begin(); j != (*i).end(); j++) +// assign_const(stmt[*j].xform, dim-1, order); +// order++; +// } + +// // no need to update dependence graph +// ; + +// return true; +// } + + + + + + + + diff --git a/loop_basic.cc b/loop_basic.cc new file mode 100644 index 0000000..f5234b9 --- /dev/null +++ b/loop_basic.cc @@ -0,0 +1,1538 @@ +/* + * loop_basic.cc + * + * Created on: Nov 12, 2012 + * Author: anand + */ + +#include "loop.hh" +#include "chill_error.hh" +#include <omega.h> +#include "omegatools.hh" +#include <string.h> + +using namespace omega; + +void Loop::permute(const std::vector<int> &pi) { + std::set<int> active; + for (int i = 0; i < stmt.size(); i++) + active.insert(i); + + permute(active, pi); +} + +void Loop::original() { + std::set<int> active; + for (int i = 0; i < stmt.size(); i++) + active.insert(i); + setLexicalOrder(0, active); +} +void Loop::permute(int stmt_num, int level, const std::vector<int> &pi) { + // check for sanity of parameters + int starting_order; + if (stmt_num < 0 || stmt_num >= stmt.size()) + throw std::invalid_argument( + "invalid statement number " + to_string(stmt_num)); + std::set<int> active; + if (level < 0 || level > stmt[stmt_num].loop_level.size()) + throw std::invalid_argument("invalid loop level " + to_string(level)); + else if (level == 0) { + for (int i = 0; i < stmt.size(); i++) + active.insert(i); + level = 1; + starting_order = 0; + } else { + std::vector<int> lex = getLexicalOrder(stmt_num); + active = getStatements(lex, 2 * level - 2); + starting_order = lex[2 * level - 2]; + lex[2 * level - 2]++; + shiftLexicalOrder(lex, 2 * level - 2, active.size() - 1); + } + std::vector<int> pi_inverse(pi.size(), 0); + for (int i = 0; i < pi.size(); i++) { + if (pi[i] >= level + pi.size() || pi[i] < level + || pi_inverse[pi[i] - level] != 
0) + throw std::invalid_argument("invalid permuation"); + pi_inverse[pi[i] - level] = level + i; + } + for (std::set<int>::iterator i = active.begin(); i != active.end(); i++) + if (level + pi.size() - 1 > stmt[*i].loop_level.size()) + throw std::invalid_argument( + "invalid permutation for statement " + to_string(*i)); + + // invalidate saved codegen computation + delete last_compute_cgr_; + last_compute_cgr_ = NULL; + delete last_compute_cg_; + last_compute_cg_ = NULL; + + // Update transformation relations + for (std::set<int>::iterator i = active.begin(); i != active.end(); i++) { + int n = stmt[*i].xform.n_out(); + Relation mapping(n, n); + F_And *f_root = mapping.add_and(); + for (int j = 1; j <= 2 * level - 2; j++) { + EQ_Handle h = f_root->add_EQ(); + h.update_coef(mapping.output_var(j), 1); + h.update_coef(mapping.input_var(j), -1); + } + for (int j = level; j <= level + pi.size() - 1; j++) { + EQ_Handle h = f_root->add_EQ(); + h.update_coef(mapping.output_var(2 * j), 1); + h.update_coef(mapping.input_var(2 * pi[j - level]), -1); + } + for (int j = level; j <= level + pi.size() - 1; j++) { + EQ_Handle h = f_root->add_EQ(); + h.update_coef(mapping.output_var(2 * j - 1), 1); + h.update_coef(mapping.input_var(2 * j - 1), -1); + } + for (int j = 2 * (level + pi.size() - 1) + 1; j <= n; j++) { + EQ_Handle h = f_root->add_EQ(); + h.update_coef(mapping.output_var(j), 1); + h.update_coef(mapping.input_var(j), -1); + } + stmt[*i].xform = Composition(mapping, stmt[*i].xform); + stmt[*i].xform.simplify(); + } + + // get the permuation for dependence vectors + std::vector<int> t; + for (int i = 0; i < pi.size(); i++) + if (stmt[stmt_num].loop_level[pi[i] - 1].type == LoopLevelOriginal) + t.push_back(stmt[stmt_num].loop_level[pi[i] - 1].payload); + int max_dep_dim = -1; + int min_dep_dim = dep.num_dim(); + for (int i = 0; i < t.size(); i++) { + if (t[i] > max_dep_dim) + max_dep_dim = t[i]; + if (t[i] < min_dep_dim) + min_dep_dim = t[i]; + } + if (min_dep_dim > 
max_dep_dim) + return; + if (max_dep_dim - min_dep_dim + 1 != t.size()) + throw loop_error("cannot update the dependence graph after permuation"); + std::vector<int> dep_pi(dep.num_dim()); + for (int i = 0; i < min_dep_dim; i++) + dep_pi[i] = i; + for (int i = min_dep_dim; i <= max_dep_dim; i++) + dep_pi[i] = t[i - min_dep_dim]; + for (int i = max_dep_dim + 1; i < dep.num_dim(); i++) + dep_pi[i] = i; + + dep.permute(dep_pi, active); + + // update the dependence graph + DependenceGraph g(dep.num_dim()); + for (int i = 0; i < dep.vertex.size(); i++) + g.insert(); + for (int i = 0; i < dep.vertex.size(); i++) + for (DependenceGraph::EdgeList::iterator j = + dep.vertex[i].second.begin(); j != dep.vertex[i].second.end(); + j++) { + if ((active.find(i) != active.end() + && active.find(j->first) != active.end())) { + std::vector<DependenceVector> dv = j->second; + for (int k = 0; k < dv.size(); k++) { + switch (dv[k].type) { + case DEP_W2R: + case DEP_R2W: + case DEP_W2W: + case DEP_R2R: { + std::vector<coef_t> lbounds(dep.num_dim()); + std::vector<coef_t> ubounds(dep.num_dim()); + for (int d = 0; d < dep.num_dim(); d++) { + lbounds[d] = dv[k].lbounds[dep_pi[d]]; + ubounds[d] = dv[k].ubounds[dep_pi[d]]; + } + dv[k].lbounds = lbounds; + dv[k].ubounds = ubounds; + break; + } + case DEP_CONTROL: { + break; + } + default: + throw loop_error("unknown dependence type"); + } + } + g.connect(i, j->first, dv); + } else if (active.find(i) == active.end() + && active.find(j->first) == active.end()) { + std::vector<DependenceVector> dv = j->second; + g.connect(i, j->first, dv); + } else { + std::vector<DependenceVector> dv = j->second; + for (int k = 0; k < dv.size(); k++) + switch (dv[k].type) { + case DEP_W2R: + case DEP_R2W: + case DEP_W2W: + case DEP_R2R: { + for (int d = 0; d < dep.num_dim(); d++) + if (dep_pi[d] != d) { + dv[k].lbounds[d] = -posInfinity; + dv[k].ubounds[d] = posInfinity; + } + break; + } + case DEP_CONTROL: + break; + default: + throw loop_error("unknown 
dependence type"); + } + g.connect(i, j->first, dv); + } + } + dep = g; + + // update loop level information + for (std::set<int>::iterator i = active.begin(); i != active.end(); i++) { + int cur_dep_dim = min_dep_dim; + std::vector<LoopLevel> new_loop_level(stmt[*i].loop_level.size()); + for (int j = 1; j <= stmt[*i].loop_level.size(); j++) + if (j >= level && j < level + pi.size()) { + switch (stmt[*i].loop_level[pi_inverse[j - level] - 1].type) { + case LoopLevelOriginal: + new_loop_level[j - 1].type = LoopLevelOriginal; + new_loop_level[j - 1].payload = cur_dep_dim++; + new_loop_level[j - 1].parallel_level = + stmt[*i].loop_level[pi_inverse[j - level] - 1].parallel_level; + break; + case LoopLevelTile: { + new_loop_level[j - 1].type = LoopLevelTile; + int ref_level = stmt[*i].loop_level[pi_inverse[j - level] + - 1].payload; + if (ref_level >= level && ref_level < level + pi.size()) + new_loop_level[j - 1].payload = pi_inverse[ref_level + - level]; + else + new_loop_level[j - 1].payload = ref_level; + new_loop_level[j - 1].parallel_level = stmt[*i].loop_level[j + - 1].parallel_level; + break; + } + default: + throw loop_error( + "unknown loop level information for statement " + + to_string(*i)); + } + } else { + switch (stmt[*i].loop_level[j - 1].type) { + case LoopLevelOriginal: + new_loop_level[j - 1].type = LoopLevelOriginal; + new_loop_level[j - 1].payload = + stmt[*i].loop_level[j - 1].payload; + new_loop_level[j - 1].parallel_level = stmt[*i].loop_level[j + - 1].parallel_level; + break; + case LoopLevelTile: { + new_loop_level[j - 1].type = LoopLevelTile; + int ref_level = stmt[*i].loop_level[j - 1].payload; + if (ref_level >= level && ref_level < level + pi.size()) + new_loop_level[j - 1].payload = pi_inverse[ref_level + - level]; + else + new_loop_level[j - 1].payload = ref_level; + new_loop_level[j - 1].parallel_level = stmt[*i].loop_level[j + - 1].parallel_level; + break; + } + default: + throw loop_error( + "unknown loop level information for 
statement " + + to_string(*i)); + } + } + stmt[*i].loop_level = new_loop_level; + } + + setLexicalOrder(2 * level - 2, active, starting_order); +} +void Loop::permute(const std::set<int> &active, const std::vector<int> &pi) { + if (active.size() == 0 || pi.size() == 0) + return; + + // check for sanity of parameters + int level = pi[0]; + for (int i = 1; i < pi.size(); i++) + if (pi[i] < level) + level = pi[i]; + if (level < 1) + throw std::invalid_argument("invalid permuation"); + std::vector<int> reverse_pi(pi.size(), 0); + for (int i = 0; i < pi.size(); i++) + if (pi[i] >= level + pi.size()) + throw std::invalid_argument("invalid permutation"); + else + reverse_pi[pi[i] - level] = i + level; + for (int i = 0; i < reverse_pi.size(); i++) + if (reverse_pi[i] == 0) + throw std::invalid_argument("invalid permuation"); + int ref_stmt_num; + std::vector<int> lex; + for (std::set<int>::iterator i = active.begin(); i != active.end(); i++) { + if (*i < 0 || *i >= stmt.size()) + throw std::invalid_argument("invalid statement " + to_string(*i)); + if (i == active.begin()) { + ref_stmt_num = *i; + lex = getLexicalOrder(*i); + } else { + if (level + pi.size() - 1 > stmt[*i].loop_level.size()) + throw std::invalid_argument("invalid permuation"); + std::vector<int> lex2 = getLexicalOrder(*i); + for (int j = 0; j < 2 * level - 3; j += 2) + if (lex[j] != lex2[j]) + throw std::invalid_argument( + "statements to permute must be in the same subloop"); + for (int j = 0; j < pi.size(); j++) + if (!(stmt[*i].loop_level[level + j - 1].type + == stmt[ref_stmt_num].loop_level[level + j - 1].type + && stmt[*i].loop_level[level + j - 1].payload + == stmt[ref_stmt_num].loop_level[level + j - 1].payload)) + throw std::invalid_argument( + "permuted loops must have the same loop level types"); + } + } + // invalidate saved codegen computation + delete last_compute_cgr_; + last_compute_cgr_ = NULL; + delete last_compute_cg_; + last_compute_cg_ = NULL; + + // Update transformation relations + 
for (std::set<int>::iterator i = active.begin(); i != active.end(); i++) { + int n = stmt[*i].xform.n_out(); + Relation mapping(n, n); + F_And *f_root = mapping.add_and(); + for (int j = 1; j <= n; j += 2) { + EQ_Handle h = f_root->add_EQ(); + h.update_coef(mapping.output_var(j), 1); + h.update_coef(mapping.input_var(j), -1); + } + for (int j = 0; j < pi.size(); j++) { + EQ_Handle h = f_root->add_EQ(); + h.update_coef(mapping.output_var(2 * (level + j)), 1); + h.update_coef(mapping.input_var(2 * pi[j]), -1); + } + for (int j = 1; j < level; j++) { + EQ_Handle h = f_root->add_EQ(); + h.update_coef(mapping.output_var(2 * j), 1); + h.update_coef(mapping.input_var(2 * j), -1); + } + for (int j = level + pi.size(); j <= stmt[*i].loop_level.size(); j++) { + EQ_Handle h = f_root->add_EQ(); + h.update_coef(mapping.output_var(2 * j), 1); + h.update_coef(mapping.input_var(2 * j), -1); + } + + stmt[*i].xform = Composition(mapping, stmt[*i].xform); + stmt[*i].xform.simplify(); + } + + // get the permuation for dependence vectors + std::vector<int> t; + for (int i = 0; i < pi.size(); i++) + if (stmt[ref_stmt_num].loop_level[pi[i] - 1].type == LoopLevelOriginal) + t.push_back(stmt[ref_stmt_num].loop_level[pi[i] - 1].payload); + int max_dep_dim = -1; + int min_dep_dim = num_dep_dim; + for (int i = 0; i < t.size(); i++) { + if (t[i] > max_dep_dim) + max_dep_dim = t[i]; + if (t[i] < min_dep_dim) + min_dep_dim = t[i]; + } + if (min_dep_dim > max_dep_dim) + return; + if (max_dep_dim - min_dep_dim + 1 != t.size()) + throw loop_error("cannot update the dependence graph after permuation"); + std::vector<int> dep_pi(num_dep_dim); + for (int i = 0; i < min_dep_dim; i++) + dep_pi[i] = i; + for (int i = min_dep_dim; i <= max_dep_dim; i++) + dep_pi[i] = t[i - min_dep_dim]; + for (int i = max_dep_dim + 1; i < num_dep_dim; i++) + dep_pi[i] = i; + + dep.permute(dep_pi, active); + + // update the dependence graph + DependenceGraph g(dep.num_dim()); + for (int i = 0; i < dep.vertex.size(); i++) + 
g.insert(); + for (int i = 0; i < dep.vertex.size(); i++) + for (DependenceGraph::EdgeList::iterator j = + dep.vertex[i].second.begin(); j != dep.vertex[i].second.end(); + j++) { // + if ((active.find(i) != active.end() + && active.find(j->first) != active.end())) { + std::vector<DependenceVector> dv = j->second; + for (int k = 0; k < dv.size(); k++) { + switch (dv[k].type) { + case DEP_W2R: + case DEP_R2W: + case DEP_W2W: + case DEP_R2R: { + std::vector<coef_t> lbounds(num_dep_dim); + std::vector<coef_t> ubounds(num_dep_dim); + for (int d = 0; d < num_dep_dim; d++) { + lbounds[d] = dv[k].lbounds[dep_pi[d]]; + ubounds[d] = dv[k].ubounds[dep_pi[d]]; + } + dv[k].lbounds = lbounds; + dv[k].ubounds = ubounds; + break; + } + case DEP_CONTROL: { + break; + } + default: + throw loop_error("unknown dependence type"); + } + } + g.connect(i, j->first, dv); + } else if (active.find(i) == active.end() + && active.find(j->first) == active.end()) { + std::vector<DependenceVector> dv = j->second; + g.connect(i, j->first, dv); + } else { + std::vector<DependenceVector> dv = j->second; + for (int k = 0; k < dv.size(); k++) + switch (dv[k].type) { + case DEP_W2R: + case DEP_R2W: + case DEP_W2W: + case DEP_R2R: { + for (int d = 0; d < num_dep_dim; d++) + if (dep_pi[d] != d) { + dv[k].lbounds[d] = -posInfinity; + dv[k].ubounds[d] = posInfinity; + } + break; + } + case DEP_CONTROL: + break; + default: + throw loop_error("unknown dependence type"); + } + g.connect(i, j->first, dv); + } + } + dep = g; + + // update loop level information + for (std::set<int>::iterator i = active.begin(); i != active.end(); i++) { + int cur_dep_dim = min_dep_dim; + std::vector<LoopLevel> new_loop_level(stmt[*i].loop_level.size()); + for (int j = 1; j <= stmt[*i].loop_level.size(); j++) + if (j >= level && j < level + pi.size()) { + switch (stmt[*i].loop_level[reverse_pi[j - level] - 1].type) { + case LoopLevelOriginal: + new_loop_level[j - 1].type = LoopLevelOriginal; + new_loop_level[j - 1].payload = 
cur_dep_dim++; + new_loop_level[j - 1].parallel_level = + stmt[*i].loop_level[reverse_pi[j - level] - 1].parallel_level; + break; + case LoopLevelTile: { + new_loop_level[j - 1].type = LoopLevelTile; + int ref_level = stmt[*i].loop_level[reverse_pi[j - level] + - 1].payload; + if (ref_level >= level && ref_level < level + pi.size()) + new_loop_level[j - 1].payload = reverse_pi[ref_level + - level]; + else + new_loop_level[j - 1].payload = ref_level; + new_loop_level[j - 1].parallel_level = + stmt[*i].loop_level[reverse_pi[j - level] - 1].parallel_level; + break; + } + default: + throw loop_error( + "unknown loop level information for statement " + + to_string(*i)); + } + } else { + switch (stmt[*i].loop_level[j - 1].type) { + case LoopLevelOriginal: + new_loop_level[j - 1].type = LoopLevelOriginal; + new_loop_level[j - 1].payload = + stmt[*i].loop_level[j - 1].payload; + new_loop_level[j - 1].parallel_level = stmt[*i].loop_level[j + - 1].parallel_level; + break; + case LoopLevelTile: { + new_loop_level[j - 1].type = LoopLevelTile; + int ref_level = stmt[*i].loop_level[j - 1].payload; + if (ref_level >= level && ref_level < level + pi.size()) + new_loop_level[j - 1].payload = reverse_pi[ref_level + - level]; + else + new_loop_level[j - 1].payload = ref_level; + new_loop_level[j - 1].parallel_level = stmt[*i].loop_level[j + - 1].parallel_level; + break; + } + default: + throw loop_error( + "unknown loop level information for statement " + + to_string(*i)); + } + } + stmt[*i].loop_level = new_loop_level; + } + + setLexicalOrder(2 * level - 2, active); +} + +std::set<int> Loop::split(int stmt_num, int level, const Relation &cond) { + // check for sanity of parameters + if (stmt_num < 0 || stmt_num >= stmt.size()) + throw std::invalid_argument("invalid statement " + to_string(stmt_num)); + if (level <= 0 || level > stmt[stmt_num].loop_level.size()) + throw std::invalid_argument("invalid loop level " + to_string(level)); + + std::set<int> result; + int dim = 2 * level - 
1; + std::vector<int> lex = getLexicalOrder(stmt_num); + std::set<int> same_loop = getStatements(lex, dim - 1); + + Relation cond2 = copy(cond); + cond2.simplify(); + cond2 = EQs_to_GEQs(cond2); + Conjunct *c = cond2.single_conjunct(); + int cur_lex = lex[dim - 1]; + + for (GEQ_Iterator gi(c->GEQs()); gi; gi++) { + int max_level = (*gi).max_tuple_pos(); + Relation single_cond(max_level); + single_cond.and_with_GEQ(*gi); + + // TODO: should decide where to place newly created statements with + // complementary split condition from dependence graph. + bool place_after; + if (max_level == 0) + place_after = true; + else if ((*gi).get_coef(cond2.set_var(max_level)) < 0) + place_after = true; + else + place_after = false; + + bool temp_place_after; // = place_after; + bool assigned = false; + int part1_to_part2; + int part2_to_part1; + // original statements with split condition, + // new statements with complement of split condition + int old_num_stmt = stmt.size(); + std::map<int, int> what_stmt_num; + apply_xform(same_loop); + for (std::set<int>::iterator i = same_loop.begin(); + i != same_loop.end(); i++) { + int n = stmt[*i].IS.n_set(); + Relation part1, part2; + if (max_level > n) { + part1 = copy(stmt[*i].IS); + part2 = Relation::False(0); + } else { + part1 = Intersection(copy(stmt[*i].IS), + Extend_Set(copy(single_cond), n - max_level)); + part2 = Intersection(copy(stmt[*i].IS), + Extend_Set(Complement(copy(single_cond)), + n - max_level)); + } + + //split dependence check + + if (max_level > level) { + + DNF_Iterator di1(stmt[*i].IS.query_DNF()); + DNF_Iterator di2(part1.query_DNF()); + for (; di1 && di2; di1++, di2++) { + //printf("In next conjunct,\n"); + EQ_Iterator ei1 = (*di1)->EQs(); + EQ_Iterator ei2 = (*di2)->EQs(); + for (; ei1 && ei2; ei1++, ei2++) { + //printf(" In next equality constraint,\n"); + Constr_Vars_Iter cvi1(*ei1); + Constr_Vars_Iter cvi2(*ei2); + int dimension = (*cvi1).var->get_position(); + int same = 0; + bool identical = false; + if 
(identical = !strcmp((*cvi1).var->char_name(), + (*cvi2).var->char_name())) { + + for (; cvi1 && cvi2; cvi1++, cvi2++) { + + if (((*cvi1).coef != (*cvi2).coef + || (*ei1).get_const() + != (*ei2).get_const()) + || (strcmp((*cvi1).var->char_name(), + (*cvi2).var->char_name()))) { + + same++; + } + } + } + if ((same != 0) || !identical) { + + dimension = dimension - 1; + + while (stmt[*i].loop_level[dimension].type + == LoopLevelTile) + dimension = + stmt[*i].loop_level[dimension].payload; + + dimension = stmt[*i].loop_level[dimension].payload; + + for (int i = 0; i < stmt.size(); i++) { + std::vector<std::pair<int, DependenceVector> > D; + for (DependenceGraph::EdgeList::iterator j = + dep.vertex[i].second.begin(); + j != dep.vertex[i].second.end(); j++) { + for (int k = 0; k < j->second.size(); k++) { + DependenceVector dv = j->second[k]; + if (dv.type != DEP_CONTROL) + if (dv.hasNegative(dimension) + && !dv.quasi) + throw loop_error( + "loop error: Split is illegal, dependence violation!"); + + } + } + } + + } + + GEQ_Iterator gi1 = (*di1)->GEQs(); + GEQ_Iterator gi2 = (*di2)->GEQs(); + + for (; gi1 && gi2; gi++, gi2++) { + + Constr_Vars_Iter cvi1(*gi1); + Constr_Vars_Iter cvi2(*gi2); + int dimension = (*cvi1).var->get_position(); + int same = 0; + bool identical = false; + if (identical = !strcmp((*cvi1).var->char_name(), + (*cvi2).var->char_name())) { + + for (; cvi1 && cvi2; cvi1++, cvi2++) { + + if (((*cvi1).coef != (*cvi2).coef + || (*gi1).get_const() + != (*gi2).get_const()) + || (strcmp((*cvi1).var->char_name(), + (*cvi2).var->char_name()))) { + + same++; + } + } + } + if ((same != 0) || !identical) { + dimension = dimension - 1; + + while (stmt[*i].loop_level[dimension].type + == LoopLevelTile) + stmt[*i].loop_level[dimension].payload; + + dimension = + stmt[*i].loop_level[dimension].payload; + + for (int i = 0; i < stmt.size(); i++) { + std::vector<std::pair<int, DependenceVector> > D; + for (DependenceGraph::EdgeList::iterator j = + 
dep.vertex[i].second.begin(); + j != dep.vertex[i].second.end(); + j++) { + for (int k = 0; k < j->second.size(); + k++) { + DependenceVector dv = j->second[k]; + if (dv.type != DEP_CONTROL) + if (dv.hasNegative(dimension) + && !dv.quasi) + + throw loop_error( + "loop error: Split is illegal, dependence violation!"); + + } + } + } + + } + + } + + } + + } + + DNF_Iterator di3(stmt[*i].IS.query_DNF()); + DNF_Iterator di4(part2.query_DNF()); // + for (; di3 && di4; di3++, di4++) { + EQ_Iterator ei1 = (*di3)->EQs(); + EQ_Iterator ei2 = (*di4)->EQs(); + for (; ei1 && ei2; ei1++, ei2++) { + Constr_Vars_Iter cvi1(*ei1); + Constr_Vars_Iter cvi2(*ei2); + int dimension = (*cvi1).var->get_position(); + int same = 0; + bool identical = false; + if (identical = !strcmp((*cvi1).var->char_name(), + (*cvi2).var->char_name())) { + + for (; cvi1 && cvi2; cvi1++, cvi2++) { + + if (((*cvi1).coef != (*cvi2).coef + || (*ei1).get_const() + != (*ei2).get_const()) + || (strcmp((*cvi1).var->char_name(), + (*cvi2).var->char_name()))) { + + same++; + } + } + } + if ((same != 0) || !identical) { + dimension = dimension - 1; + + while (stmt[*i].loop_level[dimension].type + == LoopLevelTile) + stmt[*i].loop_level[dimension].payload; + + dimension = stmt[*i].loop_level[dimension].payload; + + for (int i = 0; i < stmt.size(); i++) { + std::vector<std::pair<int, DependenceVector> > D; + for (DependenceGraph::EdgeList::iterator j = + dep.vertex[i].second.begin(); + j != dep.vertex[i].second.end(); j++) { + for (int k = 0; k < j->second.size(); k++) { + DependenceVector dv = j->second[k]; + if (dv.type != DEP_CONTROL) + if (dv.hasNegative(dimension) + && !dv.quasi) + + throw loop_error( + "loop error: Split is illegal, dependence violation!"); + + } + } + } + + } + + } + GEQ_Iterator gi1 = (*di3)->GEQs(); + GEQ_Iterator gi2 = (*di4)->GEQs(); + + for (; gi1 && gi2; gi++, gi2++) { + Constr_Vars_Iter cvi1(*gi1); + Constr_Vars_Iter cvi2(*gi2); + int dimension = (*cvi1).var->get_position(); + int same = 
0; + bool identical = false; + if (identical = !strcmp((*cvi1).var->char_name(), + (*cvi2).var->char_name())) { + + for (; cvi1 && cvi2; cvi1++, cvi2++) { + + if (((*cvi1).coef != (*cvi2).coef + || (*gi1).get_const() + != (*gi2).get_const()) + || (strcmp((*cvi1).var->char_name(), + (*cvi2).var->char_name()))) { + + same++; + } + } + } + if ((same != 0) || !identical) { + dimension = dimension - 1; + + while (stmt[*i].loop_level[dimension].type // + == LoopLevelTile) + stmt[*i].loop_level[dimension].payload; + + dimension = stmt[*i].loop_level[dimension].payload; + + for (int i = 0; i < stmt.size(); i++) { + std::vector<std::pair<int, DependenceVector> > D; + for (DependenceGraph::EdgeList::iterator j = + dep.vertex[i].second.begin(); + j != dep.vertex[i].second.end(); j++) { + for (int k = 0; k < j->second.size(); k++) { + DependenceVector dv = j->second[k]; + if (dv.type != DEP_CONTROL) + if (dv.hasNegative(dimension) + && !dv.quasi) + + throw loop_error( + "loop error: Split is illegal, dependence violation!"); + + } + } + } + + } + + } + + } + + } + + stmt[*i].IS = part1; + + if (Intersection(copy(part2), + Extend_Set(copy(this->known), n - this->known.n_set())).is_upper_bound_satisfiable()) { + Statement new_stmt; + new_stmt.code = stmt[*i].code->clone(); + new_stmt.IS = part2; + new_stmt.xform = copy(stmt[*i].xform); + new_stmt.ir_stmt_node = NULL; + new_stmt.loop_level = stmt[*i].loop_level; + + stmt_nesting_level_.push_back(stmt_nesting_level_[*i]); + + /*std::pair<std::vector<DependenceVector>, + std::vector<DependenceVector> > dv = + test_data_dependences(ir, stmt[*i].code, part1, + stmt[*i].code, part2, freevar, index, + stmt_nesting_level_[*i], + stmt_nesting_level_[stmt.size() - 1]); + + + + + for (int k = 0; k < dv.first.size(); k++) + part1_to_part2++; + if (part1_to_part2 > 0 && part2_to_part1 > 0) + throw loop_error( + "loop error: Aborting, split resulted in impossible dependence cycle!"); + + for (int k = 0; k < dv.second.size(); k++) + 
part2_to_part1++; + + + + if (part1_to_part2 > 0 && part2_to_part1 > 0) + throw loop_error( + "loop error: Aborting, split resulted in impossible dependence cycle!"); + + + + if (part2_to_part1 > 0){ + temp_place_after = false; + assigned = true; + + }else if (part1_to_part2 > 0){ + temp_place_after = true; + + assigned = true; + } + + */ + + if (place_after) + assign_const(new_stmt.xform, dim - 1, cur_lex + 1); + else + assign_const(new_stmt.xform, dim - 1, cur_lex - 1); + + stmt.push_back(new_stmt); + dep.insert(); + what_stmt_num[*i] = stmt.size() - 1; + if (*i == stmt_num) + result.insert(stmt.size() - 1); + } + + } + // make adjacent lexical number available for new statements + if (place_after) { + lex[dim - 1] = cur_lex + 1; + shiftLexicalOrder(lex, dim - 1, 1); + } else { + lex[dim - 1] = cur_lex - 1; + shiftLexicalOrder(lex, dim - 1, -1); + } + // update dependence graph + int dep_dim = get_dep_dim_of(stmt_num, level); + for (int i = 0; i < old_num_stmt; i++) { + std::vector<std::pair<int, std::vector<DependenceVector> > > D; + + for (DependenceGraph::EdgeList::iterator j = + dep.vertex[i].second.begin(); + j != dep.vertex[i].second.end(); j++) { + if (same_loop.find(i) != same_loop.end()) { + if (same_loop.find(j->first) != same_loop.end()) { + if (what_stmt_num.find(i) != what_stmt_num.end() + && what_stmt_num.find(j->first) + != what_stmt_num.end()) + dep.connect(what_stmt_num[i], + what_stmt_num[j->first], j->second); + if (place_after + && what_stmt_num.find(j->first) + != what_stmt_num.end()) { + std::vector<DependenceVector> dvs; + for (int k = 0; k < j->second.size(); k++) { + DependenceVector dv = j->second[k]; + if (dv.is_data_dependence() && dep_dim != -1) { + dv.lbounds[dep_dim] = -posInfinity; + dv.ubounds[dep_dim] = posInfinity; + } + dvs.push_back(dv); + } + if (dvs.size() > 0) + D.push_back( + std::make_pair(what_stmt_num[j->first], + dvs)); + } else if (!place_after + && what_stmt_num.find(i) + != what_stmt_num.end()) { + 
std::vector<DependenceVector> dvs; + for (int k = 0; k < j->second.size(); k++) { + DependenceVector dv = j->second[k]; + if (dv.is_data_dependence() && dep_dim != -1) { + dv.lbounds[dep_dim] = -posInfinity; + dv.ubounds[dep_dim] = posInfinity; + } + dvs.push_back(dv); + } + if (dvs.size() > 0) + dep.connect(what_stmt_num[i], j->first, dvs); + + } + } else { + if (what_stmt_num.find(i) != what_stmt_num.end()) + dep.connect(what_stmt_num[i], j->first, j->second); + } + } else if (same_loop.find(j->first) != same_loop.end()) { + if (what_stmt_num.find(j->first) != what_stmt_num.end()) + D.push_back( + std::make_pair(what_stmt_num[j->first], + j->second)); + } + } + + for (int j = 0; j < D.size(); j++) + dep.connect(i, D[j].first, D[j].second); + } + + } + + return result; +} + +void Loop::skew(const std::set<int> &stmt_nums, int level, + const std::vector<int> &skew_amount) { + if (stmt_nums.size() == 0) + return; + + // check for sanity of parameters + int ref_stmt_num = *(stmt_nums.begin()); + for (std::set<int>::const_iterator i = stmt_nums.begin(); + i != stmt_nums.end(); i++) { + if (*i < 0 || *i >= stmt.size()) + throw std::invalid_argument( + "invalid statement number " + to_string(*i)); + if (level < 1 || level > stmt[*i].loop_level.size()) + throw std::invalid_argument( + "invalid loop level " + to_string(level)); + for (int j = stmt[*i].loop_level.size(); j < skew_amount.size(); j++) + if (skew_amount[j] != 0) + throw std::invalid_argument("invalid skewing formula"); + } + + // invalidate saved codegen computation + delete last_compute_cgr_; + last_compute_cgr_ = NULL; + delete last_compute_cg_; + last_compute_cg_ = NULL; + + // set trasformation relations + for (std::set<int>::const_iterator i = stmt_nums.begin(); + i != stmt_nums.end(); i++) { + int n = stmt[*i].xform.n_out(); + Relation r(n, n); + F_And *f_root = r.add_and(); + for (int j = 1; j <= n; j++) + if (j != 2 * level) { + EQ_Handle h = f_root->add_EQ(); + h.update_coef(r.input_var(j), 1); + 
h.update_coef(r.output_var(j), -1); + } + EQ_Handle h = f_root->add_EQ(); + h.update_coef(r.output_var(2 * level), -1); + for (int j = 0; j < skew_amount.size(); j++) + if (skew_amount[j] != 0) + h.update_coef(r.input_var(2 * (j + 1)), skew_amount[j]); + + stmt[*i].xform = Composition(r, stmt[*i].xform); + stmt[*i].xform.simplify(); + } + + // update dependence graph + if (stmt[ref_stmt_num].loop_level[level - 1].type == LoopLevelOriginal) { + int dep_dim = stmt[ref_stmt_num].loop_level[level - 1].payload; + for (std::set<int>::const_iterator i = stmt_nums.begin(); + i != stmt_nums.end(); i++) + for (DependenceGraph::EdgeList::iterator j = + dep.vertex[*i].second.begin(); + j != dep.vertex[*i].second.end(); j++) + if (stmt_nums.find(j->first) != stmt_nums.end()) { + // dependence between skewed statements + std::vector<DependenceVector> dvs = j->second; + for (int k = 0; k < dvs.size(); k++) { + DependenceVector &dv = dvs[k]; + if (dv.is_data_dependence()) { + coef_t lb = 0; + coef_t ub = 0; + for (int kk = 0; kk < skew_amount.size(); kk++) { + int cur_dep_dim = get_dep_dim_of(*i, kk + 1); + if (skew_amount[kk] > 0) { + if (lb != -posInfinity + && stmt[*i].loop_level[kk].type + == LoopLevelOriginal + && dv.lbounds[cur_dep_dim] + != -posInfinity) + lb += skew_amount[kk] + * dv.lbounds[cur_dep_dim]; + else { + if (cur_dep_dim != -1 + && !(dv.lbounds[cur_dep_dim] + == 0 + && dv.ubounds[cur_dep_dim] + == 0)) + lb = -posInfinity; + } + if (ub != posInfinity + && stmt[*i].loop_level[kk].type + == LoopLevelOriginal + && dv.ubounds[cur_dep_dim] + != posInfinity) + ub += skew_amount[kk] + * dv.ubounds[cur_dep_dim]; + else { + if (cur_dep_dim != -1 + && !(dv.lbounds[cur_dep_dim] + == 0 + && dv.ubounds[cur_dep_dim] + == 0)) + ub = posInfinity; + } + } else if (skew_amount[kk] < 0) { + if (lb != -posInfinity + && stmt[*i].loop_level[kk].type + == LoopLevelOriginal + && dv.ubounds[cur_dep_dim] + != posInfinity) + lb += skew_amount[kk] + * dv.ubounds[cur_dep_dim]; + else { + if 
(cur_dep_dim != -1 + && !(dv.lbounds[cur_dep_dim] + == 0 + && dv.ubounds[cur_dep_dim] + == 0)) + lb = -posInfinity; + } + if (ub != posInfinity + && stmt[*i].loop_level[kk].type + == LoopLevelOriginal + && dv.lbounds[cur_dep_dim] + != -posInfinity) + ub += skew_amount[kk] + * dv.lbounds[cur_dep_dim]; + else { + if (cur_dep_dim != -1 + && !(dv.lbounds[cur_dep_dim] + == 0 + && dv.ubounds[cur_dep_dim] + == 0)) + ub = posInfinity; + } + } + } + dv.lbounds[dep_dim] = lb; + dv.ubounds[dep_dim] = ub; + if ((dv.isCarried(dep_dim) + && dv.hasPositive(dep_dim)) && dv.quasi) + dv.quasi = false; + + if ((dv.isCarried(dep_dim) + && dv.hasNegative(dep_dim)) && !dv.quasi) + throw loop_error( + "loop error: Skewing is illegal, dependence violation!"); + dv.lbounds[dep_dim] = lb; + dv.ubounds[dep_dim] = ub; + if ((dv.isCarried(dep_dim) + && dv.hasPositive(dep_dim)) && dv.quasi) + dv.quasi = false; + + if ((dv.isCarried(dep_dim) + && dv.hasNegative(dep_dim)) && !dv.quasi) + throw loop_error( + "loop error: Skewing is illegal, dependence violation!"); + } + } + j->second = dvs; + } else { + // dependence from skewed statement to unskewed statement becomes jumbled, + // put distance value at skewed dimension to unknown + std::vector<DependenceVector> dvs = j->second; + for (int k = 0; k < dvs.size(); k++) { + DependenceVector &dv = dvs[k]; + if (dv.is_data_dependence()) { + dv.lbounds[dep_dim] = -posInfinity; + dv.ubounds[dep_dim] = posInfinity; + } + } + j->second = dvs; + } + for (int i = 0; i < dep.vertex.size(); i++) + if (stmt_nums.find(i) == stmt_nums.end()) + for (DependenceGraph::EdgeList::iterator j = + dep.vertex[i].second.begin(); + j != dep.vertex[i].second.end(); j++) + if (stmt_nums.find(j->first) != stmt_nums.end()) { + // dependence from unskewed statement to skewed statement becomes jumbled, + // put distance value at skewed dimension to unknown + std::vector<DependenceVector> dvs = j->second; + for (int k = 0; k < dvs.size(); k++) { + DependenceVector &dv = dvs[k]; + 
if (dv.is_data_dependence()) { + dv.lbounds[dep_dim] = -posInfinity; + dv.ubounds[dep_dim] = posInfinity; + } + } + j->second = dvs; + } + } +} + + +void Loop::shift(const std::set<int> &stmt_nums, int level, int shift_amount) { + if (stmt_nums.size() == 0) + return; + + // check for sanity of parameters + int ref_stmt_num = *(stmt_nums.begin()); + for (std::set<int>::const_iterator i = stmt_nums.begin(); + i != stmt_nums.end(); i++) { + if (*i < 0 || *i >= stmt.size()) + throw std::invalid_argument( + "invalid statement number " + to_string(*i)); + if (level < 1 || level > stmt[*i].loop_level.size()) + throw std::invalid_argument( + "invalid loop level " + to_string(level)); + } + + // do nothing + if (shift_amount == 0) + return; + + // invalidate saved codegen computation + delete last_compute_cgr_; + last_compute_cgr_ = NULL; + delete last_compute_cg_; + last_compute_cg_ = NULL; + + // set trasformation relations + for (std::set<int>::const_iterator i = stmt_nums.begin(); + i != stmt_nums.end(); i++) { + int n = stmt[*i].xform.n_out(); + + Relation r(n, n); + F_And *f_root = r.add_and(); + for (int j = 1; j <= n; j++) { + EQ_Handle h = f_root->add_EQ(); + h.update_coef(r.input_var(j), 1); + h.update_coef(r.output_var(j), -1); + if (j == 2 * level) + h.update_const(shift_amount); + } + + stmt[*i].xform = Composition(r, stmt[*i].xform); + stmt[*i].xform.simplify(); + } + + // update dependence graph + if (stmt[ref_stmt_num].loop_level[level - 1].type == LoopLevelOriginal) { + int dep_dim = stmt[ref_stmt_num].loop_level[level - 1].payload; + for (std::set<int>::const_iterator i = stmt_nums.begin(); + i != stmt_nums.end(); i++) + for (DependenceGraph::EdgeList::iterator j = + dep.vertex[*i].second.begin(); + j != dep.vertex[*i].second.end(); j++) + if (stmt_nums.find(j->first) == stmt_nums.end()) { + // dependence from shifted statement to unshifted statement + std::vector<DependenceVector> dvs = j->second; + for (int k = 0; k < dvs.size(); k++) { + 
DependenceVector &dv = dvs[k]; + if (dv.is_data_dependence()) { + if (dv.lbounds[dep_dim] != -posInfinity) + dv.lbounds[dep_dim] -= shift_amount; + if (dv.ubounds[dep_dim] != posInfinity) + dv.ubounds[dep_dim] -= shift_amount; + } + } + j->second = dvs; + } + for (int i = 0; i < dep.vertex.size(); i++) + if (stmt_nums.find(i) == stmt_nums.end()) + for (DependenceGraph::EdgeList::iterator j = + dep.vertex[i].second.begin(); + j != dep.vertex[i].second.end(); j++) + if (stmt_nums.find(j->first) != stmt_nums.end()) { + // dependence from unshifted statement to shifted statement + std::vector<DependenceVector> dvs = j->second; + for (int k = 0; k < dvs.size(); k++) { + DependenceVector &dv = dvs[k]; + if (dv.is_data_dependence()) { + if (dv.lbounds[dep_dim] != -posInfinity) + dv.lbounds[dep_dim] += shift_amount; + if (dv.ubounds[dep_dim] != posInfinity) + dv.ubounds[dep_dim] += shift_amount; + } + } + j->second = dvs; + } + } +} + +void Loop::scale(const std::set<int> &stmt_nums, int level, int scale_amount) { + std::vector<int> skew_amount(level, 0); + skew_amount[level - 1] = scale_amount; + skew(stmt_nums, level, skew_amount); +} + +void Loop::reverse(const std::set<int> &stmt_nums, int level) { + scale(stmt_nums, level, -1); +} + +void Loop::fuse(const std::set<int> &stmt_nums, int level) { + if (stmt_nums.size() == 0 || stmt_nums.size() == 1) + return; + + // invalidate saved codegen computation + delete last_compute_cgr_; + last_compute_cgr_ = NULL; + delete last_compute_cg_; + last_compute_cg_ = NULL; + + int dim = 2 * level - 1; + // check for sanity of parameters + std::vector<int> ref_lex; + int ref_stmt_num; + for (std::set<int>::const_iterator i = stmt_nums.begin(); + i != stmt_nums.end(); i++) { + if (*i < 0 || *i >= stmt.size()) + throw std::invalid_argument( + "invalid statement number " + to_string(*i)); + if (level <= 0 + || (level > (stmt[*i].xform.n_out() - 1) / 2 + || level > stmt[*i].loop_level.size())) + throw std::invalid_argument( + "invalid 
loop level " + to_string(level)); + if (ref_lex.size() == 0) { + ref_lex = getLexicalOrder(*i); + ref_stmt_num = *i; + } else { + std::vector<int> lex = getLexicalOrder(*i); + for (int j = 0; j < dim - 1; j += 2) + if (lex[j] != ref_lex[j]) + throw std::invalid_argument( + "statements for fusion must be in the same level-" + + to_string(level - 1) + " subloop"); + } + } + + // collect lexicographical order values from to-be-fused statements + std::set<int> lex_values; + for (std::set<int>::const_iterator i = stmt_nums.begin(); + i != stmt_nums.end(); i++) { + std::vector<int> lex = getLexicalOrder(*i); + lex_values.insert(lex[dim - 1]); + } + if (lex_values.size() == 1) + return; + // negative dependence would prevent fusion + + int dep_dim = get_dep_dim_of(ref_stmt_num, level); + + for (std::set<int>::iterator i = lex_values.begin(); i != lex_values.end(); + i++) { + ref_lex[dim - 1] = *i; + std::set<int> a = getStatements(ref_lex, dim - 1); + std::set<int>::iterator j = i; + j++; + for (; j != lex_values.end(); j++) { + ref_lex[dim - 1] = *j; + std::set<int> b = getStatements(ref_lex, dim - 1); + for (std::set<int>::iterator ii = a.begin(); ii != a.end(); ii++) + for (std::set<int>::iterator jj = b.begin(); jj != b.end(); + jj++) { + std::vector<DependenceVector> dvs; + dvs = dep.getEdge(*ii, *jj); + for (int k = 0; k < dvs.size(); k++) + if (dvs[k].isCarried(dep_dim) + && dvs[k].hasNegative(dep_dim)) + throw loop_error( + "loop error: statements " + to_string(*ii) + + " and " + to_string(*jj) + + " cannot be fused together due to negative dependence"); + dvs = dep.getEdge(*jj, *ii); + for (int k = 0; k < dvs.size(); k++) + if (dvs[k].isCarried(dep_dim) + && dvs[k].hasNegative(dep_dim)) + throw loop_error( + "loop error: statements " + to_string(*jj) + + " and " + to_string(*ii) + + " cannot be fused together due to negative dependence"); + } + } + } + + std::set<int> same_loop = getStatements(ref_lex, dim - 3); + + std::vector<std::set<int> > s = 
sort_by_same_loops(same_loop, level); + + std::set<int> s1; + std::set<int> s2; + std::set<int> s4; + std::vector<std::set<int> > s3; + for (std::set<int>::iterator kk = stmt_nums.begin(); kk != stmt_nums.end(); + kk++) + for (int i = 0; i < s.size(); i++) + if (s[i].find(*kk) != s[i].end()) { + s1.insert(s[i].begin(), s[i].end()); + s2.insert(i); + } + + s3.push_back(s1); + for (int i = 0; i < s.size(); i++) + if (s2.find(i) == s2.end()) { + s3.push_back(s[i]); + s4.insert(s[i].begin(), s[i].end()); + } + try { + std::vector<std::set<int> > s5; + s5.push_back(s1); + s5.push_back(s4); + + //Dependence Check for Ordering Constraint + //Graph<std::set<int>, bool> dummy = construct_induced_graph_at_level(s5, + // dep, dep_dim); + + Graph<std::set<int>, bool> g = construct_induced_graph_at_level(s3, dep, + dep_dim); + + s = typed_fusion(g); + } catch (const loop_error &e) { + + throw loop_error( + "statements cannot be fused together due to negative dependence"); + + } + + if (s3.size() == s.size()) { + int order = 0; + for (int i = 0; i < s.size(); i++) { + + for (std::set<int>::iterator it = s[i].begin(); it != s[i].end(); + it++) { + + assign_const(stmt[*it].xform, 2 * level - 2, order); + + } + + order++; + } + } else if (s3.size() > s.size()) { + + int order = 0; + for (int j = 0; j < s.size(); j++) { + std::set<int>::iterator it3; + for (it3 = s1.begin(); it3 != s1.end(); it3++) { + if (s[j].find(*it3) != s[j].end()) + break; + } + if (it3 != s1.end()) { + for (std::set<int>::iterator it = s1.begin(); it != s1.end(); + it++) + assign_const(stmt[*it].xform, 2 * level - 2, order); + + order++; + + } + + for (int i = 0; i < s3.size(); i++) { + std::set<int>::iterator it2; + + for (it2 = s3[i].begin(); it2 != s3[i].end(); it2++) { + if (s[j].find(*it2) != s[j].end()) + break; + } + + if (it2 != s3[i].end()) { + for (std::set<int>::iterator it = s3[i].begin(); + it != s3[i].end(); it++) + assign_const(stmt[*it].xform, 2 * level - 2, order); + + order++; + + } + } + } 
+ + } else + throw loop_error("Typed Fusion Error"); + +} + + + +void Loop::distribute(const std::set<int> &stmt_nums, int level) { + if (stmt_nums.size() == 0 || stmt_nums.size() == 1) + return; + + // invalidate saved codegen computation + delete last_compute_cgr_; + last_compute_cgr_ = NULL; + delete last_compute_cg_; + last_compute_cg_ = NULL; + int dim = 2 * level - 1; + int ref_stmt_num; + // check for sanity of parameters + std::vector<int> ref_lex; + for (std::set<int>::const_iterator i = stmt_nums.begin(); + i != stmt_nums.end(); i++) { + if (*i < 0 || *i >= stmt.size()) + throw std::invalid_argument( + "invalid statement number " + to_string(*i)); + if (level < 1 + || (level > (stmt[*i].xform.n_out() - 1) / 2 + || level > stmt[*i].loop_level.size())) + throw std::invalid_argument( + "invalid loop level " + to_string(level)); + if (ref_lex.size() == 0) { + ref_lex = getLexicalOrder(*i); + ref_stmt_num = *i; + } else { + std::vector<int> lex = getLexicalOrder(*i); + for (int j = 0; j <= dim - 1; j += 2) + if (lex[j] != ref_lex[j]) + throw std::invalid_argument( + "statements for distribution must be in the same level-" + + to_string(level) + " subloop"); + } + } + // find SCC in the to-be-distributed loop + int dep_dim = get_dep_dim_of(ref_stmt_num, level); + std::set<int> same_loop = getStatements(ref_lex, dim - 1); + Graph<int, Empty> g; + for (std::set<int>::iterator i = same_loop.begin(); i != same_loop.end(); + i++) + g.insert(*i); + for (int i = 0; i < g.vertex.size(); i++) + for (int j = i + 1; j < g.vertex.size(); j++) { + std::vector<DependenceVector> dvs; + dvs = dep.getEdge(g.vertex[i].first, g.vertex[j].first); + for (int k = 0; k < dvs.size(); k++) + if (dvs[k].isCarried(dep_dim)) { + g.connect(i, j); + break; + } + dvs = dep.getEdge(g.vertex[j].first, g.vertex[i].first); + for (int k = 0; k < dvs.size(); k++) + if (dvs[k].isCarried(dep_dim)) { + g.connect(j, i); + break; + } + } + std::vector<std::set<int> > s = g.topoSort(); + // find 
statements that cannot be distributed due to dependence cycle + Graph<std::set<int>, Empty> g2; + for (int i = 0; i < s.size(); i++) { + std::set<int> t; + for (std::set<int>::iterator j = s[i].begin(); j != s[i].end(); j++) + if (stmt_nums.find(g.vertex[*j].first) != stmt_nums.end()) + t.insert(g.vertex[*j].first); + if (!t.empty()) + g2.insert(t); + } + for (int i = 0; i < g2.vertex.size(); i++) + for (int j = i + 1; j < g2.vertex.size(); j++) + for (std::set<int>::iterator ii = g2.vertex[i].first.begin(); + ii != g2.vertex[i].first.end(); ii++) + for (std::set<int>::iterator jj = g2.vertex[j].first.begin(); + jj != g2.vertex[j].first.end(); jj++) { + std::vector<DependenceVector> dvs; + dvs = dep.getEdge(*ii, *jj); + for (int k = 0; k < dvs.size(); k++) + if (dvs[k].isCarried(dep_dim)) { + g2.connect(i, j); + break; + } + dvs = dep.getEdge(*jj, *ii); + for (int k = 0; k < dvs.size(); k++) + if (dvs[k].isCarried(dep_dim)) { + g2.connect(j, i); + break; + } + } + std::vector<std::set<int> > s2 = g2.topoSort(); + // nothing to distribute + if (s2.size() == 1) + throw loop_error( + "loop error: no statement can be distributed due to dependence cycle"); + std::vector<std::set<int> > s3; + for (int i = 0; i < s2.size(); i++) { + std::set<int> t; + for (std::set<int>::iterator j = s2[i].begin(); j != s2[i].end(); j++) + std::set_union(t.begin(), t.end(), g2.vertex[*j].first.begin(), + g2.vertex[*j].first.end(), inserter(t, t.begin())); + s3.push_back(t); + } + // associate other affected statements with the right distributed statements + for (std::set<int>::iterator i = same_loop.begin(); i != same_loop.end(); + i++) + if (stmt_nums.find(*i) == stmt_nums.end()) { + bool is_inserted = false; + int potential_insertion_point = 0; + for (int j = 0; j < s3.size(); j++) { + for (std::set<int>::iterator k = s3[j].begin(); + k != s3[j].end(); k++) { + std::vector<DependenceVector> dvs; + dvs = dep.getEdge(*i, *k); + for (int kk = 0; kk < dvs.size(); kk++) + if 
(dvs[kk].isCarried(dep_dim)) { + s3[j].insert(*i); + is_inserted = true; + break; + } + dvs = dep.getEdge(*k, *i); + for (int kk = 0; kk < dvs.size(); kk++) + if (dvs[kk].isCarried(dep_dim)) + potential_insertion_point = j; + } + if (is_inserted) + break; + } + if (!is_inserted) + s3[potential_insertion_point].insert(*i); + } + // set lexicographical order after distribution + int order = ref_lex[dim - 1]; + shiftLexicalOrder(ref_lex, dim - 1, s3.size() - 1); + for (std::vector<std::set<int> >::iterator i = s3.begin(); i != s3.end(); + i++) { + for (std::set<int>::iterator j = (*i).begin(); j != (*i).end(); j++) + assign_const(stmt[*j].xform, dim - 1, order); + order++; + } + // no need to update dependence graph + ; + return; +} + diff --git a/loop_cuda.cc b/loop_cuda.cc new file mode 100644 index 0000000..a23990d --- /dev/null +++ b/loop_cuda.cc @@ -0,0 +1,2123 @@ +/***************************************************************************** + Copyright (C) 2009 University of Utah + All Rights Reserved. 
+ + Purpose: + Cudaize methods + + Notes: + + History: + 1/7/10 Created by Gabe Rudy by migrating code from loop.cc + 31/1/11 Modified by Protonu Basu +*****************************************************************************/ + +#include <code_gen/code_gen.h> +#include <code_gen/CG_stringBuilder.h> +#include <code_gen/output_repr.h> +#include <code_gen/CG_outputRepr.h> +#include "loop_cuda.hh" +#include "loop.hh" +#include <math.h> +#include <useful.h> +#include "omegatools.hh" +#include "ir_cudasuif.hh" +#include "ir_suif.hh" +#include "ir_suif_utils.hh" +#include "chill_error.hh" +#include <vector> + +using namespace omega; +char *k_cuda_texture_memory; //protonu--added to track texture memory type +char *k_cuda_constant_memory; //protonu--added to track constant memory type +//extern char *omega::k_cuda_texture_memory; //protonu--added to track texture memory type +extern char *omega::k_ocg_comment; + + +static int cudaDebug; +class CudaStaticInit{ public: CudaStaticInit(){ cudaDebug=0; //Change this to 1 for debug +}}; +static CudaStaticInit junkInitInstance__; + + + +std::string& upcase(std::string& s) +{ + for(int i=0; i<s.size(); i++) + s[i] = toupper(s[i]); + return s; +} + +void printVs(const std::vector<std::string>& curOrder){ + if(!cudaDebug) return; + for(int i=0; i<curOrder.size(); i++){ + if(i>0) + printf(","); + printf("%s", curOrder[i].c_str()); + } + printf("\n"); +} + +void printVS(const std::vector<std::string>& curOrder){ + //if(!cudaDebug) return; + for(int i=0; i<curOrder.size(); i++){ + if(i>0) + printf(","); + printf("%s", curOrder[i].c_str()); + } + printf("\n"); +} + +LoopCuda::~LoopCuda() { + const int m = stmt.size(); + for (int i = 0; i < m; i++) + stmt[i].code->clear(); +} + +bool LoopCuda::symbolExists(std::string s){ + if(symtab->lookup_sym(s.c_str(), SYM_VAR, false)) + return true; + if(globals->lookup_sym(s.c_str(), SYM_VAR, false)) + return true; + for(int i=0; i<idxNames.size(); i++) + for(int j=0; j<idxNames[i].size(); 
j++) + if(strcmp(idxNames[i][j].c_str(), s.c_str()) == 0) + return true; + return false; +} + +void LoopCuda::addSync(int stmt_num, std::string idxName) +{ + //we store these and code-gen inserts sync to omega comments where stmt + //in loop that has idxName being generated + syncs.push_back(make_pair(stmt_num,idxName)); +} + +void LoopCuda::renameIndex(int stmt_num, std::string idx, std::string newName) +{ + int level = findCurLevel(stmt_num, idx); + if(idxNames.size() <= stmt_num || idxNames[stmt_num].size() < level) + throw std::runtime_error("Invalid statment number of index"); + idxNames[stmt_num][level-1] = newName.c_str(); +} + + + +enum Type{ Int }; + +struct VarDefs{ + std::string name; + std::string secondName; + operand size_expr; //array size as an expression (can be a product of other variables etc) + type_node * type; + var_sym* in_data; //Variable of array to copy data in from (before kernel call) + var_sym* out_data; //Variable of array to copy data out to (after kernel call) + int size_2d; //-1 if linearized, the constant size N, of a NxN 2D array otherwise + bool tex_mapped; //protonu-- true if this variable will be texture mapped, so no need to pass it as a argument + bool cons_mapped; //protonu-- true if this variable will be constant mem mapped, so no need to pass it as a argument + std::string original_name; //this is such a hack, to store the original name, to store a table to textures used + int var_ref_size ; +}; + +tree_node_list* wrapInIfFromMinBound(tree_node_list* then_part, tree_for* loop, base_symtab* symtab, var_sym* bound_sym) +{ + tree_node_list* ub = loop->ub_list(); + tree_node_list_iter upli(ub); + while(!upli.is_empty()){ + tree_node *node = upli.step(); + if(node->kind() == TREE_INSTR && ((tree_instr*)node)->instr()->format() == inf_rrr) + { + in_rrr* ins = (in_rrr*)((tree_instr*)node)->instr(); + //expect the structure: cpy( _ = min(grab_me, _)) + if(ins->opcode() == io_cpy && ins->src1_op().is_instr()){ + ins = 
(in_rrr*)ins->src1_op().instr(); + if(ins->opcode() == io_min){ + tree_node_list* tnl = new tree_node_list; + tnl->append(if_node(symtab, fold_sle(operand(bound_sym), ins->src1_op().instr()->clone()), then_part)); + return tnl; + } + } + } + } + return then_part; //Failed to go to proper loop level +} + +/** + * This would be better if it was done by a CHiLL xformation instead of at codegen + * + * state: + * for(...) + * for(...) + * cur_body + * stmt1 + * + * stm1 is in-between two loops that are going to be reduced. The + * solution is to put stmt1 at the end of cur_body but conditionally run + * in on the last step of the for loop. + * + * A CHiLL command that would work better: + * + * for(...) + * stmt0 + * for(for i=0; i<n; i++) + * cur_body + * stmt1 + * => + * for(...) + * for(for i=0; i<n; i++) + * if(i==0) stmt0 + * cur_body + * if(i==n-1) stmt1 + */ + +std::vector<tree_for*> findCommentedFors(const char* index, tree_node_list* tnl){ + std::vector<tree_for *> result; + + tree_node_list_iter iter(tnl); + bool next_loop_ok = false; + while (!iter.is_empty()) { + tree_node *tn = iter.step(); + if (tn->kind() == TREE_INSTR && ((tree_instr*)tn)->instr()->opcode() == io_mrk) + { + instruction* inst = ((tree_instr*)tn)->instr(); + std::string comment; + if ((inst->peek_annote(k_ocg_comment) != NULL)) + { + immed_list *data = (immed_list *)(inst->peek_annote(k_ocg_comment)); + immed_list_iter data_iter(data); + if(!data_iter.is_empty()){ + immed first_immed = data_iter.step(); + if(first_immed.kind() == im_string) + comment = first_immed.string(); + } + } + if(comment.find("~cuda~") != std::string::npos + && comment.find("preferredIdx: ") != std::string::npos){ + std::string idx = comment.substr(comment.find("preferredIdx: ")+14,std::string::npos); + if(idx.find(" ") != std::string::npos) + idx = idx.substr(0,idx.find(" ")); + if(strcmp(idx.c_str(),index) == 0) + next_loop_ok = true; + } + } + if (tn->kind() == TREE_FOR){ + if(next_loop_ok){ + //printf("found 
loop %s\n", static_cast<tree_for *>(tn)->index()->name()); + result.push_back(static_cast<tree_for *>(tn)); + } + else{ + //printf("looking down for loop %s\n", static_cast<tree_for *>(tn)->index()->name()); + std::vector<tree_for*> t = findCommentedFors(index, static_cast<tree_for *>(tn)->body()); + std::copy(t.begin(), t.end(), back_inserter(result)); + } + next_loop_ok = false; + } + if (tn->kind() == TREE_IF) { + //printf("looking down if\n"); + tree_if *tni = static_cast<tree_if *>(tn); + std::vector<tree_for*> t = findCommentedFors(index, tni->then_part()); + std::copy(t.begin(), t.end(), back_inserter(result)); + } + } + + return result; +} + +tree_node_list* forReduce(tree_for* loop, var_sym* reduceIndex, proc_symtab* proc_syms) +{ + //We did the replacements all at once with recursiveFindPreferedIdxs + //replacements r; + //r.oldsyms.append(loop->index()); + //r.newsyms.append(reduceIndex); + //tree_for* new_loop = (tree_for*)loop->clone_helper(&r, true); + tree_for* new_loop = loop; + + //return body one loops in + tree_node_list* tnl = loop_body_at_level(new_loop, 1); + //wrap in conditional if necessary + tnl = wrapInIfFromMinBound(tnl, new_loop, proc_syms, reduceIndex); + return tnl; +} + +void recursiveFindRefs(tree_node_list* code, proc_symtab* proc_syms, replacements* r) +{ + if(code->parent() && code->scope()->is_block()) + ((block_symtab*)code->scope())->find_exposed_refs(proc_syms, r); + tree_node_list_iter tnli(code); + while (!tnli.is_empty()) { + tree_node *node = tnli.step(); + //printf("node kind: %d\n", node->kind()); + if(node->is_instr()) + { + tree_instr* t_instr = (tree_instr*)node; + t_instr->find_exposed_refs(proc_syms, r); + } + if(node->is_block()){ + recursiveFindRefs(static_cast<tree_block *>(node)->body(), proc_syms, r); + } + else if(node->is_for()){ + tree_for* tn_for = static_cast<tree_for *>(node); + //Find refs in statemetns and body + tn_for->find_exposed_refs(proc_syms, r); + //recursiveFindRefs(tn_for->body(), proc_syms, 
r); + } + } +} + +tree_node_list* recursiveFindReplacePreferedIdxs(tree_node_list* code, proc_symtab* proc_syms, + proc_sym* cudaSync, func_type* unkown_func, + std::map<std::string, var_sym*>& loop_idxs) +{ + tree_node_list* tnl = new tree_node_list; + tree_node_list_iter tnli(code); + var_sym* idxSym=0; + bool sync = false; + std::vector<tree_node*> r1; + std::vector<tree_node_list*> r2; + while (!tnli.is_empty()) { + tree_node *node = tnli.step(); + //printf("node kind: %d\n", node->kind()); + if(node->is_instr()) + { + if(((tree_instr*)node)->instr()->format() == inf_rrr){ + in_rrr* inst = (in_rrr*)((tree_instr*)node)->instr(); + if(inst->opcode() == io_mrk){ + std::string comment; + if ((inst->peek_annote(k_ocg_comment) != NULL)) + { + immed_list *data = (immed_list *)(inst->peek_annote(k_ocg_comment)); + immed_list_iter data_iter(data); + if(!data_iter.is_empty()){ + immed first_immed = data_iter.step(); + if(first_immed.kind() == im_string) + comment = first_immed.string(); + } + } + if(comment.find("~cuda~") != std::string::npos + && comment.find("preferredIdx: ") != std::string::npos){ + std::string idx = comment.substr(comment.find("preferredIdx: ")+14,std::string::npos); + if(idx.find(" ") != std::string::npos) + idx = idx.substr(0,idx.find(" ")); + //printf("sym_tab preferred index: %s\n", idx.c_str()); + if(loop_idxs.find(idx) != loop_idxs.end()) + idxSym = loop_idxs.find(idx)->second; + //Get the proc variable sybol for this preferred index + if(idxSym == 0){ + idxSym = (var_sym*)proc_syms->lookup_sym(idx.c_str(), SYM_VAR, false); + //printf("idx not found: lookup %p\n", idxSym); + if(!idxSym){ + idxSym = new var_sym(type_s32, (char*)idx.c_str()); + proc_syms->add_sym(idxSym); + //printf("idx created and inserted\n"); + } + //Now insert into our map for future + loop_idxs.insert(make_pair(idx, idxSym)); + } + //See if we have a sync as well + if(comment.find("sync") != std::string::npos){ + //printf("Inserting sync after current block\n"); + sync = 
true; + } + } + } + } + tnl->append(node); + } + else if(node->is_block()){ + tree_block* b = static_cast<tree_block *>(node); + b->set_body(recursiveFindReplacePreferedIdxs(b->body(), proc_syms, cudaSync, unkown_func, loop_idxs)); + tnl->append(b); + } + else if(node->is_for()){ + tree_for* tn_for = static_cast<tree_for *>(node); + if(idxSym){ + //Replace the current tn_for's index variable with idxSym + //printf("replacing sym %s -> %s\n", tn_for->index()->name(), idxSym->name()); + replacements r; + r.oldsyms.append(tn_for->index()); + r.newsyms.append(idxSym); + tree_for* new_loop = (tree_for*)tn_for->clone_helper(&r, true); + idxSym = 0; //Reset for more loops in this tnl + new_loop->set_body(recursiveFindReplacePreferedIdxs(new_loop->body(), proc_syms, cudaSync, unkown_func, loop_idxs)); + tnl->append(new_loop); + + if(sync){ + in_cal *the_call = + new in_cal(type_s32, operand(), operand(new in_ldc(unkown_func->ptr_to(), operand(), immed(cudaSync))), 0); + tnl->append(new tree_instr(the_call)); + //tnl->print(); + sync = true; + } + }else{ + tn_for->set_body(recursiveFindReplacePreferedIdxs(tn_for->body(), proc_syms, cudaSync, unkown_func, loop_idxs)); + tnl->append(tn_for); + } + }else if (node->kind() == TREE_IF) { + tree_if *tni = static_cast<tree_if *>(node); + tni->set_then_part(recursiveFindReplacePreferedIdxs(tni->then_part(), proc_syms, cudaSync, unkown_func, loop_idxs)); + tnl->append(tni); + } + } + //Do this after the loop to not screw up the pointer interator + /* + for(int i=0; i<r1.size(); i++){ + swap_node_for_node_list(r1[i],r2[i]); + }*/ + return tnl; +} + +// loop_vars -> array references +// loop_idxs -> <idx_name,idx_sym> map for when we encounter a loop with a different preferredIndex +// dim_vars -> out param, fills with <old,new> var_sym pair for 2D array dimentions (messy stuff) +tree_node_list* swapVarReferences(tree_node_list* code, replacements* r, CG_suifBuilder *ocg, + std::map<std::string, var_sym*>& loop_vars, + proc_symtab 
*proc_syms, + std::vector< std::pair<var_sym*,var_sym*> >& dim_vars) +{ + //Iterate over every expression, looking up each variable and type + //reference used and possibly replacing it or adding it to our symbol + //table + // + //We use the built-in cloning helper methods to seriously help us with this! + + //Need to do a recursive mark + recursiveFindRefs(code, proc_syms, r); + + + //We can't rely on type_node->clone() to do the heavy lifting when the + //old type is a two dimentional array with variable upper bounds as + //that requires creating and saveing variable references to the upper + //bounds. So we do one pass over the oldtypes doing this type of + //conversion, putting results in the fixed_types map for a second pass + //to pick up. + std::map<type_node*,type_node*> fixed_types; //array_types needing their upper bound installed + type_node_list_iter tlip(&r->oldtypes); + while(!tlip.is_empty()) + { + type_node* old_tn = tlip.step(); + type_node* new_tn = 0; + type_node* base_type = old_tn; + std::vector< std::pair<var_sym*, type_node*> > variable_upper_bouneds; + if(old_tn->is_ptr()){ + while (base_type->is_array() || base_type->is_ptr()) { + if (base_type->is_array()){ + array_bound ub = ((array_type*)base_type)->upper_bound(); + if(ub.is_variable()){ + var_sym* old_ub = (var_sym*)ub.variable(); + var_sym *new_ub = proc_syms->new_unique_var(type_s32); + dim_vars.push_back(std::pair<var_sym* , var_sym*>(old_ub, new_ub)); + variable_upper_bouneds.push_back( std::pair<var_sym*, type_node*>(new_ub, base_type) ); + } + base_type = static_cast<array_type *>(base_type)->elem_type(); + } + else if (base_type->is_ptr()) + base_type = static_cast<ptr_type *>(base_type)->ref_type(); + } + } + for (int i = variable_upper_bouneds.size()-1; i >= 0; i--) { + var_sym *var_ub = variable_upper_bouneds[i].first; + type_node* old_tn = variable_upper_bouneds[i].second; + if(new_tn == 0) + new_tn = new array_type(base_type, array_bound(1), array_bound(var_ub)); + else + 
new_tn = new array_type(new_tn, array_bound(1), array_bound(var_ub)); + proc_syms->add_type(new_tn); + fixed_types.insert(std::pair<type_node*,type_node*>(old_tn, new_tn)); + } + if(new_tn){ + if(old_tn->is_ptr()){ + new_tn = new ptr_type(new_tn); + proc_syms->add_type(new_tn); + } + fixed_types.insert(std::pair<type_node*,type_node*>(old_tn, new_tn)); + } + } + + //Quickly look for modifiers on our our array types (__shared__ float [][]) + type_node_list_iter tliq(&r->oldtypes); + while(!tliq.is_empty()) + { + type_node* old_tn = tliq.step(); + if(old_tn->is_modifier()){ + type_node* base_type = static_cast<modifier_type *>(old_tn)->base(); + if(fixed_types.find(base_type) != fixed_types.end()){ + type_node* fixed_base = (*fixed_types.find(base_type)).second; + //printf("Fix modifier with fixed base\n"); + //This should work to copy over the annotations, but apparently doesn't work so well + type_node* new_tn = new modifier_type(static_cast<modifier_type*>(old_tn)->op(), fixed_base); + old_tn->copy_annotes(new_tn); + fixed_types.insert(std::pair<type_node*,type_node*>(old_tn, new_tn)); + } + } + } + + //Run through the types and create entries in r->newtypes but don't install + type_node_list_iter tli(&r->oldtypes); + while(!tli.is_empty()) + { + type_node* old_tn = tli.step(); + type_node* new_tn = 0; + + //If we recorded this as fixed by our special case, use that type + //instead of cloning. + if(fixed_types.find(old_tn) != fixed_types.end()){ + new_tn = (*fixed_types.find(old_tn)).second; + //printf("Reusing fixed typ %u: ", new_tn->type_id()); + }else{ + new_tn = old_tn->clone(); + //printf("Cloning type %u: ", old_tn->type_id()); + } + new_tn = proc_syms->install_type(new_tn); + + //Ok, there is a weird case where an array type that has var_sym as + //their upper bounds can't be covered fully in this loop or the + //var_sym loop, so we need special code. 
+ /* + if(old_tn->op() == TYPE_PTR && ((ptr_type*)old_tn)->ref_type()->op() == TYPE_ARRAY){ + array_type* outer_array = (array_type*)((ptr_type*)old_tn)->ref_type(); + array_bound ub = outer_array->upper_bound(); + if(ub.is_variable()){ + var_sym* old_ub = (var_sym*)ub.variable(); + var_sym* new_ub = (var_sym*)((array_type*)((ptr_type*)new_tn)->ref_type())->upper_bound().variable(); + //r->oldsyms.append(old_ub); + fix_ub.insert(std::pair<var_sym*,array_type*>(old_ub, (array_type*)((ptr_type*)new_tn)->ref_type())); + dim_vars.push_back(std::pair<var_sym* , var_sym*>(old_ub, new_ub)); + printf("array var_sym: %p\n", new_ub); + } + if(outer_array->elem_type()->op() == TYPE_ARRAY) + { + array_type* inner_array = (array_type*)outer_array->elem_type(); + array_bound ub = inner_array->upper_bound(); + if(ub.is_variable()){ + var_sym* old_ub = (var_sym*)ub.variable(); + var_sym* new_ub = (var_sym*)((array_type*)((array_type*)((ptr_type*)new_tn)->ref_type())->elem_type())->upper_bound().variable(); + dim_vars.push_back(std::pair<var_sym* , var_sym*>(old_ub, new_ub)); + printf("array var_sym: %p\n", new_ub); + //r->oldsyms.append(old_ub); + fix_ub.insert(std::pair<var_sym*,array_type*>(old_ub, (array_type*)((array_type*)((ptr_type*)new_tn)->ref_type())->elem_type())); + } + } + } + */ + r->newtypes.append(new_tn); + } + + //printf("proc_syms symbol run through\n"); + //proc_syms->print(); + + //Run through the syms creating new copies + sym_node_list_iter snli(&r->oldsyms); + while(!snli.is_empty()) + { + sym_node *old_sn = snli.step(); + + if(loop_vars.count(std::string(old_sn->name())) > 0) + { + r->newsyms.append(loop_vars[std::string(old_sn->name())]); + //printf("def exists: %s\n", old_sn->name()); + }else{ + sym_node *new_sn = old_sn->copy(); + if(new_sn->is_var()){ + var_sym* var = (var_sym*)new_sn; + type_node* new_type = var->type()->clone_helper(r); + + //TODO: Have a tagged list of variables to make shared + //Make local 2D arrays __shared__ + if(new_type->op() 
== TYPE_ARRAY && ((array_type*)new_type)->elem_type()->op() == TYPE_ARRAY){ + //protonu--changes suggested by Malik + //printf("Adding __shared__ annotation to : %s\n", new_sn->name()); + //new_type = ocg->ModifyType(new_type, "__shared__"); + //proc_syms->add_type(new_type); + } + var->set_type(new_type); + } + proc_syms->add_sym(new_sn); + r->newsyms.append(new_sn); + //printf("def new: %s\n", new_sn->name()); + } + } + + //printf("proc_syms var runthrough\n"); + //proc_syms->print(); + return code->clone_helper(r); +} + +bool LoopCuda::validIndexes(int stmt, const std::vector<std::string>& idxs){ + for(int i=0; i<idxs.size(); i++){ + bool found = false; + for(int j=0; j<idxNames[stmt].size(); j++){ + if(strcmp(idxNames[stmt][j].c_str(), idxs[i].c_str()) == 0){ + found=true; + } + } + if(!found){ + return false; + } + } + return true; +} + + +bool LoopCuda::cudaize_v2(std::string kernel_name, std::map<std::string, int> array_dims, + std::vector<std::string> blockIdxs, std::vector<std::string> threadIdxs) +{ + int stmt_num = 0; + if(cudaDebug){ + printf("cudaize_v2(%s, {", kernel_name.c_str()); + //for( + printf("}, blocks={"); printVs(blockIdxs); printf("}, thread={"); printVs(threadIdxs); printf("})\n"); + } + + this->array_dims = array_dims; + if(!validIndexes(stmt_num, blockIdxs)){ + throw std::runtime_error("One of the indexes in the block list was not " + "found in the current set of indexes."); + } + if(!validIndexes(stmt_num, threadIdxs)){ + throw std::runtime_error("One of the indexes in the thread list was not " + "found in the current set of indexes."); + } + if(blockIdxs.size() ==0) + throw std::runtime_error("Cudaize: Need at least one block dimention"); + int block_level=0; + //Now, we will determine the actual size (if possible, otherwise + //complain) for the block dimentions and thread dimentions based on our + //indexes and the relations for our stmt; + for(int i=0; i<blockIdxs.size(); i++){ + int level = findCurLevel(stmt_num, blockIdxs[i]); + 
int ub,lb; + extractCudaUB(stmt_num,level,ub,lb); + if(lb!= 0){ + //attempt to "normalize" the loop with an in-place tile and then re-check our bounds + if(cudaDebug) printf("Cudaize: doing tile at level %d to try and normalize lower bounds\n", level); + tile(stmt_num,level,1,level,CountedTile); + idxNames[stmt_num].insert(idxNames[stmt_num].begin()+(level),"");//TODO: possibly handle this for all sibling stmts + extractCudaUB(stmt_num,level,ub,lb); + } + if(lb != 0){ + char buf[1024]; + sprintf(buf, "Cudaize: Loop at level %d does not have 0 as it's lower bound", level); + throw std::runtime_error(buf); + } + if(ub < 0){ + char buf[1024]; + sprintf(buf, "Cudaize: Loop at level %d does not have a hard upper bound", level); + throw std::runtime_error(buf); + } + if(cudaDebug) printf("block idx %s level %d lb: %d ub %d\n", blockIdxs[i].c_str(), level, lb, ub); + if(i == 0){ + block_level = level; + cu_bx = ub+1; + idxNames[stmt_num][level-1] = "bx"; + } + else if(i == 1){ + cu_by = ub+1; + idxNames[stmt_num][level-1] = "by"; + } + } + if(!cu_by) + block_level=0; + int thread_level1 = 0; + int thread_level2 = 0; + for(int i=0; i<threadIdxs.size(); i++){ + int level = findCurLevel(stmt_num, threadIdxs[i]); + int ub,lb; + extractCudaUB(stmt_num,level,ub,lb); + if(lb!= 0){ + //attempt to "normalize" the loop with an in-place tile and then re-check our bounds + if(cudaDebug) printf("Cudaize: doing tile at level %d to try and normalize lower bounds\n", level); + tile(stmt_num,level,1,level,CountedTile); + idxNames[stmt_num].insert(idxNames[stmt_num].begin()+(level),""); + extractCudaUB(stmt_num,level,ub,lb); + } + if(lb != 0){ + char buf[1024]; + sprintf(buf, "Cudaize: Loop at level %d does not have 0 as it's lower bound", level); + throw std::runtime_error(buf); + } + if(ub < 0){ + char buf[1024]; + sprintf(buf, "Cudaize: Loop at level %d does not have a hard upper bound", level); + throw std::runtime_error(buf); + } + + if(cudaDebug) printf("thread idx %s level %d lb: %d 
ub %d\n", threadIdxs[i].c_str(), level, lb, ub); + if(i == 0){ + thread_level1 = level; + cu_tx = ub+1; + idxNames[stmt_num][level-1] = "tx"; + } + else if(i == 1){ + thread_level2 = level; + cu_ty = ub+1; + idxNames[stmt_num][level-1] = "ty"; + } + else if(i == 2){ + cu_tz = ub+1; + idxNames[stmt_num][level-1] = "tz"; + } + } + if(!cu_ty) + thread_level1 = 0; + if(!cu_tz) + thread_level2 = 0; + + //Make changes to nonsplitlevels + const int m = stmt.size(); + for (int i = 0; i < m; i++) { + if(block_level){ + //stmt[i].nonSplitLevels.append((block_level)*2); + stmt_nonSplitLevels[i].append((block_level)*2); + } + if(thread_level1){ + //stmt[i].nonSplitLevels.append((thread_level1)*2); + stmt_nonSplitLevels[i].append((thread_level1)*2); + } + if(thread_level2){ + //stmt[i].nonSplitLevels.append((thread_level1)*2); + stmt_nonSplitLevels[i].append((thread_level1)*2); + } + } + + if(cudaDebug) { + printf("Codegen: current names: "); + printVS(idxNames[stmt_num]); + } + //Set codegen flag + code_gen_flags |= GenCudaizeV2; + + //Save array dimention sizes + this->array_dims = array_dims; + cu_kernel_name = kernel_name.c_str(); + +} + +tree_node_list* LoopCuda::cudaize_codegen_v2() +{ + //printf("cudaize codegen V2\n"); + CG_suifBuilder *ocg = dynamic_cast<CG_suifBuilder*>(ir->builder()); + if(!ocg) return false; + + //protonu--adding an annote to track texture memory type + ANNOTE(k_cuda_texture_memory, "cuda texture memory", TRUE); + ANNOTE(k_cuda_constant_memory, "cuda constant memory", TRUE); + int tex_mem_on = 0; + int cons_mem_on = 0; + + + + CG_outputRepr* repr; + std::vector<VarDefs> arrayVars; + std::vector<VarDefs> localScopedVars; + + std::vector<IR_ArrayRef *> ro_refs; + std::vector<IR_ArrayRef *> wo_refs; + std::set<std::string> uniqueRefs; + std::set<std::string> uniqueWoRefs; + //protonu--let's try a much simpler approach of a map instead + //we also keep a map for constant memories + std::map<std::string , var_sym *>tex_ref_map; + std::map<std::string , 
var_sym *>cons_ref_map; + + for(int j=0; j<stmt.size(); j++) + { + std::vector<IR_ArrayRef *> refs = ir->FindArrayRef(stmt[j].code); + for (int i = 0; i < refs.size(); i++) + { + //printf("ref %s wo %d\n", static_cast<const char*>(refs[i]->name()), refs[i]->is_write()); + var_sym* var = symtab->lookup_var((char*)refs[i]->name().c_str(),false); + //If the array is not a parameter, then it's a local array and we + //want to recreate it as a stack variable in the kernel as opposed to + //passing it in. + if(!var->is_param()) + continue; + if (uniqueRefs.find(refs[i]->name()) == uniqueRefs.end()) + { + uniqueRefs.insert(refs[i]->name()); + if(refs[i]->is_write()){ + uniqueWoRefs.insert(refs[i]->name()); + wo_refs.push_back(refs[i]); + } + else + ro_refs.push_back(refs[i]); + } + if (refs[i]->is_write() && uniqueWoRefs.find(refs[i]->name()) == uniqueWoRefs.end()){ + uniqueWoRefs.insert(refs[i]->name()); + wo_refs.push_back(refs[i]); + //printf("adding %s to wo\n", static_cast<const char*>(refs[i]->name())); + } + } + } + + // printf("reading from array "); + // for(int i=0; i<ro_refs.size(); i++) + // printf("'%s' ", ro_refs[i]->name().c_str()); + // printf("and writting to array "); + // for(int i=0; i<wo_refs.size(); i++) + // printf("'%s' ", wo_refs[i]->name().c_str()); + // printf("\n"); + + const char* gridName = "dimGrid"; + const char* blockName = "dimBlock"; + + //TODO: Could allow for array_dims_vars to be a mapping from array + //references to to variable names that define their length. 
+ var_sym* dim1 = 0; + var_sym* dim2 = 0; + + for(int i=0; i<wo_refs.size(); i++) + { + //TODO: Currently assume all arrays are floats of one or two dimentions + var_sym* outArray = 0; + std::string name = wo_refs[i]->name(); + outArray = symtab->lookup_var((char*)name.c_str(),false); + + VarDefs v; + v.size_2d = -1; + char buf[32]; + snprintf(buf, 32, "devO%dPtr", i+1); + v.name = buf; + if(outArray->type()->is_ptr()) + if(((ptr_type *)(outArray->type()))->ref_type()->is_array()) + v.type = ((array_type *)(((ptr_type *)(outArray->type()))->ref_type()))->elem_type(); + else + v.type = ((ptr_type *)(outArray->type()))->ref_type(); + else + v.type = type_f32; + v.tex_mapped = false; + v.cons_mapped = false; + v.original_name = wo_refs[i]->name(); + //Size of the array = dim1 * dim2 * num bytes of our array type + + //If our input array is 2D (non-linearized), we want the actual + //dimentions of the array + CG_outputRepr* size; + //Lookup in array_dims + std::map<std::string, int>::iterator it = array_dims.find(name.c_str()); + if(outArray->type()->is_ptr() && outArray->type()->ref_type(0)->is_array()) + { + array_type* t = (array_type*)outArray->type()->ref_type(0); + v.size_2d = t->upper_bound().constant()+1; + printf("Detected 2D array sized of %d for %s\n", v.size_2d, (char*)wo_refs[i]->name().c_str()); + size = ocg->CreateInt(v.size_2d * v.size_2d); + }else if(it != array_dims.end()){ + int ref_size = it->second; + v.var_ref_size = ref_size; + size = ocg->CreateInt(ref_size); + } + else{ + if(dim1){ + size = ocg->CreateTimes(new CG_suifRepr(operand(dim1)), + new CG_suifRepr(operand(dim2))); + }else{ + char buf[1024]; + sprintf(buf, "CudaizeCodeGen: Array reference %s does not have a " + "detectable size or specififed dimentions", name.c_str()); + throw std::runtime_error(buf); + } + } + v.size_expr = operand(static_cast<CG_suifRepr*>(ocg->CreateTimes( + size, + ocg->CreateInt(v.type->size()/8)))->GetExpression()); + v.in_data = 0; + v.out_data = outArray; + 
//Check for in ro_refs and remove it at this point + std::vector<IR_ArrayRef *>::iterator it_; + for(it_ = ro_refs.begin(); it_ != ro_refs.end(); it_++) + { + if((*it_)->name() == wo_refs[i]->name()){ + break; + } + } + if(it_ != ro_refs.end()) + { + v.in_data = outArray; + ro_refs.erase(it_); + } + + arrayVars.push_back(v); + + } + + //protonu-- assuming that all texture mapped memories were originally read only mems + //there should be safety checks for that, will implement those later + + int cs_ref_size = 0; + + for(int i=0; i<ro_refs.size(); i++) + { + var_sym* inArray = 0; + std::string name = ro_refs[i]->name(); + inArray = symtab->lookup_var((char*)name.c_str(),false); + VarDefs v; + v.size_2d = -1; + char buf[32]; + snprintf(buf, 32, "devI%dPtr", i+1); + v.name = buf; + if(inArray->type()->is_ptr()) + if(((ptr_type *)(inArray->type()))->ref_type()->is_array()) + v.type = ((array_type *)(((ptr_type *)(inArray->type()))->ref_type()))->elem_type(); + else + v.type = ((ptr_type *)(inArray->type()))->ref_type(); + else + v.type = type_f32; + v.tex_mapped = false; + v.cons_mapped = false; + v.original_name = ro_refs[i]->name(); + if ( texture != NULL) + v.tex_mapped = (texture->is_array_tex_mapped(name.c_str()))? true:false; //protonu-track tex mapped vars + if (v.tex_mapped){ + printf("this variable %s is mapped to texture memory", name.c_str()); + } + if ( constant_mem != NULL) + v.cons_mapped = (constant_mem->is_array_cons_mapped(name.c_str()))? 
true:false; //protonu-track tex mapped vars + if (v.cons_mapped){ + printf("this variable %s is mapped to constant memory", name.c_str()); + } + + //Size of the array = dim1 * dim2 * num bytes of our array type + + //If our input array is 2D (non-linearized), we want the actual + //dimentions of the array (as it might be less than cu_n + CG_outputRepr* size; + //Lookup in array_dims + std::map<std::string, int>::iterator it = array_dims.find(name.c_str()); + int ref_size = 0; + if(inArray->type()->is_ptr() && inArray->type()->ref_type(0)->is_array()) + { + array_type* t = (array_type*)inArray->type()->ref_type(0); + v.size_2d = t->upper_bound().constant()+1; + printf("Detected 2D array sized of %d for %s\n", v.size_2d, (char*)ro_refs[i]->name().c_str()); + size = ocg->CreateInt(v.size_2d * v.size_2d); + }else if(it != array_dims.end()){ + ref_size = it->second; + v.var_ref_size = ref_size; + size = ocg->CreateInt(ref_size); + }else{ + if(dim1){ + size = ocg->CreateTimes(new CG_suifRepr(operand(dim1)), + new CG_suifRepr(operand(dim2))); + }else{ + char buf[1024]; + sprintf(buf, "CudaizeCodeGen: Array reference %s does not have a " + "detectable size or specififed dimentions", name.c_str()); + throw std::runtime_error(buf); + } + } + + + + v.size_expr = operand(static_cast<CG_suifRepr*>(ocg->CreateTimes( + size, + ocg->CreateInt(v.type->size()/8)))->GetExpression()); + + v.in_data = inArray; + v.out_data = 0; + arrayVars.push_back(v); + } + + + if(arrayVars.size() < 2) + { + fprintf(stderr, "cudaize error: Did not find two arrays being accessed\n"); + return false; + } + + //protonu--debugging tool--the printf statement + //tex_mem_on signals use of tex mem + for(int i=0; i<arrayVars.size(); i++) + { + //printf("var name %s, tex_mem used %s\n", arrayVars[i].name.c_str(), (arrayVars[i].tex_mapped)?"true":"false"); + if (arrayVars[i].tex_mapped ) tex_mem_on ++; + if (arrayVars[i].cons_mapped ) cons_mem_on ++; + } + + //Add CUDA function extern prototypes and function 
types + func_type* unkown_func = new func_type(type_s32); //function on unkown args that returns a i32 + unkown_func = (func_type*)symtab->install_type(unkown_func); + func_type* void_func = new func_type(type_void); //function on unkown args that returns a void + void_func = (func_type*)globals->install_type(void_func); + func_type* float_func = new func_type(type_f32); //function on unkown args that returns a float + float_func = (func_type*)globals->install_type(float_func); + + type_node* result = ocg->ModifyType(type_void, "__global__"); + result = globals->install_type(result); + func_type* kernel_type = new func_type(result); //function returns a '__global__ void' + + int numArgs = arrayVars.size() + (dim1 ? 2 : 0) + localScopedVars.size(); + //protonu--need to account for texture memory here, reduce the #args + if( tex_mem_on ) numArgs -= tex_mem_on; + if( cons_mem_on ) numArgs -= cons_mem_on; + kernel_type->set_num_args(numArgs); + int argCount = 0; + for(int i=0; i<arrayVars.size(); i++) + { + type_node* fptr; + if(arrayVars[i].in_data) + fptr = arrayVars[i].in_data->type()->clone(); + else + fptr = arrayVars[i].out_data->type()->clone(); + //protonu--skip this for texture mems + if( arrayVars[i].tex_mapped != true && arrayVars[i].cons_mapped !=true ) + kernel_type->set_arg_type(argCount++, fptr); + } + if(dim1){ + kernel_type->set_arg_type(argCount++, type_s32); //width x height dimentions + kernel_type->set_arg_type(argCount++, type_s32); + } + kernel_type = (func_type*)globals->install_type(kernel_type); + + proc_sym* cudaMalloc = globals->new_proc(unkown_func, src_c, "cudaMalloc"); + proc_sym* cudaMemcpy = globals->new_proc(unkown_func, src_c, "cudaMemcpy"); + proc_sym* cudaFree = globals->new_proc(unkown_func, src_c, "cudaFree"); + proc_sym* cudaSync = globals->new_proc(void_func, src_c, "__syncthreads"); + proc_sym* cudaBind = globals->new_proc(unkown_func, src_c, "cudaBindTexture"); + proc_sym* cudaMemcpySym = globals->new_proc(unkown_func, src_c, 
"cudaMemcpyToSymbol"); + + + //protonu-removing Gabe's function, introducing mine, this is pretty cosmetic + //proc_sym* cudaFetch = globals->new_proc(float_func, src_c, "tex1Dfetch"); + proc_sym* tex1D = globals->new_proc(float_func, src_c, "tex1Dfetch"); + + var_sym *cudaMemcpyHostToDevice = new var_sym(type_s32, "cudaMemcpyHostToDevice"); + var_sym *cudaMemcpyDeviceToHost = new var_sym(type_s32, "cudaMemcpyDeviceToHost"); + cudaMemcpyDeviceToHost->set_param(); + cudaMemcpyHostToDevice->set_param(); + globals->add_sym(cudaMemcpyHostToDevice); + globals->add_sym(cudaMemcpyDeviceToHost); + + //protonu--adding the bool tex_mem to the structure struct_type + //to bypass the re-naming of struct texture, this is a hack fix + struct_type* texType = new struct_type(TYPE_GROUP, 0, "texture<float, 1, cudaReadModeElementType>", 0, true); + immed_list *iml_tex = new immed_list; + iml_tex->append(immed("texture memory")); + texType->append_annote(k_cuda_texture_memory, iml_tex); + //protonu--end my changes + texType = (struct_type*)globals->install_type(texType); + //protonu--should register the locals later on + //when we do the bind operation + //var_sym* texRef = new var_sym(texType, "texRef"); + //globals->add_sym(texRef); + + //Add our mallocs (and input array memcpys) + for(int i=0; i<arrayVars.size(); i++) + { + //protonu--check if the variable is not a tex-mapped variable. If it is tex mapped + // allow a malloc and memcpy operation, and a bind, but only if it is tex mapped, but dont call + // the kernel with it as an argument. 
+ + //Make a pointer of type a[i].type + //type_node* fptr = new ptr_type(arrayVars[i].type->clone()); + //protonu--temporary change + type_node* fptr = new ptr_type(arrayVars[i].type); + fptr = symtab->install_type(fptr); + var_sym *dvs = new var_sym(fptr, const_cast<char*>( + arrayVars[i].name.c_str())); + dvs->set_addr_taken(); + symtab->add_sym(dvs); + + //cudaMalloc args + //protonu--no cudaMalloc required for constant memory + tree_node_list* tnl = new tree_node_list; + if(arrayVars[i].cons_mapped != true ) + { + in_cal *the_call = + new in_cal(type_s32, operand(), operand(new in_ldc(unkown_func->ptr_to(), operand(), immed(cudaMalloc))), 2); + the_call->set_argument(0, operand(new in_ldc(type_void->ptr_to()->ptr_to(), operand(), immed(dvs)))); + the_call->set_argument(1, arrayVars[i].size_expr); + + tnl->append(new tree_instr(the_call)); + setup_code = ocg->StmtListAppend(setup_code, + new CG_suifRepr(tnl)); + } + if(arrayVars[i].in_data) + { + //cudaMemcpy args + //protonu-- no cudaMemcpy required for constant memory + if ( arrayVars[i].cons_mapped != true ) + { + in_cal *the_call = + new in_cal(type_s32, operand(), operand(new in_ldc(unkown_func->ptr_to(), operand(), immed(cudaMemcpy))), 4); + the_call->set_argument(0, operand(dvs)); + the_call->set_argument(1, operand(arrayVars[i].in_data)); + the_call->set_argument(2, arrayVars[i].size_expr.clone()); + the_call->set_argument(3, operand(cudaMemcpyHostToDevice)); + + tnl = new tree_node_list; + tnl->append(new tree_instr(the_call)); + setup_code = ocg->StmtListAppend(setup_code, + new CG_suifRepr(tnl)); + } + + //protonu--check if the arrayvar is tex mapped + if(arrayVars[i].tex_mapped == true) + { + //Need a texture reference variable + char buf[32]; + snprintf(buf, 32, "tex%dRef", i+1); + arrayVars[i].secondName = buf; + + var_sym* texRef = new var_sym(texType, buf); + //printf("\n putting in %s\n", arrayVars[i].original_name.c_str()); + tex_ref_map[arrayVars[i].original_name] = texRef; + 
globals->add_sym(texRef); + //protonu--added the above two lines + + in_cal *the_call = + new in_cal(type_s32, operand(), operand(new in_ldc(unkown_func->ptr_to(), operand(), immed(cudaBind))), 4); + in_ldc *ins = new in_ldc(type_s32, operand(), immed(0)); + the_call->set_argument(0, operand(ins)); + the_call->set_argument(1, operand(texRef));//protonu--change to add the new sym + the_call->set_argument(2, operand(dvs)); + the_call->set_argument(3, arrayVars[i].size_expr.clone()); + + tnl = new tree_node_list; + tnl->append(new tree_instr(the_call)); + setup_code = ocg->StmtListAppend(setup_code, + new CG_suifRepr(tnl)); + } + + //protonu--if arrayvar is mapped to constant memory + if(arrayVars[i].cons_mapped == true) + { + char buf[32]; + snprintf(buf, 32, "cs%dRef", i+1); + //arrayVars[i].secondName = buf; + array_bound low (0); + array_bound high (arrayVars[i].var_ref_size -1); + array_type *arr = new array_type(arrayVars[i].type,low, high); + type_node* cons_arr = ocg->ModifyType(arr, "__device__ __constant__"); + cons_arr = globals->install_type(cons_arr); + var_sym* consRef = new var_sym(cons_arr, buf); + cons_ref_map[arrayVars[i].original_name] = consRef; + globals->add_sym(consRef); + + + + in_cal *the_call = + new in_cal(type_s32, operand(), operand(new in_ldc(unkown_func->ptr_to(), operand(), immed(cudaMemcpySym))), 3); + the_call->set_argument(0, operand(new in_ldc(type_void->ptr_to(), operand(), immed(consRef)))); + the_call->set_argument(1, operand(arrayVars[i].in_data)); + the_call->set_argument(2, arrayVars[i].size_expr.clone()); + + tnl = new tree_node_list; + tnl->append(new tree_instr(the_call)); + setup_code = ocg->StmtListAppend(setup_code, + new CG_suifRepr(tnl)); + + } + } + } + + //Build dimGrid dim3 variables based on loop dimentions and ti/tj + char blockD1[120]; + char blockD2[120]; + if(dim1){ + snprintf(blockD1, 120, "%s/%d", dim1->name(), cu_tx); + snprintf(blockD2, 120, "%s/%d", dim2->name(), cu_ty); + }else{ + snprintf(blockD1, 120, 
"%d", cu_bx); + snprintf(blockD2, 120, "%d", cu_by); + //snprintf(blockD1, 120, "%d/%d", cu_nx, cu_tx); + //snprintf(blockD2, 120, "%d/%d", cu_ny, cu_ty); + } + repr = ocg->CreateDim3(immed((char*)gridName), + immed(blockD1), + immed(blockD2)); + setup_code = ocg->StmtListAppend(setup_code, repr); + + repr = ocg->CreateDim3(immed((char*)blockName), immed(cu_tx),immed(cu_ty)); + + if(cu_tz > 1) + repr = ocg->CreateDim3(immed((char*)blockName), immed(cu_tx), immed(cu_ty), immed(cu_tz)); + else + repr = ocg->CreateDim3(immed((char*)blockName), immed(cu_tx), immed(cu_ty)); + setup_code = ocg->StmtListAppend(setup_code, repr); + + //call kernel function with name loop_name + //like: transpose_k<<<dimGrid,dimBlock>>>(devOPtr, devIPtr , width, height); + char dims[120]; + snprintf(dims,120,"<<<%s,%s>>>",gridName, blockName); + immed_list *iml = new immed_list; + iml->append(immed((char*)cu_kernel_name.c_str())); + iml->append(immed(dims)); + //printf("%s %s\n", static_cast<const char*>(cu_kernel_name), dims); + for(int i=0; i<arrayVars.size(); i++) + //Throw in a type cast if our kernel takes 2D array notation + //like (float(*) [1024]) + { + //protonu--throwing in another hack to stop the caller from passing tex mapped + //vars to the kernel. 
+ if(arrayVars[i].tex_mapped == true || arrayVars[i].cons_mapped == true ) + continue; + if(arrayVars[i].size_2d >= 0) + { + snprintf(dims,120,"(float(*) [%d])%s", arrayVars[i].size_2d, + const_cast<char*>(arrayVars[i].name.c_str())); + //printf("%d %s\n", i, dims); + iml->append(immed(dims)); + }else{ + //printf("%d %s\n", i, static_cast<const char*>(arrayVars[i].name)); + iml->append(immed(const_cast<char*>( + arrayVars[i].name.c_str()))); + } + } + if(dim1){ + iml->append(immed(dim1)); + iml->append(immed(dim2)); + } + repr = ocg->CreateKernel(iml);//kernel call + setup_code = ocg->StmtListAppend(setup_code, repr); + + //cuda free variables + for(int i=0; i<arrayVars.size(); i++) + { + if(arrayVars[i].out_data) + { + //cudaMemcpy args + in_cal *the_call = + new in_cal(type_s32, operand(), operand(new in_ldc(unkown_func->ptr_to(), operand(), immed(cudaMemcpy))), 4); + the_call->set_argument(0, operand(arrayVars[i].out_data)); + the_call->set_argument(1, operand(symtab->lookup_var(const_cast<char*>( + arrayVars[i].name.c_str())))); + the_call->set_argument(2, arrayVars[i].size_expr.clone()); + the_call->set_argument(3, operand(cudaMemcpyDeviceToHost)); + + tree_node_list* tnl = new tree_node_list; + tnl->append(new tree_instr(the_call)); + teardown_code = ocg->StmtListAppend(teardown_code, + new CG_suifRepr(tnl)); + } + + in_cal *the_call = + new in_cal(type_s32, operand(), operand(new in_ldc(unkown_func->ptr_to(), operand(), immed(cudaFree))), 1); + the_call->set_argument(0, operand(symtab->lookup_var(const_cast<char*>( + arrayVars[i].name.c_str())))); + + tree_node_list* tnl = new tree_node_list; + tnl->append(new tree_instr(the_call)); + teardown_code = ocg->StmtListAppend(teardown_code, + new CG_suifRepr(tnl)); + } + + // --------------- + // BUILD THE KERNEL + // --------------- + + //Extract out kernel body + tree_node_list* code = getCode(); + //Get rid of wrapper if that original() added + if(code->head()->contents->kind() == TREE_IF) + { + tree_if* ifn = 
(tree_if*)code->head()->contents; + code = ifn->then_part(); + } + + //Create kernel function body + proc_sym *new_psym = globals->new_proc(kernel_type, src_c, (char*)cu_kernel_name.c_str()); + proc_symtab *new_proc_syms = new proc_symtab(new_psym->name()); + globals->add_child(new_proc_syms); + + //Add Params + std::map<std::string, var_sym*> loop_vars; + //In-Out arrays + type_node* fptr; + for(int i=0; i<arrayVars.size(); i++) + { + if(arrayVars[i].in_data) + //fptr = arrayVars[i].in_data->type()->clone(); + fptr = arrayVars[i].in_data->type(); + else + //fptr = arrayVars[i].out_data->type()->clone(); + fptr = arrayVars[i].out_data->type(); + fptr = new_proc_syms->install_type(fptr); + std::string name = arrayVars[i].in_data ? arrayVars[i].in_data->name() : arrayVars[i].out_data->name(); + var_sym* sym = new var_sym(fptr, arrayVars[i].in_data ? arrayVars[i].in_data->name() : arrayVars[i].out_data->name()); + //protonu--adding a check to ensure that texture memories are not passed in as arguments + if(arrayVars[i].tex_mapped != true && arrayVars[i].cons_mapped !=true ) + { + sym->set_param(); + new_proc_syms->params()->append(sym); + new_proc_syms->add_sym(sym);//protonu--added to suppress the addition of the redundant var in the kernel + } + if (arrayVars[i].cons_mapped == true) + { + sym->set_param(); + new_proc_syms->add_sym(sym); + } + //printf("inserting name: %s\n", static_cast<const char*>(name)); + loop_vars.insert(std::pair<std::string, var_sym*>(std::string(name), sym)); + } + + if(dim1) + { + //Array dimentions + var_sym* kdim1 = new var_sym(dim1->type(), dim1->name()); + kdim1->set_param(); + new_proc_syms->add_sym(kdim1); + loop_vars.insert(std::pair<std::string, var_sym*>(std::string(dim1->name()), kdim1)); + var_sym* kdim2 = new var_sym(dim2->type(), dim2->name()); + kdim2->set_param(); + new_proc_syms->add_sym(kdim2); + loop_vars.insert(std::pair<std::string, var_sym*>(std::string(dim2->name()), kdim2)); + new_proc_syms->params()->append(kdim1); + 
new_proc_syms->params()->append(kdim2); + } + //Put block and thread implicit variables into scope + std::vector<var_sym *> index_syms; + /* Currently we don't use the block dimentions + var_sym* blockDim_x = new var_sym(type_s32, "blockDim.x"); + blockDim_x->set_param(); + new_proc_syms->add_sym(blockDim_x); + var_sym* blockDim_y = new var_sym(type_s32, "blockDim.y"); + blockDim_y->set_param(); + new_proc_syms->add_sym(blockDim_y); + */ + if(cu_bx > 1){ + var_sym* blockIdx_x = new var_sym(type_s32, "blockIdx.x"); + blockIdx_x->set_param(); + new_proc_syms->add_sym(blockIdx_x); + index_syms.push_back(blockIdx_x); + } + if(cu_by > 1){ + var_sym* blockIdx_y = new var_sym(type_s32, "blockIdx.y"); + blockIdx_y->set_param(); + new_proc_syms->add_sym(blockIdx_y); + index_syms.push_back(blockIdx_y); + } + if(cu_tx > 1){ + var_sym* threadIdx_x = new var_sym(type_s32, "threadIdx.x"); + threadIdx_x->set_param(); + new_proc_syms->add_sym(threadIdx_x); + index_syms.push_back(threadIdx_x); + } + if(cu_ty > 1){ + var_sym* threadIdx_y = new var_sym(type_s32, "threadIdx.y"); + threadIdx_y->set_param(); + new_proc_syms->add_sym(threadIdx_y); + index_syms.push_back(threadIdx_y); + } + + if(cu_tz > 1){ + var_sym* threadIdx_z = new var_sym(type_s32, "threadIdx.z"); + threadIdx_z->set_param(); + new_proc_syms->add_sym(threadIdx_z); + index_syms.push_back(threadIdx_z); + } + + //Figure out which loop variables will be our thread and block dimention variables + std::vector<var_sym *> loop_syms; + //Get our indexes + std::vector<const char*> indexes;// = get_loop_indexes(code,cu_num_reduce); + int threadsPos=0; + if(cu_bx > 1) + indexes.push_back("bx"); + if(cu_by > 1) + indexes.push_back("by"); + if(cu_tx > 1){ + threadsPos = indexes.size(); + indexes.push_back("tx"); + } + if(cu_ty > 1) + indexes.push_back("ty"); + if(cu_tz > 1) + indexes.push_back("tz"); + for(int i=0; i<indexes.size(); i++) + { + //printf("indexes[%d] = %s\n", i, (char*)indexes[i]); + loop_syms.push_back(new 
var_sym(type_s32, (char*)indexes[i])); + new_proc_syms->add_sym(loop_syms[i]); + //loop_vars.insert(std::pair<std::string, var_sym*>(std::string(indexes[i]), loop_syms[i])); + } + + //Generate this code + //int bx = blockIdx.x + //int by = blockIdx.y + //int tx = threadIdx.x + //int ty = threadIdx.y + CG_outputRepr *body=NULL; + for(int i=0; i<indexes.size(); i++){ + CG_outputRepr *lhs = new CG_suifRepr(operand(loop_syms[i])); + //body = ocg->StmtListAppend(body, ocg->CreateStmtList( + // ocg->CreateAssignment(0, lhs, new CG_suifRepr(operand(index_syms[i]))))); + body = ocg->StmtListAppend(body, ocg->StmtListAppend( + ocg->CreateAssignment(0, lhs, new CG_suifRepr(operand(index_syms[i]))), NULL)); + } + + //Get our inital code prepped for loop reduction. First we need to swap + //out internal SUIF variable references to point to the new local + //function symbol table. + std::map<std::string, var_sym*> loop_idxs; //map from idx names to their new syms + std::vector< std::pair<var_sym*, var_sym*> > dim_vars; //pair is of <old,new> var_sym (for 2D array size initializations) + replacements r; + tree_node_list* swapped = swapVarReferences(code, &r, ocg, loop_vars, new_proc_syms, dim_vars); + //printf("\n code before recursiveFindReplacePreferedIdxs :\n"); + //swapped->print(); + swapped = recursiveFindReplacePreferedIdxs(swapped, new_proc_syms, cudaSync, void_func, loop_idxs);//in-place swapping + //printf("\n code after recursiveFindReplacePreferedIdxs :\n"); + //swapped->print(); + + for(int i=0; i<indexes.size(); i++){ + std::vector<tree_for*> tfs = findCommentedFors(indexes[i], swapped); + for(int k=0; k<tfs.size(); k++){ + //printf("replacing %p tfs for index %s\n", tfs[k], indexes[i]); + tree_node_list* newBlock = forReduce(tfs[k], loop_idxs[indexes[i]], new_proc_syms); + //newBlock->print(); + swap_node_for_node_list(tfs[k], newBlock); + //printf("AFTER SWAP\n"); newBlock->print(); + } + } + //printf("AFTER REDUCE\n"); swapped->print(); + + if(static_cast<const 
IR_cudasuifCode *>(ir)->init_code()){ + tree_node_list* orig_init_code = static_cast<CG_suifRepr *>(static_cast<const IR_cudasuifCode *>(ir)->init_code())->GetCode(); + for(int i=0; i<dim_vars.size(); i++){ + //We have a map of var_sym from the original function body and we know + //that these var_syms have initialization statements which define the + //array size. We need to mimic these initialization statements. + + //First find the assignment and pull out the constant initialization + //value + int value = -1; + tree_node_list_iter tnli(orig_init_code); + while (!tnli.is_empty()) { + tree_node *node = tnli.step(); + if(node->kind() == TREE_INSTR && ((tree_instr*)node)->instr()->format() == inf_rrr) + { + in_rrr* inst = (in_rrr*)((tree_instr*)node)->instr(); + //expect the structure: cpy( _ = min(grab_me, _)) + if(inst->opcode() == io_cpy && inst->dst_op().is_symbol()){ + //printf("looking at instruction: "); + //inst->print(); + var_sym* dest = inst->dst_op().symbol(); + if(dest == dim_vars[i].first) + { + if(inst->src1_op().is_instr() && inst->src1_op().instr()->format() == inf_ldc){ + value = ((in_ldc*)inst->src1_op().instr())->value().integer(); + } + } + } + } + } + if(value < 0){ + fprintf(stderr, "ERROR: Could not find initializing statement for variable used in upper_bound of array type"); + } + CG_outputRepr *lhs = new CG_suifRepr(operand(dim_vars[i].second)); + //body = ocg->StmtListAppend(body, ocg->CreateStmtList(ocg->CreateAssignment(0, lhs, ocg->CreateInt(value)))); + body = ocg->StmtListAppend(body, ocg->StmtListAppend(ocg->CreateAssignment(0, lhs, ocg->CreateInt(value)), NULL)); + } + } + + + body = ocg->StmtListAppend(body, new CG_suifRepr(swapped)); + + //protonu--lets try creating our function definiton here + var_sym *tsym = NULL; + + + std::vector<IR_ArrayRef *> refs = ir->FindArrayRef(body); + for(int i=0; i<refs.size(); i++) + { + //check if the array is tex mapped + if(texture != NULL && 
texture->is_array_tex_mapped(refs[i]->name().c_str())) + { + //protonu--our new tex lookup function + in_cal *tex_lookup = + new in_cal(type_f32, operand(), operand(new in_ldc(float_func->ptr_to(), operand(), immed(tex1D))), 2); + + //printf("name of the array to be mapped is %s\n", refs[i]->name().c_str()); + tsym = tex_ref_map[refs[i]->name()]; + tex_lookup->set_argument(0, operand(tsym)); + + + int array_dims = ((IR_suifArrayRef *)refs[i])->ia_->dims(); + + if (array_dims == 1){ + tex_lookup->set_argument(1, ((IR_suifArrayRef *)refs[i])->ia_->index(0).clone()); + }else if (array_dims > 2) { + printf(" \n we don't handle more than 2D arrays mapped to textures yet\n"); + }else if (array_dims == 2) { + + IR_ArraySymbol *sym = refs[i]->symbol(); + CG_outputRepr *sz = sym->size(1); + delete sym; // free the wrapper object only + // find the builder ocg + CG_outputRepr *expr = ocg->CreateTimes(sz->clone(),refs[i]->index(0)); + delete sz; // free the wrapper object only + expr = ocg->CreatePlus(expr, refs[i]->index(1)); + // expr holds the 1D access expression and take it out + tex_lookup->set_argument(1, ((CG_suifRepr *)expr)->GetExpression()); + } + + //using chun's function to replace the array look up with the function call + ((IR_suifCode *)ir)->ReplaceExpression(refs[i] , new CG_suifRepr(operand(tex_lookup))); + } + + } + + + tsym = NULL; + //protonu--now let's try what we did above for constant memory + for(int i=0; i<refs.size(); i++) + { + //check if the array is tex mapped + if(constant_mem != NULL && constant_mem->is_array_cons_mapped(refs[i]->name().c_str())) + { + + //printf("name of the array to be cons mapped is %s\n", refs[i]->name().c_str()); + tsym = cons_ref_map[refs[i]->name()]; + //we should create a IR_SuifArray here + IR_ArraySymbol *ar_sym = new IR_suifArraySymbol(ir,tsym); + std::vector<CG_outputRepr *> ar_index; + ar_index.push_back(((IR_suifArrayRef *)refs[i])->index(0)); + IR_ArrayRef *ar_ref = ((IR_suifCode *)ir)->CreateArrayRef(ar_sym, 
ar_index); + //using chun's function to replace the array look up with the function call + ((IR_suifCode *)ir)->ReplaceExpression(refs[i] , new CG_suifRepr(operand(((IR_suifArrayRef *)ar_ref)->ia_))); + + } + } + + + tree_proc *new_body = new tree_proc(static_cast<CG_suifRepr*>(body)->GetCode(), new_proc_syms); + //globals->add_child(new_proc_syms); + new_psym->set_block(new_body); + new_procs.push_back(new_psym); + + return swapped; +} + +//Order taking out dummy variables +std::vector<std::string> cleanOrder(std::vector<std::string> idxNames){ + std::vector<std::string> results; + for(int j=0; j<idxNames.size(); j++){ + if(idxNames[j].length() != 0) + results.push_back(idxNames[j]); + } + return results; +} + +//First non-dummy level in ascending order +int LoopCuda::nonDummyLevel(int stmt, int level){ + //level comes in 1-basd and should leave 1-based + for(int j=level-1; j<idxNames[stmt].size(); j++){ + if(idxNames[stmt][j].length() != 0){ + //printf("found non dummy level of %d with idx: %s when searching for %d\n", j+1, (const char*) idxNames[stmt][j], level); + return j+1; + } + } + char buf[128]; sprintf(buf, "%d", level); + throw std::runtime_error(std::string("Unable to find a non-dummy level starting from ") + std::string(buf)); +} + +int LoopCuda::findCurLevel(int stmt, std::string idx){ + for(int j=0; j<idxNames[stmt].size(); j++){ + if(strcmp(idxNames[stmt][j].c_str(),idx.c_str()) == 0) + return j+1; + } + throw std::runtime_error(std::string("Unable to find index ") + idx + std::string(" in current list of indexes")); +} + +void LoopCuda::permute_cuda(int stmt, const std::vector<std::string>& curOrder) +{ + //printf("curOrder: "); + //printVs(curOrder); + //printf("idxNames: "); + //printVS(idxNames[stmt]); + std::vector<std::string> cIdxNames = cleanOrder(idxNames[stmt]); + bool same=true; + std::vector<int> pi; + for(int i=0; i<curOrder.size(); i++){ + bool found = false; + for(int j=0; j<cIdxNames.size(); j++){ + if(strcmp(cIdxNames[j].c_str(), 
curOrder[i].c_str()) == 0){ + pi.push_back(j+1); + found=true; + if(j!=i) + same=false; + } + } + if(!found){ + throw std::runtime_error("One of the indexes in the permute order where not " + "found in the current set of indexes."); + } + } + for(int i=curOrder.size(); i<cIdxNames.size(); i++){ + pi.push_back(i); + } + if(same) + return; + permute(stmt, pi); + //Set old indexe names as new + for(int i=0; i<curOrder.size(); i++){ + idxNames[stmt][i] = curOrder[i].c_str(); //what about sibling stmts? + } +} + + +bool LoopCuda::permute(int stmt_num, const std::vector<int> &pi) +{ +// check for sanity of parameters + if (stmt_num >= stmt.size() || stmt_num < 0) + throw std::invalid_argument("invalid statement " + to_string(stmt_num)); + const int n = stmt[stmt_num].xform.n_out(); + if (pi.size() > (n-1)/2) + throw std::invalid_argument("iteration space dimensionality does not match permute dimensionality"); + int first_level = 0; + int last_level = 0; + for (int i = 0; i < pi.size(); i++) { + if (pi[i] > (n-1)/2 || pi[i] <= 0) + throw std::invalid_argument("invalid loop level " + to_string(pi[i]) + " in permuation"); + + if (pi[i] != i+1) { + if (first_level == 0) + first_level = i+1; + last_level = i+1; + } + } + if (first_level == 0) + return true; + + std::vector<int> lex = getLexicalOrder(stmt_num); + std::set<int> active = getStatements(lex, 2*first_level-2); + Loop::permute(active, pi); +} + + +void LoopCuda::tile_cuda(int stmt, int level, int outer_level) +{ + tile_cuda(stmt,level,1,outer_level,"","",CountedTile); +} +void LoopCuda::tile_cuda(int level, int tile_size, int outer_level, std::string idxName, + std::string ctrlName, TilingMethodType method){ + tile_cuda(0, level, tile_size, outer_level, idxName, ctrlName, method); +} + +void LoopCuda::tile_cuda(int stmt, int level, int tile_size, int outer_level, std::string idxName, + std::string ctrlName, TilingMethodType method){ + //Do regular tile but then update the index and control loop variable + //names as 
well as the idxName to reflect the current state of things. + //printf("tile(%d,%d,%d,%d)\n", stmt, level, tile_size, outer_level); + //printf("idxNames before: "); + //printVS(idxNames[stmt]); + + tile(stmt, level, tile_size, outer_level, method); + + if(idxName.size()) + idxNames[stmt][level-1] = idxName.c_str(); + if(tile_size == 1){ + //potentially rearrange loops + if(outer_level < level){ + std::string tmp = idxNames[stmt][level-1]; + for(int i=level-1; i>outer_level-1; i--){ + if(i-1 >= 0) + idxNames[stmt][i] = idxNames[stmt][i-1]; + } + idxNames[stmt][outer_level-1] = tmp; + } + //TODO: even with a tile size of one, you need a insert (of a dummy loop) + idxNames[stmt].insert(idxNames[stmt].begin()+(level),""); + }else{ + if(!ctrlName.size()) + throw std::runtime_error("No ctrl loop name for tile"); + //insert + idxNames[stmt].insert(idxNames[stmt].begin()+(outer_level-1),ctrlName.c_str()); + } + + //printf("idxNames after: "); + //printVS(idxNames[stmt]); +} + + +bool LoopCuda::datacopy_privatized_cuda(int stmt_num, int level, const std::string &array_name, const std::vector<int> &privatized_levels, bool allow_extra_read , int fastest_changing_dimension , int padding_stride , int padding_alignment , bool cuda_shared) +{ + int old_stmts =stmt.size(); + //datacopy_privatized(stmt_num, level, array_name, privatized_levels, allow_extra_read, fastest_changing_dimension, padding_stride, padding_alignment, cuda_shared); + if(cuda_shared) + datacopy_privatized(stmt_num, level, array_name, privatized_levels, allow_extra_read, fastest_changing_dimension, padding_stride, padding_alignment, 1); + else + datacopy_privatized(stmt_num, level, array_name, privatized_levels, allow_extra_read, fastest_changing_dimension, padding_stride, padding_alignment, 0); + + + //Adjust idxNames to reflect updated state + std::vector<std::string> cIdxNames = cleanOrder(idxNames[stmt_num]); + int new_stmts = stmt.size(); + for(int i=old_stmts; i<new_stmts; i++){ + //printf("fixing up 
statement %d\n", i); + std::vector<std::string> idxs; + + + //protonu-making sure the vector of nonSplitLevels grows along with + //the statement structure + stmt_nonSplitLevels.push_back(omega::Tuple<int>()); + + //Indexes up to level will be the same + for(int j=0; j<level-1; j++) + idxs.push_back(cIdxNames[j]); + + //Expect privatized_levels to match + for(int j=0; j<privatized_levels.size(); j++) + idxs.push_back(cIdxNames[privatized_levels[j]-1]);//level is one-based + + //all further levels should match order they are in originally + if(privatized_levels.size()){ + int last_privatized = privatized_levels.back(); + int top_level = last_privatized + (stmt[i].IS.n_set()-idxs.size()); + //printf("last privatized_levels: %d top_level: %d\n", last_privatized, top_level); + for(int j=last_privatized; j<top_level; j++){ + idxs.push_back(cIdxNames[j]); + //printf("pushing back: %s\n", (const char*)cIdxNames[j]); + } + } + idxNames.push_back(idxs); + } +} + +bool LoopCuda::datacopy_cuda(int stmt_num, int level, const std::string &array_name, std::vector<std::string> new_idxs, bool allow_extra_read, int fastest_changing_dimension, int padding_stride, int padding_alignment, bool cuda_shared) +{ + + int old_stmts =stmt.size(); + //datacopy(stmt_num,level,array_name,allow_extra_read,fastest_changing_dimension,padding_stride,padding_alignment,cuda_shared); + if(cuda_shared) + datacopy(stmt_num,level,array_name,allow_extra_read,fastest_changing_dimension,padding_stride,padding_alignment, 1); + else + datacopy(stmt_num,level,array_name,allow_extra_read,fastest_changing_dimension,padding_stride,padding_alignment, 0); + //Adjust idxNames to reflect updated state + std::vector<std::string> cIdxNames = cleanOrder(idxNames[stmt_num]); + int new_stmts = stmt.size(); + for(int i=old_stmts; i<new_stmts; i++){ + //printf("fixing up statement %d\n", i); + std::vector<std::string> idxs; + + //protonu-making sure the vector of nonSplitLevels grows along with + //the statement structure + 
stmt_nonSplitLevels.push_back(omega::Tuple<int>()); + + //protonu--lets dump out the code from each statement here + //printf("\n dumping statement :%d", i); + //stmt[i].code->Dump(); + + //Indexes up to level will be the same + for(int j=0; j<level-1; j++) + idxs.push_back(cIdxNames[j]); + + //all further levels should get names from new_idxs + int top_level = stmt[i].IS.n_set(); + //printf("top_level: %d level: %d\n", top_level, level); + if(new_idxs.size() < top_level-level+1) + throw std::runtime_error("Need more new index names for new datacopy loop levels"); + + for(int j=level-1; j<top_level; j++){ + idxs.push_back(new_idxs[j-level+1].c_str()); + //printf("pushing back: %s\n", new_idxs[j-level+1].c_str()); + } + idxNames.push_back(idxs); + } +} + +bool LoopCuda::unroll_cuda(int stmt_num, int level, int unroll_amount) +{ + int old_stmts =stmt.size(); + //bool b= unroll(stmt_num, , unroll_amount); + + + int dim = 2*level-1; + std::vector<int> lex = getLexicalOrder(stmt_num); + std::set<int> same_loop = getStatements(lex, dim-1); + + level = nonDummyLevel(stmt_num,level); + //printf("unrolling %d at level %d\n", stmt_num,level); + + //protonu--using the new version of unroll, which returns + //a set of ints instead of a bool. 
To keep Gabe's logic + //I'll check the size of the set, if it's 0 return true + //bool b= unroll(stmt_num, level, unroll_amount); + std::set<int> b_set= unroll(stmt_num, level, unroll_amount); + bool b = false; + if (b_set.size() == 0) b = true; + //end--protonu + + //Adjust idxNames to reflect updated state + std::vector<std::string> cIdxNames = cleanOrder(idxNames[stmt_num]); + std::vector<std::string> origSource = idxNames[stmt_num];; + //Drop index names at level + if(unroll_amount == 0){ + //For all statements that were in this unroll together, drop index name for unrolled level + idxNames[stmt_num][level-1] = ""; + for (std::set<int>::iterator i = same_loop.begin(); i != same_loop.end(); i++) { + //printf("in same loop as %d is %d\n", stmt_num, (*i)); + //idxNames[(*i)][level-1] = ""; + idxNames[(*i)] = idxNames[stmt_num]; + } + } + + lex = getLexicalOrder(stmt_num); + same_loop = getStatements(lex, dim-1); + + bool same_as_source = false; + int new_stmts = stmt.size(); + for(int i=old_stmts; i<new_stmts; i++){ + //Check whether we had a sync for the statement we are unrolling, if + //so, propogate that to newly created statements so that if they are + //in a different loop structure, they will also get a syncthreads + int size = syncs.size(); + for(int j=0; j<size; j++){ + if(syncs[j].first == stmt_num) + syncs.push_back(make_pair(i,syncs[j].second)); + } + + //protonu-making sure the vector of nonSplitLevels grows along with + //the statement structure + stmt_nonSplitLevels.push_back(omega::Tuple<int>()); + + + //We expect that new statements have a constant for the variable in + //stmt[i].IS at level (as seen with print_with_subs), otherwise there + //will be a for loop at level and idxNames should match stmt's + //idxNames pre-unrolled + Relation IS = stmt[i].IS; + //Ok, if you know how the hell to get anything out of a Relation, you + //should probably be able to do this more elegantly. But for now, I'm + //hacking it. 
+ std::string s = IS.print_with_subs_to_string(); + //s looks looks like + //{[_t49,8,_t51,_t52,128]: 0 <= _t52 <= 3 && 0 <= _t51 <= 15 && 0 <= _t49 && 64_t49+16_t52+_t51 <= 128} + //where level == 5, you see a integer in the input set + + //If that's not an integer and this is the first new statement, then + //we think codegen will have a loop at that level. It's not perfect, + //not sure if it can be determined without round-tripping to codegen. + int sIdx = 0; + int eIdx = 0; + for(int j=0; j<level-1; j++){ + sIdx = s.find(",",sIdx+1); + if(sIdx < 0) break; + } + if(sIdx > 0){ + eIdx = s.find("]"); + int tmp = s.find(",",sIdx+1); + if(tmp > 0 && tmp < eIdx) + eIdx = tmp; //", before ]" + if(eIdx > 0){ + sIdx++; + std::string var = s.substr(sIdx,eIdx-sIdx); + //printf("%s\n", s.c_str()); + //printf("set var for stmt %d at level %d is %s\n", i, level, var.c_str()); + if(atoi(var.c_str()) == 0 && i ==old_stmts){ + //TODO:Maybe do see if this new statement would be in the same + //group as the original and if it would, don't say + //same_as_source + if(same_loop.find(i) == same_loop.end()){ + printf("stmt %d level %d, newly created unroll statement should have same level indexes as source\n", i, level); + same_as_source = true; + } + } + } + } + + + //printf("fixing up statement %d n_set %d with %d levels\n", i, stmt[i].IS.n_set(), level-1); + if(same_as_source) + idxNames.push_back(origSource); + else + idxNames.push_back(idxNames[stmt_num]); + } + + return b; +} + +void LoopCuda::copy_to_texture(const char *array_name) +{ + //protonu--placeholder for now + //set the bool for using cuda memory as true + //in a vector of strings, put the names of arrays to tex mapped + if ( !texture ) + texture = new texture_memory_mapping(true, array_name); + else + texture->add(array_name); + + +} + + +void LoopCuda::copy_to_constant(const char *array_name) +{ + //protonu--placeholder for now + //set the bool for using cuda memory as true + //in a vector of strings, put the names 
of arrays to tex mapped + if ( !constant_mem ) + constant_mem = new constant_memory_mapping(true, array_name); + else + constant_mem->add(array_name); +} + +//protonu--moving this from Loop +tree_node_list* LoopCuda::codegen() +{ + if(code_gen_flags & GenCudaizeV2) + return cudaize_codegen_v2(); + //Do other flagged codegen methods, return plain vanilla generated code + return getCode(); +} + +//These three are in Omega code_gen.cc and are used as a massive hack to +//get out some info from MMGenerateCode. Yea for nasty side-effects. +namespace omega{ + extern int checkLoopLevel; + extern int stmtForLoopCheck; + extern int upperBoundForLevel; + extern int lowerBoundForLevel; +} + + +void LoopCuda::extractCudaUB(int stmt_num, int level, int &outUpperBound, int &outLowerBound){ + // check for sanity of parameters + const int m = stmt.size(); + if (stmt_num >= m || stmt_num < 0) + throw std::invalid_argument("invalid statement " + to_string(stmt_num)); + const int n = stmt[stmt_num].xform.n_out(); + if (level > (n-1)/2 || level <= 0) + throw std::invalid_argument("invalid loop level " + to_string(level)); + + int dim = 2*level-1; + + std::vector<int> lex = getLexicalOrder(stmt_num); + std::set<int> same_loop = getStatements(lex, dim-1); + + // extract the intersection of the iteration space to be considered + Relation hull; + { + hull = Relation::True(n); + for (std::set<int>::iterator i = same_loop.begin(); i != same_loop.end(); i++) { + hull = Intersection(hull, project_onto_levels(getNewIS(*i), dim+1, true)); + hull.simplify(2, 4); + } + + for (int i = 2; i <= dim+1; i+=2) { + //std::string name = std::string("_t") + to_string(t_counter++); + std::string name = std::string("_t") + to_string(tmp_loop_var_name_counter++); + hull.name_set_var(i, name); + } + hull.setup_names(); + } + + // extract the exact loop bound of the dimension to be unrolled + if (is_single_iteration(hull, dim)){ + throw std::runtime_error("No loop availabe at level to extract upper bound."); + 
} + Relation bound = get_loop_bound(hull, dim); + if (!bound.has_single_conjunct() || !bound.is_satisfiable() || bound.is_tautology()) + throw loop_error("loop error: unable to extract loop bound for cudaize"); + + // extract the loop stride + EQ_Handle stride_eq; + int stride = 1; + { + bool simple_stride = true; + int strides = countStrides(bound.query_DNF()->single_conjunct(), bound.set_var(dim+1), stride_eq, simple_stride); + if (strides > 1) + throw loop_error("loop error: too many strides"); + else if (strides == 1) { + int sign = stride_eq.get_coef(bound.set_var(dim+1)); +// assert(sign == 1 || sign == -1); + Constr_Vars_Iter it(stride_eq, true); + stride = abs((*it).coef/sign); + } + } + if(stride != 1){ + char buf[1024]; + sprintf(buf, "Cudaize: Loop at level %d has non-one stride of %d", level, stride); + throw std::runtime_error(buf); + } + + //Use code generation system to build tell us our bound information. We + //need a hard upper bound a 0 lower bound. + + checkLoopLevel = level*2; + stmtForLoopCheck = stmt_num; + upperBoundForLevel = -1; + lowerBoundForLevel = -1; + printCode(1,false); + checkLoopLevel = 0; + + outUpperBound = upperBoundForLevel; + outLowerBound = lowerBoundForLevel; + return; +} + + +void LoopCuda::printCode(int effort, bool actuallyPrint) const { + const int m = stmt.size(); + if (m == 0) + return; + const int n = stmt[0].xform.n_out(); + + + + Tuple<Relation> IS(m); + Tuple<Relation> xform(m); + Tuple<IntTuple > nonSplitLevels(m); + for (int i = 0; i < m; i++) { + IS[i+1] = stmt[i].IS; + xform[i+1] = stmt[i].xform; + nonSplitLevels[i+1] = stmt_nonSplitLevels[i]; + //nonSplitLevels[i+1] = stmt[i].nonSplitLevels; + } + + Tuple< Tuple<std::string> > idxTupleNames; + if(useIdxNames){ + for(int i=0; i<idxNames.size(); i++){ + Tuple<std::string> idxs; + for(int j=0; j<idxNames[i].size(); j++) + idxs.append(idxNames[i][j]); + idxTupleNames.append( idxs ); + } + } + + Relation known = Extend_Set(copy(this->known), n - 
this->known.n_set()); + CG_stringBuilder *ocg = new CG_stringBuilder(); + Tuple<CG_outputRepr *> nameInfo; + for (int i = 1; i <= m; i++) + nameInfo.append(new CG_stringRepr("s" + to_string(i))); + CG_outputRepr* repr = MMGenerateCode(ocg, xform, IS, nameInfo, known, nonSplitLevels, syncs, idxTupleNames, effort); + if(actuallyPrint) + std::cout << GetString(repr); +/* + for (int i = 1; i <= m; i++) + delete nameInfo[i]; +*/ + + delete ocg; +} + + + +void LoopCuda::printRuntimeInfo() const { + for(int i=0; i<stmt.size(); i++){ + Relation IS = stmt[i].IS; + Relation xform = stmt[i].xform; + printf("stmt[%d]\n", i); + printf("IS\n"); + IS.print_with_subs(); + + printf("xform[%d]\n", i); + xform.print_with_subs(); + + //printf("code\n"); + //static_cast<CG_suifRepr *>(stmt[i].code)->GetCode()->print_expr(); + } +} + +void LoopCuda::printIndexes() const { + for(int i=0; i<stmt.size(); i++){ + printf("stmt %d nset %d ", i, stmt[i].IS.n_set()); + + for(int j=0; j<idxNames[i].size(); j++){ + if(j>0) + printf(","); + printf("%s", idxNames[i][j].c_str()); + } + printf("\n"); + } +} + +tree_node_list* LoopCuda::getCode(int effort) const { + const int m = stmt.size(); + if (m == 0) + return new tree_node_list; + const int n = stmt[0].xform.n_out(); + + + + Tuple<CG_outputRepr *> ni(m); + Tuple<Relation> IS(m); + Tuple<Relation> xform(m); + Tuple< IntTuple > nonSplitLevels(m); + for (int i = 0; i < m; i++) { + ni[i+1] = stmt[i].code; + IS[i+1] = stmt[i].IS; + xform[i+1] = stmt[i].xform; + nonSplitLevels[i+1] = stmt_nonSplitLevels[i]; + //nonSplitLevels[i+1] = stmt[i].nonSplitLevels; + } + + + Relation known = Extend_Set(copy(this->known), n - this->known.n_set()); +#ifdef DEBUG +// std::cout << GetString(MMGenerateCode(new CG_stringBuilder(), xform, IS, known, effort)); +#endif + Tuple< Tuple<std::string> > idxTupleNames; + if(useIdxNames){ + for(int i=0; i<idxNames.size(); i++){ + Tuple<std::string> idxs; + for(int j=0; j<idxNames[i].size(); j++) + idxs.append(idxNames[i][j]); 
+ idxTupleNames.append( idxs ); + } + } + + CG_outputBuilder *ocg = ir->builder(); + CG_outputRepr *repr = MMGenerateCode(ocg, xform, IS, ni, known, nonSplitLevels, syncs, idxTupleNames, effort); + + //CG_outputRepr *overflow_initialization = ocg->CreateStmtList(); + //protonu--using the new function CG_suifBuilder::StmtListAppend + CG_outputRepr *overflow_initialization = ocg->StmtListAppend(NULL, NULL); + for (std::map<int, std::vector<Free_Var_Decl *> >::const_iterator i = overflow.begin(); i != overflow.end(); i++) + for (std::vector<Free_Var_Decl *>::const_iterator j = i->second.begin(); j != i->second.end(); j++) + //overflow_initialization = ocg->StmtListAppend(overflow_initialization, ocg->CreateStmtList(ocg->CreateAssignment(0, ocg->CreateIdent((*j)->base_name()), ocg->CreateInt(0)))); + overflow_initialization = ocg->StmtListAppend(overflow_initialization, ocg->StmtListAppend(ocg->CreateAssignment(0, ocg->CreateIdent((*j)->base_name()), ocg->CreateInt(0)), NULL)); + + repr = ocg->StmtListAppend(overflow_initialization, repr); + tree_node_list *tnl = static_cast<CG_suifRepr *>(repr)->GetCode(); + + delete repr; + /* + for (int i = 1; i <= m; i++) + delete ni[i]; + */ + + return tnl; +} + + +//protonu--adding constructors for the new derived class +LoopCuda::LoopCuda():Loop(), code_gen_flags(GenInit){} + +LoopCuda::LoopCuda(IR_Control *irc, int loop_num) + :Loop(irc) +{ + setup_code = NULL; + teardown_code = NULL; + code_gen_flags = 0; + cu_bx = cu_by = cu_tx = cu_ty = cu_tz = 1; + cu_num_reduce = 0; + cu_mode = GlobalMem; + texture = NULL; + constant_mem = NULL; + + int m=stmt.size(); + //printf("\n the size of stmt(initially) is: %d\n", stmt.size()); + for(int i=0; i<m; i++) + stmt_nonSplitLevels.push_back(omega::Tuple<int>()); + + + //protonu--setting up + //proc_symtab *symtab + //global_symtab *globals + + globals = ((IR_cudasuifCode *)ir)->gsym_ ; + std::vector<tree_for *> tf = ((IR_cudasuifCode *)ir)->get_loops(); + + symtab = 
tf[loop_num]->proc()->block()->proc_syms(); + + std::vector<tree_for *> deepest = find_deepest_loops(tf[loop_num]); + + for (int i = 0; i < deepest.size(); i++){ + index.push_back(deepest[i]->index()->name()); //reflects original code index names + } + + for(int i=0; i< stmt.size(); i++) + idxNames.push_back(index); //refects prefered index names (used as handles in cudaize v2) + useIdxNames=false; + +} + diff --git a/loop_cuda.hh b/loop_cuda.hh new file mode 100644 index 0000000..15726c0 --- /dev/null +++ b/loop_cuda.hh @@ -0,0 +1,163 @@ +#ifndef LOOP_CUDA_HH +#define LOOP_CUDA_HH + +#include "loop.hh" +#include <string.h> +#include <suif1.h> + + +enum MemoryMode { GlobalMem, SharedMem, TexMem }; + +//protonu --class introduced to hold texture memory information in one single place +//this might help me get over the weird memory issues I am having with the Loop class +//where someone/something corrupts my memory +class texture_memory_mapping{ +private: + bool tex_mem_used; + std::vector< std::string > tex_mapped_array_name; +public: + texture_memory_mapping ( bool used, const char * array_name){ + tex_mem_used = used; + tex_mapped_array_name.push_back(std::string(array_name)); + } + + void add(const char * array_name) { + tex_mapped_array_name.push_back(std::string(array_name)); + } + + bool is_tex_mem_used() {return tex_mem_used;} + bool is_array_tex_mapped(const char * array_name){ + + for( int i=0; i<tex_mapped_array_name.size(); i++){ + if(!(strcmp(array_name, tex_mapped_array_name[i].c_str()))) + return true; + } + return false; + } + texture_memory_mapping() {tex_mem_used = false;} +}; + +//protonu --class introduced to hold constant memory information in one single place +//this might help me get over the weird memory issues I am having with the Loop class +//where someone/something corrupts my memory +class constant_memory_mapping{ +private: + bool cons_mem_used; + std::vector< std::string > cons_mapped_array_name; +public: + constant_memory_mapping ( bool 
used, const char * array_name){ + cons_mem_used = used; + cons_mapped_array_name.push_back(std::string(array_name)); + } + + void add(const char * array_name) { + cons_mapped_array_name.push_back(std::string(array_name)); + } + + bool is_cons_mem_used() {return cons_mem_used;} + bool is_array_cons_mapped(const char * array_name){ + + for( int i=0; i<cons_mapped_array_name.size(); i++){ + if(!(strcmp(array_name, cons_mapped_array_name[i].c_str()))) + return true; + } + return false; + } + constant_memory_mapping() {cons_mem_used = false;} +}; + + +class LoopCuda: public Loop{ + +public: + std::vector<proc_sym*> new_procs; //Need adding to a fse + std::vector< std::vector<std::string> > idxNames; + std::vector< std::pair<int, std::string> > syncs; + bool useIdxNames; + std::vector<std::string> index; + proc_symtab *symtab; + global_symtab *globals; + + //protonu--inserting this here, Gabe's implementation had it + //the struct statment as nonSplitLevels + std::vector<omega::Tuple<int> > stmt_nonSplitLevels; + + texture_memory_mapping *texture; //protonu + constant_memory_mapping *constant_mem; //protonu + std::map<std::string, int> array_dims; + omega::CG_outputRepr *setup_code; + omega::CG_outputRepr *teardown_code; + + unsigned int code_gen_flags; + enum CodeGenFlags { + GenInit = 0x00, + GenCudaizeV2 = 0x02, + }; + + + //varibles used by cudaize_codegen + //block x, y sizes, N and num_red + int cu_bx, cu_by, cu_n, cu_num_reduce; + //block statement and level + int cu_block_stmt, cu_block_level; + //thread x, y, z + int cu_tx, cu_ty, cu_tz; + //tile statements, and loop-levels (cudaize v1) + std::vector< std::vector<int> > cu_thread_loop; + std::vector<int> cu_thread_sync; + MemoryMode cu_mode; + + std::string cu_nx_name, cu_ny_name, cu_kernel_name; + int nonDummyLevel(int stmt, int level); + bool symbolExists(std::string s); + void addSync(int stmt, std::string idx); + void renameIndex(int stmt, std::string idx, std::string newName); + bool validIndexes(int stmt, 
const std::vector<std::string>& idxs); + void extractCudaUB(int stmt_num, int level, int &outUpperBound, int &outLowerBound); + + void printCode(int effort=1, bool actuallyPrint=true) const; + void printRuntimeInfo() const; + void printIndexes() const; + tree_node_list* getCode(int effort = 1) const; + + + void permute_cuda(int stmt, const std::vector<std::string>& curOrder); + //protonu-writing a wrapper for the Chun's new permute function + bool permute(int stmt_num, const std::vector<int> &pi); + //end--protonu. + void tile_cuda(int stmt, int level, int outer_level); + void tile_cuda(int level, int tile_size, int outer_level, std::string idxName, std::string ctrlName, TilingMethodType method=StridedTile); + void tile_cuda(int stmt, int level, int tile_size, int outer_level, std::string idxName, std::string ctrlName, TilingMethodType method=StridedTile); + bool datacopy_privatized_cuda(int stmt_num, int level, const std::string &array_name, const std::vector<int> &privatized_levels, bool allow_extra_read = false, int fastest_changing_dimension = -1, int padding_stride = 1, int padding_alignment = 1, bool cuda_shared=false); + bool datacopy_cuda(int stmt_num, int level, const std::string &array_name, std::vector<std::string> new_idxs, bool allow_extra_read = false, int fastest_changing_dimension = -1, int padding_stride = 1, int padding_alignment = 4, bool cuda_shared=false); + bool unroll_cuda(int stmt_num, int level, int unroll_amount); + //protonu--using texture memory + void copy_to_texture(const char *array_name); + //protonu--using constant memory + void copy_to_constant(const char *array_name); + int findCurLevel(int stmt, std::string idx); + /** + * + * @param kernel_name Name of the GPU generated kernel + * @param nx Iteration space over the x dimention + * @param ny Iteration space over the y dimention + * @param tx Tile dimention over x dimention + * @param ty Tile dimention over the y dimention + * @param num_reduce The number of dimentions to reduce 
by mapping to the GPU implicit blocks/threads + */ + //stmnt_num is referenced from the perspective of being inside the cudize block loops + bool cudaize_v2(std::string kernel_name, std::map<std::string, int> array_dims, + std::vector<std::string> blockIdxs, std::vector<std::string> threadIdxs); + tree_node_list* cudaize_codegen_v2(); + tree_node_list* codegen(); + + //protonu--have to add the constructors for the new class + //and maybe destructors (?) + LoopCuda(); + //LoopCuda(IR_Code *ir, tree_for *tf, global_symtab* gsym); + LoopCuda(IR_Control *ir_c, int loop_num);//protonu-added so as to not change ir_suif + ~LoopCuda(); + +}; + +#endif diff --git a/loop_cuda_rose.cc b/loop_cuda_rose.cc new file mode 100644 index 0000000..c5633ee --- /dev/null +++ b/loop_cuda_rose.cc @@ -0,0 +1,3734 @@ +/***************************************************************************** + Copyright (C) 2009 University of Utah + All Rights Reserved. + + Purpose: + Cudaize methods + + Notes: + + History: + 1/7/10 Created by Gabe Rudy by migrating code from loop.cc + 31/1/11 Modified by Protonu Basu +*****************************************************************************/ +#define TRANSFORMATION_FILE_INFO Sg_File_Info::generateDefaultFileInfoForTransformationNode() +#include <code_gen/CG_stringBuilder.h> +#include <codegen.h> +#include <code_gen/CG_utils.h> +#include <code_gen/CG_outputRepr.h> +#include "loop_cuda_rose.hh" +#include "loop.hh" +#include <math.h> +//#include <useful.h> +#include "omegatools.hh" +#include "ir_cudarose.hh" +#include "ir_rose.hh" +#include "ir_rose_utils.hh" +#include "chill_error.hh" +#include <vector> +#include "Outliner.hh" +//#define DEBUG +using namespace omega; +using namespace SageBuilder; +using namespace SageInterface; +//using namespace Outliner; +//using namespace ASTtools; +char *k_cuda_texture_memory; //protonu--added to track texture memory type +//extern char *omega::k_cuda_texture_memory; //protonu--added to track texture memory type 
+extern char *omega::k_ocg_comment; + +static int cudaDebug; +class CudaStaticInit { +public: + CudaStaticInit() { + cudaDebug = 0; //Change this to 1 for debug + } +}; +static CudaStaticInit junkInitInstance__; + +std::string& upcase(std::string& s) { + for (int i = 0; i < s.size(); i++) + s[i] = toupper(s[i]); + return s; +} + +void printVs(const std::vector<std::string>& curOrder) { + if (!cudaDebug) return; + for (int i = 0; i < curOrder.size(); i++) { + if (i > 0) + printf(","); + printf("%s", curOrder[i].c_str()); + } + printf("\n"); +} + +void printVS(const std::vector<std::string>& curOrder) { + if(!cudaDebug) return; + for (int i = 0; i < curOrder.size(); i++) { + if (i > 0) + printf(","); + printf("%s", curOrder[i].c_str()); + } + printf("\n"); +} + +LoopCuda::~LoopCuda() { + const int m = stmt.size(); + for (int i = 0; i < m; i++) + stmt[i].code->clear(); +} + +bool LoopCuda::symbolExists(std::string s) { + + if (body_symtab->find_variable(SgName(s.c_str())) + || parameter_symtab->find_variable(SgName(s.c_str()))) + return true; + if (globals->lookup_variable_symbol(SgName(s.c_str()))) + return true; + for (int i = 0; i < idxNames.size(); i++) + for (int j = 0; j < idxNames[i].size(); j++) + if (strcmp(idxNames[i][j].c_str(), s.c_str()) == 0) + return true; + return false; +} + +void LoopCuda::addSync(int stmt_num, std::string idxName) { + //we store these and code-gen inserts sync to omega comments where stmt + //in loop that has idxName being generated + syncs.push_back(make_pair(stmt_num, idxName)); +} + +void LoopCuda::renameIndex(int stmt_num, std::string idx, std::string newName) { + int level = findCurLevel(stmt_num, idx); + if (idxNames.size() <= stmt_num || idxNames[stmt_num].size() < level) + throw std::runtime_error("Invalid statment number of index"); + idxNames[stmt_num][level - 1] = newName.c_str(); +} + +enum Type { + Int +}; + +SgNode* wrapInIfFromMinBound(SgNode* then_part, SgForStatement* loop, + SgScopeStatement* symtab, 
SgVariableSymbol* bound_sym) { + // CG_roseBuilder *ocg = new CG_roseBuilder( + + SgBinaryOp* test_expr = isSgBinaryOp(loop->get_test_expr()); + SgExpression* upperBound; + SgExpression* conditional; + upperBound = test_expr->get_rhs_operand(); + CG_outputRepr *ifstmt; + + SgCallExpression *call; + if (call = isSgCallExpression(upperBound)) + if (isSgVarRefExp(call->get_function())->get_symbol()->get_name().getString() + == "__rose_lt") { + SgExprListExp* arg_list = call->get_args(); + SgExpression *if_bound = *(arg_list->get_expressions().begin()); + /*This relies on the minimum expression being the rhs operand of + * the min instruction. + */ + SgIfStmt *ifstmt = buildIfStmt( + buildLessOrEqualOp(buildVarRefExp(bound_sym), if_bound), + isSgStatement(then_part), NULL); + return isSgNode(ifstmt); + + } + +/* if (isSgConditionalExp(upperBound)) { + conditional = isSgConditionalExp(upperBound)->get_conditional_exp(); + + if (isSgBinaryOp(conditional)) { + SgBinaryOp* binop = isSgBinaryOp(conditional); + + if (isSgLessThanOp(binop) || isSgLessOrEqualOp(binop)) { + SgIfStmt *ifstmt = buildIfStmt( + buildLessOrEqualOp(buildVarRefExp(bound_sym), + test_expr), isSgStatement(then_part), NULL); + return isSgNode(ifstmt); + } + + } + + } +*/ + return then_part; +} + +/** + * This would be better if it was done by a CHiLL xformation instead of at codegen + * + * state: + * for(...) + * for(...) + * cur_body + * stmt1 + * + * stm1 is in-between two loops that are going to be reduced. The + * solution is to put stmt1 at the end of cur_body but conditionally run + * in on the last step of the for loop. + * + * A CHiLL command that would work better: + * + * for(...) + * stmt0 + * for(for i=0; i<n; i++) + * cur_body + * stmt1 + * => + * for(...) 
+ * for(for i=0; i<n; i++) + * if(i==0) stmt0 + * cur_body + * if(i==n-1) stmt1 + */ + +std::vector<SgForStatement*> findCommentedFors(const char* index, SgNode* tnl) { + std::vector<SgForStatement *> result; + bool next_loop_ok = false; + + if (isSgBasicBlock(tnl)) { + + SgStatementPtrList& list = isSgBasicBlock(tnl)->get_statements(); + + for (SgStatementPtrList::iterator it = list.begin(); it != list.end(); + it++) { + std::vector<SgForStatement*> t = findCommentedFors(index, + isSgNode(*it)); + std::copy(t.begin(), t.end(), back_inserter(result)); + } + } else if (isSgForStatement(tnl)) { + + AstTextAttribute* att = + (AstTextAttribute*) (isSgNode(tnl)->getAttribute( + "omega_comment")); + std::string comment = att->toString(); + + if (comment.find("~cuda~") != std::string::npos + && comment.find("preferredIdx: ") != std::string::npos) { + std::string idx = comment.substr( + comment.find("preferredIdx: ") + 14, std::string::npos); + if (idx.find(" ") != std::string::npos) + idx = idx.substr(0, idx.find(" ")); + if (strcmp(idx.c_str(), index) == 0) + next_loop_ok = true; + } + + if (next_loop_ok) { + //printf("found loop %s\n", static_cast<tree_for *>(tn)->index()->name()); + result.push_back(isSgForStatement(tnl)); + } else { + //printf("looking down for loop %s\n", static_cast<tree_for *>(tn)->index()->name()); + std::vector<SgForStatement*> t = findCommentedFors(index, + isSgForStatement(tnl)->get_loop_body()); + std::copy(t.begin(), t.end(), back_inserter(result)); + } + next_loop_ok = false; + } else if (isSgIfStmt(tnl)) { + //printf("looking down if\n"); + SgIfStmt *tni = isSgIfStmt(tnl); + std::vector<SgForStatement*> t = findCommentedFors(index, + tni->get_true_body()); + std::copy(t.begin(), t.end(), back_inserter(result)); + } + + return result; +} + +SgNode* forReduce(SgForStatement* loop, SgVariableSymbol* reduceIndex, + SgScopeStatement* body_syms) { + //We did the replacements all at once with recursiveFindPreferedIdxs + //replacements r; + 
//r.oldsyms.append(loop->index()); + //r.newsyms.append(reduceIndex); + //tree_for* new_loop = (tree_for*)loop->clone_helper(&r, true); + SgForStatement* new_loop = loop; + + //return body one loops in + SgNode* tnl = loop_body_at_level(new_loop, 1); + //wrap in conditional if necessary + tnl = wrapInIfFromMinBound(tnl, new_loop, body_syms, reduceIndex); + return tnl; +} + +void recursiveFindRefs(SgNode* code, std::set<const SgVariableSymbol *>& syms, + SgFunctionDefinition* def) { + + SgStatement* s = isSgStatement(code); + // L = {symbols defined within 's'}, local variables declared within 's' + ASTtools::VarSymSet_t L; + ASTtools::collectDefdVarSyms(s, L); + //dump (L, "L = "); + + // U = {symbols used within 's'} + ASTtools::VarSymSet_t U; + ASTtools::collectRefdVarSyms(s, U); + //dump (U, "U = "); + + // U - L = {symbols used within 's' but not defined in 's'} + // variable references to non-local-declared variables + ASTtools::VarSymSet_t diff_U_L; + set_difference(U.begin(), U.end(), L.begin(), L.end(), + inserter(diff_U_L, diff_U_L.begin())); + //dump (diff_U_L, "U - L = "); + + // Q = {symbols defined within the function surrounding 's' that are + // visible at 's'}, including function parameters + ASTtools::VarSymSet_t Q; + ASTtools::collectLocalVisibleVarSyms(def->get_declaration(), s, Q); +// dump (Q, "Q = "); + + // (U - L) \cap Q = {variables that need to be passed as parameters + // to the outlined function} + // a sub set of variables that are not globally visible (no need to pass at all) + // It excludes the variables with a scope between global and the enclosing function + set_intersection(diff_U_L.begin(), diff_U_L.end(), Q.begin(), Q.end(), + inserter(syms, syms.begin())); + + /* std::vector<SgVariableSymbol *> scalars; + //SgNode *tnl = static_cast<const omega::CG_roseRepr *>(repr)->GetCode(); + SgStatement* stmt; + SgExpression* exp; + if (tnl != NULL) { + if(stmt = isSgStatement(tnl)){ + if(isSgBasicBlock(stmt)){ + SgStatementPtrList& stmts 
= isSgBasicBlock(stmt)->get_statements(); + for(int i =0; i < stmts.size(); i++){ + //omega::CG_roseRepr *r = new omega::CG_roseRepr(isSgNode(stmts[i])); + std::vector<SgVariableSymbol *> a = recursiveFindRefs(isSgNode(stmts[i])); + //delete r; + std::copy(a.begin(), a.end(), back_inserter(scalars)); + } + + } + else if(isSgForStatement(stmt)){ + + SgForStatement *tnf = isSgForStatement(stmt); + //omega::CG_roseRepr *r = new omega::CG_roseRepr(isSgStatement(tnf->get_loop_body())); + std::vector<SgVariableSymbol *> a = recursiveFindRefs(isSgNode(tnf->get_loop_body())); + //delete r; + std::copy(a.begin(), a.end(), back_inserter(scalars)); + } + else if(isSgFortranDo(stmt)){ + SgFortranDo *tfortran = isSgFortranDo(stmt); + omega::CG_roseRepr *r = new omega::CG_roseRepr(isSgStatement(tfortran->get_body())); + std::vector<SgVariableSymbol *> a = recursiveFindRefs(r); + delete r; + std::copy(a.begin(), a.end(), back_inserter(scalars)); + } + + else if(isSgIfStmt(stmt) ){ + SgIfStmt* tni = isSgIfStmt(stmt); + //omega::CG_roseRepr *r = new omega::CG_roseRepr(isSgNode(tni->get_conditional())); + std::vector<SgVariableSymbol *> a = recursiveFindRefs(isSgNode(tni->get_conditional())); + //delete r; + std::copy(a.begin(), a.end(), back_inserter(scalars)); + //r = new omega::CG_roseRepr(isSgNode(tni->get_true_body())); + a = recursiveFindRefs(isSgNode(tni->get_true_body())); + //delete r; + std::copy(a.begin(), a.end(), back_inserter(scalars)); + //r = new omega::CG_roseRepr(isSgNode(tni->get_false_body())); + a = recursiveFindRefs(isSgNode(tni->get_false_body())); + //delete r; + std::copy(a.begin(), a.end(), back_inserter(scalars)); + } + else if(isSgExprStatement(stmt)) { + //omega::CG_roseRepr *r = new omega::CG_roseRepr(isSgExpression(isSgExprStatement(stmt)->get_expression())); + std::vector<SgVariableSymbol *> a = recursiveFindRefs(isSgNode(isSgExprStatement(stmt)->get_expression())); + //delete r; + std::copy(a.begin(), a.end(), back_inserter(scalars)); + + } + } + } + 
else{ + SgExpression* op = isSgExpression(tnl); + if(isSgVarRefExp(op)){ + + scalars.push_back(isSgVarRefExp(op)->get_symbol()); + + } + else if( isSgAssignOp(op)){ + //omega::CG_roseRepr *r1 = new omega::CG_roseRepr(isSgAssignOp(op)->get_lhs_operand()); + std::vector<SgVariableSymbol *> a1 = recursiveFindRefs(isSgNode(isSgAssignOp(op)->get_lhs_operand())); + //delete r1; + std::copy(a1.begin(), a1.end(), back_inserter(scalars)); + //omega::CG_roseRepr *r2 = new omega::CG_roseRepr(isSgAssignOp(op)->get_rhs_operand()); + std::vector<SgVariableSymbol *> a2 = recursiveFindRefs(isSgNode(isSgAssignOp(op)->get_rhs_operand())); + //delete r2; + std::copy(a2.begin(), a2.end(), back_inserter(scalars)); + + } + else if(isSgBinaryOp(op)){ + // omega::CG_roseRepr *r1 = new omega::CG_roseRepr(isSgBinaryOp(op)->get_lhs_operand()); + std::vector<SgVariableSymbol *> a1 = recursiveFindRefs(isSgNode(isSgBinaryOp(op)->get_lhs_operand())); + //delete r1; + std::copy(a1.begin(), a1.end(), back_inserter(scalars)); + //omega::CG_roseRepr *r2 = new omega::CG_roseRepr(isSgBinaryOp(op)->get_rhs_operand()); + std::vector<SgVariableSymbol *> a2 = recursiveFindRefs((isSgBinaryOp(op)->get_rhs_operand())); + //delete r2; + std::copy(a2.begin(), a2.end(), back_inserter(scalars)); + } + else if(isSgUnaryOp(op)){ + //omega::CG_roseRepr *r1 = new omega::CG_roseRepr(isSgUnaryOp(op)->get_operand()); + std::vector<SgVariableSymbol *> a1 = recursiveFindRefs(isSgNode(isSgUnaryOp(op)->get_operand())); + //delete r1; + std::copy(a1.begin(), a1.end(), back_inserter(scalars)); + } + + } + return scalars; + + + */ + +} + +SgNode* recursiveFindReplacePreferedIdxs(SgNode* code, SgSymbolTable* body_syms, + SgSymbolTable* param_syms, SgScopeStatement* body, + std::map<std::string, SgVariableSymbol*>& loop_idxs, + SgGlobal* globalscope, bool sync = false) { + //tree_node_list* tnl = new tree_node_list; + //tree_node_list_iter tnli(code); + SgVariableSymbol* idxSym = 0; + std::vector<SgStatement*> r1; + 
std::vector<SgNode*> r2; + SgNode* tnli; + SgNode* tnli1; + SgNode* tnli2; + SgBasicBlock * clone; + + if (isSgForStatement(code)) { + AstTextAttribute* att = + (AstTextAttribute*) (isSgNode(code)->getAttribute( + "omega_comment")); + + std::string comment; + if (att != NULL) + comment = att->toString(); + + if (comment.find("~cuda~") != std::string::npos + && comment.find("preferredIdx: ") != std::string::npos) { + std::string idx = comment.substr( + comment.find("preferredIdx: ") + 14, std::string::npos); + if (idx.find(" ") != std::string::npos) + idx = idx.substr(0, idx.find(" ")); + if (loop_idxs.find(idx) != loop_idxs.end()) + idxSym = loop_idxs.find(idx)->second; + //Get the proc variable sybol for this preferred index + if (idxSym == 0) { + idxSym = body_syms->find_variable(idx.c_str()); + if (!idxSym) + idxSym = param_syms->find_variable(idx.c_str()); + //printf("idx not found: lookup %p\n", idxSym); + if (!idxSym) { + SgVariableDeclaration* defn = buildVariableDeclaration( + SgName((char*) idx.c_str()), buildIntType()); + //idxSym = new var_sym(type_s32, (char*)idx.c_str()); + SgInitializedNamePtrList& variables = defn->get_variables(); + SgInitializedNamePtrList::const_iterator i = + variables.begin(); + SgInitializedName* initializedName = *i; + SgVariableSymbol* vs = new SgVariableSymbol( + initializedName); + prependStatement(defn, body); + vs->set_parent(body_syms); + body_syms->insert(SgName((char*) idx.c_str()), vs); + idxSym = vs; + //printf("idx created and inserted\n"); + } + //Now insert into our map for future + if (cudaDebug) + std::cout << idx << "\n\n"; + loop_idxs.insert(make_pair(idx, idxSym)); + } + //See if we have a sync as well + if (comment.find("sync") != std::string::npos) { + //printf("Inserting sync after current block\n"); + sync = true; + } + + } + if (idxSym) { + SgForInitStatement* list = + isSgForStatement(code)->get_for_init_stmt(); + SgStatementPtrList& initStatements = list->get_init_stmt(); + 
SgStatementPtrList::const_iterator j = initStatements.begin(); + const SgVariableSymbol* index; + + if (SgExprStatement *expr = isSgExprStatement(*j)) + if (SgAssignOp* op = isSgAssignOp(expr->get_expression())) + if (SgVarRefExp* var_ref = isSgVarRefExp( + op->get_lhs_operand())) + index = var_ref->get_symbol(); + + std::vector<SgVarRefExp *> array = substitute(code, index, NULL, + isSgNode(body_syms)); + + for (int j = 0; j < array.size(); j++) + array[j]->set_symbol(idxSym); + } + + SgStatement* body_ = isSgStatement( + recursiveFindReplacePreferedIdxs( + isSgNode((isSgForStatement(code)->get_loop_body())), + body_syms, param_syms, body, loop_idxs, globalscope)); + + omega::CG_roseRepr * tnl = new omega::CG_roseRepr(code); + omega::CG_outputRepr* block = tnl->clone(); + tnli = static_cast<const omega::CG_roseRepr *>(block)->GetCode(); + + isSgForStatement(tnli)->set_loop_body(body_); + body_->set_parent(tnli); + + if (idxSym) { + SgForInitStatement* list = + isSgForStatement(tnli)->get_for_init_stmt(); + SgStatementPtrList& initStatements = list->get_init_stmt(); + SgStatementPtrList::const_iterator j = initStatements.begin(); + const SgVariableSymbol* index; + + if (SgExprStatement *expr = isSgExprStatement(*j)) + if (SgAssignOp* op = isSgAssignOp(expr->get_expression())) + if (SgVarRefExp* var_ref = isSgVarRefExp( + op->get_lhs_operand())) + index = var_ref->get_symbol(); + + std::vector<SgVarRefExp *> array = substitute(tnli, index, NULL, + isSgNode(body_syms)); + + for (int j = 0; j < array.size(); j++) + array[j]->set_symbol(idxSym); + } + // std::cout << isSgNode(body_)->unparseToString() << "\n\n"; + if (att != NULL) + tnli->setAttribute("omega_comment", att); + + if (sync) { + SgName name_syncthreads("__syncthreads"); + SgFunctionSymbol * syncthreads_symbol = + globalscope->lookup_function_symbol(name_syncthreads); + + // Create a call to __syncthreads(): + SgFunctionCallExp * syncthreads_call = buildFunctionCallExp( + syncthreads_symbol, 
buildExprListExp()); + + SgExprStatement* stmt = buildExprStatement(syncthreads_call); + + /* if (SgBasicBlock* bb = isSgBasicBlock( + isSgForStatement(code)->get_loop_body())) + appendStatement(isSgStatement(stmt), bb); + + else if (SgStatement* ss = isSgStatement( + isSgForStatement(code)->get_loop_body())) { + SgBasicBlock* bb2 = buildBasicBlock(); + + isSgNode(ss)->set_parent(bb2); + appendStatement(ss, bb2); + + appendStatement(isSgStatement(stmt), bb2); + isSgNode(stmt)->set_parent(bb2); + isSgForStatement(code)->set_loop_body(bb2); + isSgNode(bb2)->set_parent(code); + } + */ + + SgBasicBlock* bb2 = buildBasicBlock(); + + bb2->append_statement(isSgStatement(tnli)); + bb2->append_statement(stmt); + /* SgNode* parent = code->get_parent(); + if(!isSgStatement(parent)) + throw loop_error("Parent not a statement"); + + if(isSgForStatement(parent)){ + if(SgStatement *ss = isSgForStatement(isSgForStatement(parent)->get_loop_body())){ + omega::CG_roseRepr * tnl = new omega::CG_roseRepr(ss); + omega::CG_outputRepr* block= tnl->clone(); + + SgNode *new_ss = static_cast<const omega::CG_roseRepr *>(block)->GetCode(); + SgBasicBlock* bb2 = buildBasicBlock(); + + isSgNode(new_ss)->set_parent(bb2); + appendStatement(isSgStatement(new_ss), bb2); + appendStatement(isSgStatement(stmt), bb2); + isSgNode(stmt)->set_parent(bb2); + + isSgStatement(parent)->replace_statement_from_basicBlock(ss, isSgStatement(bb2)); + + }else if(isSgBasicBlock(isSgForStatement(parent)->get_loop_body())) + isSgStatement(isSgForStatement(parent)->get_loop_body())->insert_statement(isSgStatement(code), stmt, false); + else + throw loop_error("parent statement type undefined!!"); + + } + else if(isSgBasicBlock(parent)) + isSgStatement(parent)->insert_statement(isSgStatement(code), stmt, false); + else + throw loop_error("parent statement type undefined!!"); + + //tnl->print(); + * + * + */ + sync = true; + return isSgNode(bb2); + + } else + return tnli; + } else if (isSgIfStmt(code)) { + SgStatement* 
body_ = isSgStatement( + recursiveFindReplacePreferedIdxs( + isSgNode((isSgIfStmt(code)->get_true_body())), + body_syms, param_syms, body, loop_idxs, globalscope)); + + omega::CG_roseRepr * tnl = new omega::CG_roseRepr(code); + omega::CG_outputRepr* block = tnl->clone(); + tnli = static_cast<const omega::CG_roseRepr *>(block)->GetCode(); + + isSgIfStmt(tnli)->set_true_body(body_); + + if ((isSgIfStmt(code)->get_false_body())) + isSgIfStmt(tnli)->set_false_body( + isSgStatement( + recursiveFindReplacePreferedIdxs( + isSgNode( + (isSgIfStmt(code)->get_false_body())), + body_syms, param_syms, body, loop_idxs, + globalscope))); + + return tnli; + } else if (isSgStatement(code) && !isSgBasicBlock(code)) { + omega::CG_roseRepr * tnl = new omega::CG_roseRepr(code); + omega::CG_outputRepr* block = tnl->clone(); + tnli = static_cast<const omega::CG_roseRepr *>(block)->GetCode(); + + return tnli; + + } else if (isSgBasicBlock(code)) { + SgStatementPtrList& tnl = isSgBasicBlock(code)->get_statements(); + + SgStatementPtrList::iterator temp; + clone = buildBasicBlock(); + bool sync_found = false; + for (SgStatementPtrList::const_iterator it = tnl.begin(); + it != tnl.end(); it++) { + + if (isSgForStatement(*it)) { + AstTextAttribute* att = + (AstTextAttribute*) (isSgNode(*it)->getAttribute( + "omega_comment")); + + std::string comment; + if (att != NULL) + comment = att->toString(); + + if (comment.find("~cuda~") != std::string::npos + && comment.find("preferredIdx: ") + != std::string::npos) { + std::string idx = comment.substr( + comment.find("preferredIdx: ") + 14, + std::string::npos); + if (idx.find(" ") != std::string::npos) + idx = idx.substr(0, idx.find(" ")); + //printf("sym_tab preferred index: %s\n", idx.c_str()); + if (loop_idxs.find(idx) != loop_idxs.end()) + idxSym = loop_idxs.find(idx)->second; + //Get the proc variable sybol for this preferred index + if (idxSym == 0) { + idxSym = body_syms->find_variable(idx.c_str()); + if (!idxSym) + idxSym = 
param_syms->find_variable(idx.c_str()); + //printf("idx not found: lookup %p\n", idxSym); + if (!idxSym) { + SgVariableDeclaration* defn = + buildVariableDeclaration( + SgName((char*) idx.c_str()), + buildIntType()); + //idxSym = new var_sym(type_s32, (char*)idx.c_str()); + SgInitializedNamePtrList& variables = + defn->get_variables(); + SgInitializedNamePtrList::const_iterator i = + variables.begin(); + SgInitializedName* initializedName = *i; + SgVariableSymbol* vs = new SgVariableSymbol( + initializedName); + prependStatement(defn, body); + vs->set_parent(body_syms); + body_syms->insert(SgName((char*) idx.c_str()), vs); + //printf("idx created and inserted\n"); + idxSym = vs; + } + //Now insert into our map for future + if (cudaDebug) + std::cout << idx << "\n\n"; + loop_idxs.insert(make_pair(idx, idxSym)); + + } + //See if we have a sync as well + if (comment.find("sync") != std::string::npos) { + //printf("Inserting sync after current block\n"); + sync = true; + } + + } + if (idxSym) { + SgForInitStatement* list = + isSgForStatement(*it)->get_for_init_stmt(); + SgStatementPtrList& initStatements = list->get_init_stmt(); + SgStatementPtrList::const_iterator j = + initStatements.begin(); + const SgVariableSymbol* index; + + if (SgExprStatement *expr = isSgExprStatement(*j)) + if (SgAssignOp* op = isSgAssignOp( + expr->get_expression())) + if (SgVarRefExp* var_ref = isSgVarRefExp( + op->get_lhs_operand())) + index = var_ref->get_symbol(); + + std::vector<SgVarRefExp *> array = substitute(*it, index, + NULL, isSgNode(body_syms)); + + for (int j = 0; j < array.size(); j++) + array[j]->set_symbol(idxSym); + + } + + SgStatement* body_ = + isSgStatement( + recursiveFindReplacePreferedIdxs( + isSgNode( + (isSgForStatement(*it)->get_loop_body())), + body_syms, param_syms, body, loop_idxs, + globalscope)); + + omega::CG_roseRepr * tnl = new omega::CG_roseRepr(*it); + omega::CG_outputRepr* block = tnl->clone(); + tnli = + static_cast<const omega::CG_roseRepr 
*>(block)->GetCode(); + + isSgForStatement(tnli)->set_loop_body(body_); + body_->set_parent(tnli); + if (idxSym) { + SgForInitStatement* list = + isSgForStatement(tnli)->get_for_init_stmt(); + SgStatementPtrList& initStatements = list->get_init_stmt(); + SgStatementPtrList::const_iterator j = + initStatements.begin(); + const SgVariableSymbol* index; + + if (SgExprStatement *expr = isSgExprStatement(*j)) + if (SgAssignOp* op = isSgAssignOp( + expr->get_expression())) + if (SgVarRefExp* var_ref = isSgVarRefExp( + op->get_lhs_operand())) + index = var_ref->get_symbol(); + + std::vector<SgVarRefExp *> array = substitute(tnli, index, + NULL, isSgNode(body_syms)); + + for (int j = 0; j < array.size(); j++) + array[j]->set_symbol(idxSym); + } + idxSym = 0; + // std::cout << isSgNode(body_)->unparseToString() << "\n\n"; + if (att != NULL) + tnli->setAttribute("omega_comment", att); + clone->append_statement(isSgStatement(tnli)); + if (sync) { + SgName name_syncthreads("__syncthreads"); + SgFunctionSymbol * syncthreads_symbol = + globalscope->lookup_function_symbol( + name_syncthreads); + + // Create a call to __syncthreads(): + SgFunctionCallExp * syncthreads_call = buildFunctionCallExp( + syncthreads_symbol, buildExprListExp()); + + SgExprStatement* stmt = buildExprStatement( + syncthreads_call); + + /* if (SgBasicBlock* bb = isSgBasicBlock( + isSgForStatement(code)->get_loop_body())) + appendStatement(isSgStatement(stmt), bb); + + else if (SgStatement* ss = isSgStatement( + isSgForStatement(code)->get_loop_body())) { + SgBasicBlock* bb2 = buildBasicBlock(); + + isSgNode(ss)->set_parent(bb2); + appendStatement(ss, bb2); + + appendStatement(isSgStatement(stmt), bb2); + isSgNode(stmt)->set_parent(bb2); + isSgForStatement(code)->set_loop_body(bb2); + isSgNode(bb2)->set_parent(code); + } + */ + + //SgBasicBlock* bb2 = buildBasicBlock(); + clone->append_statement(stmt); + /* SgNode* parent = code->get_parent(); + if(!isSgStatement(parent)) + throw loop_error("Parent not a 
statement"); + + if(isSgForStatement(parent)){ + if(SgStatement *ss = isSgForStatement(isSgForStatement(parent)->get_loop_body())){ + omega::CG_roseRepr * tnl = new omega::CG_roseRepr(ss); + omega::CG_outputRepr* block= tnl->clone(); + + SgNode *new_ss = static_cast<const omega::CG_roseRepr *>(block)->GetCode(); + SgBasicBlock* bb2 = buildBasicBlock(); + + isSgNode(new_ss)->set_parent(bb2); + appendStatement(isSgStatement(new_ss), bb2); + appendStatement(isSgStatement(stmt), bb2); + isSgNode(stmt)->set_parent(bb2); + + isSgStatement(parent)->replace_statement_from_basicBlock(ss, isSgStatement(bb2)); + + }else if(isSgBasicBlock(isSgForStatement(parent)->get_loop_body())) + isSgStatement(isSgForStatement(parent)->get_loop_body())->insert_statement(isSgStatement(code), stmt, false); + else + throw loop_error("parent statement type undefined!!"); + + } + else if(isSgBasicBlock(parent)) + isSgStatement(parent)->insert_statement(isSgStatement(code), stmt, false); + else + throw loop_error("parent statement type undefined!!"); + + //tnl->print(); + * + * + */ + sync = true; + // return isSgNode(bb2); + + } + + // return tnli; + } else if (isSgIfStmt(*it)) { + SgStatement* body_ = isSgStatement( + recursiveFindReplacePreferedIdxs( + isSgNode((isSgIfStmt(*it)->get_true_body())), + body_syms, param_syms, body, loop_idxs, + globalscope)); + + omega::CG_roseRepr * tnl = new omega::CG_roseRepr(*it); + omega::CG_outputRepr* block = tnl->clone(); + tnli1 = + static_cast<const omega::CG_roseRepr *>(block)->GetCode(); + + isSgIfStmt(tnli1)->set_true_body(body_); + + if ((isSgIfStmt(*it)->get_false_body())) + isSgIfStmt(tnli1)->set_false_body( + isSgStatement( + recursiveFindReplacePreferedIdxs( + isSgNode( + (isSgIfStmt(*it)->get_false_body())), + body_syms, param_syms, body, + loop_idxs, globalscope))); + + clone->append_statement(isSgStatement(tnli1)); + //return tnli; + } else if (isSgStatement(*it)) { + omega::CG_roseRepr * tnl = new omega::CG_roseRepr(*it); + 
omega::CG_outputRepr* block = tnl->clone(); + tnli2 = + static_cast<const omega::CG_roseRepr *>(block)->GetCode(); + + clone->append_statement(isSgStatement(tnli2)); + //return tnli; + + } + } + + return isSgNode(clone); + + } + + /* if (!isSgBasicBlock( + recursiveFindReplacePreferedIdxs(isSgNode(*it), body_syms, + param_syms, body, loop_idxs, globalscope))) { + SgStatement *to_push = isSgStatement( + recursiveFindReplacePreferedIdxs(isSgNode(*it), + body_syms, param_syms, body, loop_idxs, + globalscope, sync)); + clone->append_statement(to_push); + + if ((sync_found) && isSgForStatement(to_push)) { + SgName name_syncthreads("__syncthreads"); + SgFunctionSymbol * syncthreads_symbol = + globalscope->lookup_function_symbol( + name_syncthreads); + + // Create a call to __syncthreads(): + SgFunctionCallExp * syncthreads_call = buildFunctionCallExp( + syncthreads_symbol, buildExprListExp()); + + SgExprStatement* stmt = buildExprStatement( + syncthreads_call); + + clone->append_statement(isSgStatement(stmt)); + } + // std::cout<<isSgNode(*it)->unparseToString()<<"\n\n"; + } else { + + SgStatementPtrList& tnl2 = isSgBasicBlock( + recursiveFindReplacePreferedIdxs(isSgNode(*it), + body_syms, param_syms, body, loop_idxs, + globalscope))->get_statements(); + for (SgStatementPtrList::const_iterator it2 = tnl2.begin(); + it2 != tnl2.end(); it2++) { + clone->append_statement(*it2); + + sync_found = true; + // std::cout<<isSgNode(*it2)->unparseToString()<<"\n\n"; + } + } + + } + return isSgNode(clone); + } + */ +// return tnl; +} + +// loop_vars -> array references +// loop_idxs -> <idx_name,idx_sym> map for when we encounter a loop with a different preferredIndex +// dim_vars -> out param, fills with <old,new> var_sym pair for 2D array dimentions (messy stuff) +SgNode* swapVarReferences(SgNode* code, + std::set<const SgVariableSymbol *>& syms, SgSymbolTable* param, + SgSymbolTable* body, SgScopeStatement* body_stmt) { + //Iterate over every expression, looking up each variable 
and type + //reference used and possibly replacing it or adding it to our symbol + //table + // + //We use the built-in cloning helper methods to seriously help us with this! + + //Need to do a recursive mark + + std::set<const SgVariableSymbol *>::iterator myIterator; + for (myIterator = syms.begin(); myIterator != syms.end(); myIterator++) { + SgName var_name = (*myIterator)->get_name(); + std::string x = var_name.getString(); + + if ((param->find_variable(var_name) == NULL) + && (body->find_variable(var_name) == NULL)) { + SgInitializedName* decl = (*myIterator)->get_declaration(); + + SgVariableSymbol* dvs = new SgVariableSymbol(decl); + SgVariableDeclaration* var_decl = buildVariableDeclaration( + dvs->get_name(), dvs->get_type()); + + AstTextAttribute* att = (AstTextAttribute*) (isSgNode( + decl->get_declaration())->getAttribute("__shared__")); + if (isSgNode(decl->get_declaration())->attributeExists( + "__shared__")) + var_decl->get_declarationModifier().get_storageModifier().setCudaShared(); + + appendStatement(var_decl, body_stmt); + + dvs->set_parent(body); + body->insert(var_name, dvs); + } + + std::vector<SgVarRefExp *> array = substitute(code, *myIterator, NULL, + isSgNode(body)); + + SgVariableSymbol* var = (SgVariableSymbol*) (*myIterator); + for (int j = 0; j < array.size(); j++) + array[j]->set_symbol(var); + } + + return code; +} + +bool LoopCuda::validIndexes(int stmt, const std::vector<std::string>& idxs) { + for (int i = 0; i < idxs.size(); i++) { + bool found = false; + for (int j = 0; j < idxNames[stmt].size(); j++) { + if (strcmp(idxNames[stmt][j].c_str(), idxs[i].c_str()) == 0) { + found = true; + } + } + if (!found) { + return false; + } + } + return true; +} + +bool LoopCuda::cudaize_v2(std::string kernel_name, + std::map<std::string, int> array_dims, + std::vector<std::string> blockIdxs, + std::vector<std::string> threadIdxs) { + CG_outputBuilder *ocg = ir->builder(); + int stmt_num = 0; + if (cudaDebug) { + printf("cudaize_v2(%s, {", 
kernel_name.c_str()); + //for( + printf("}, blocks={"); + printVs(blockIdxs); + printf("}, thread={"); + printVs(threadIdxs); + printf("})\n"); + } + + this->array_dims = array_dims; + if (!validIndexes(stmt_num, blockIdxs)) { + throw std::runtime_error("One of the indexes in the block list was not " + "found in the current set of indexes."); + } + if (!validIndexes(stmt_num, threadIdxs)) { + throw std::runtime_error( + "One of the indexes in the thread list was not " + "found in the current set of indexes."); + } + if (blockIdxs.size() == 0) + throw std::runtime_error("Cudaize: Need at least one block dimention"); + int block_level = 0; + //Now, we will determine the actual size (if possible, otherwise + //complain) for the block dimentions and thread dimentions based on our + //indexes and the relations for our stmt; + for (int i = 0; i < blockIdxs.size(); i++) { + int level = findCurLevel(stmt_num, blockIdxs[i]); + int ub, lb; + CG_outputRepr* ubrepr = extractCudaUB(stmt_num, level, ub, lb); + if (lb != 0) { + //attempt to "normalize" the loop with an in-place tile and then re-check our bounds + if (cudaDebug) + printf( + "Cudaize: doing tile at level %d to try and normalize lower bounds\n", + level); + tile(stmt_num, level, 1, level, CountedTile); + idxNames[stmt_num].insert(idxNames[stmt_num].begin() + (level), ""); //TODO: possibly handle this for all sibling stmts + ubrepr = extractCudaUB(stmt_num, level, ub, lb); + } + if (lb != 0) { + char buf[1024]; + sprintf(buf, + "Cudaize: Loop at level %d does not have 0 as it's lower bound", + level); + throw std::runtime_error(buf); + } + if (ub < 0) { + char buf[1024]; + sprintf(buf, + "Cudaize: Loop at level %d does not have a hard upper bound", + level); + //Anand: Commenting out error indication for lack of constant upper bound + //throw std::runtime_error(buf); + } + if (cudaDebug) + printf("block idx %s level %d lb: %d ub %d\n", blockIdxs[i].c_str(), + level, lb, ub); + if (i == 0) { + block_level = level; + 
if (ubrepr == NULL) { + cu_bx = ub + 1; + cu_bx_repr = NULL; + } else { + cu_bx = 0; + cu_bx_repr = ocg->CreatePlus(ubrepr, ocg->CreateInt(1)); + } + idxNames[stmt_num][level - 1] = "bx"; + } else if (i == 1) { + if (ubrepr == NULL) { + cu_by = ub + 1; + cu_by_repr = NULL; + } else { + cu_by = 0; + cu_by_repr = ocg->CreatePlus(ubrepr, ocg->CreateInt(1)); + } + idxNames[stmt_num][level - 1] = "by"; + } + } + if (!cu_by && !cu_by_repr) + block_level = 0; + int thread_level1 = 0; + int thread_level2 = 0; + for (int i = 0; i < threadIdxs.size(); i++) { + int level = findCurLevel(stmt_num, threadIdxs[i]); + int ub, lb; + CG_outputRepr* ubrepr = extractCudaUB(stmt_num, level, ub, lb); + if (lb != 0) { + //attempt to "normalize" the loop with an in-place tile and then re-check our bounds + if (cudaDebug) + printf( + "Cudaize: doing tile at level %d to try and normalize lower bounds\n", + level); + tile(stmt_num, level, 1, level, CountedTile); + idxNames[stmt_num].insert(idxNames[stmt_num].begin() + (level), ""); + ubrepr = extractCudaUB(stmt_num, level, ub, lb); + } + if (lb != 0) { + char buf[1024]; + sprintf(buf, + "Cudaize: Loop at level %d does not have 0 as it's lower bound", + level); + throw std::runtime_error(buf); + } + if (ub < 0) { + char buf[1024]; + sprintf(buf, + "Cudaize: Loop at level %d does not have a hard upper bound", + level); + //Anand: Commenting out error indication for lack of constant upper bound + //throw std::runtime_error(buf); + } + + if (cudaDebug) + printf("thread idx %s level %d lb: %d ub %d\n", + threadIdxs[i].c_str(), level, lb, ub); + if (i == 0) { + thread_level1 = level; + if (ubrepr == NULL) { + cu_tx = ub + 1; + cu_tx_repr = NULL; + } else { + cu_tx = 0; + cu_tx_repr = ocg->CreatePlus(ubrepr, ocg->CreateInt(1)); + } + idxNames[stmt_num][level - 1] = "tx"; + } else if (i == 1) { + thread_level2 = level; + if (ubrepr == NULL) { + cu_ty = ub + 1; + cu_ty_repr = NULL; + } else { + cu_ty = 0; + cu_ty_repr = ocg->CreatePlus(ubrepr, 
ocg->CreateInt(1)); + } + idxNames[stmt_num][level - 1] = "ty"; + } else if (i == 2) { + if (ubrepr == NULL) { + cu_tz = ub + 1; + cu_tz_repr = NULL; + } else { + cu_tz = 0; + cu_tz_repr = ocg->CreatePlus(ubrepr, ocg->CreateInt(1)); + } + idxNames[stmt_num][level - 1] = "tz"; + } + } + if (!cu_ty && !cu_ty_repr) + thread_level1 = 0; + if (!cu_tz && !cu_tz_repr) + thread_level2 = 0; + + //Make changes to nonsplitlevels + const int m = stmt.size(); + for (int i = 0; i < m; i++) { + if (block_level) { + //stmt[i].nonSplitLevels.append((block_level)*2); + stmt_nonSplitLevels[i].push_back((block_level) * 2); + } + if (thread_level1) { + //stmt[i].nonSplitLevels.append((thread_level1)*2); + stmt_nonSplitLevels[i].push_back((thread_level1) * 2); + } + if (thread_level2) { + //stmt[i].nonSplitLevels.append((thread_level1)*2); + stmt_nonSplitLevels[i].push_back((thread_level1) * 2); + } + } + + if (cudaDebug) { + printf("Codegen: current names: "); + printVS(idxNames[stmt_num]); + } + //Set codegen flag + code_gen_flags |= GenCudaizeV2; + + //Save array dimention sizes + this->array_dims = array_dims; + cu_kernel_name = kernel_name.c_str(); + +} + +/* + * setupConstantVar + * handles constant variable declaration + * and adds a global constant variable + * parameters: + * constant - the constant_memory_mapping object for this loop + * arr_def - the VarDefs object for the mapped variable + * globals - Rose Global variables + * i - an index to keep new variable names unique + * symtab - global symbol table + */ +static void setupConstantVar(constant_memory_mapping* constant, VarDefs* arr_def, SgGlobal* globals, int i, SgSymbolTable* symtab) { + char* buf1 = new char[32]; + snprintf(buf1, 32, "cs%dRef", i+1); + arr_def->secondName = buf1; + + char buf2[64]; + snprintf(buf2, 64, "__device__ __constant__ float"); + + SgVariableDeclaration* consvar_decl = buildVariableDeclaration( + SgName(std::string(buf1)), buildArrayType( + buildOpaqueType(SgName(buf2),globals), + 
arr_def->size_expr)); + SgInitializedNamePtrList& variables = consvar_decl->get_variables(); + SgInitializedNamePtrList::const_iterator j = variables.begin(); + SgInitializedName* initializedName = *j; + SgVariableSymbol* consvar_sym = new SgVariableSymbol(initializedName); + prependStatement(consvar_decl, globals); + + consvar_sym->set_parent(symtab); + symtab->insert(SgName(std::string(buf1)), consvar_sym); + + constant->set_mapped_symbol(arr_def->original_name.c_str(), consvar_sym); + constant->set_vardef(arr_def->original_name.c_str(), arr_def); +} + +/* + * cudaBindConstantVar + * allocs a variable to constant memory + * constant - the constant mapping object + * arr_def - the VarDefs abject + * globals - global symbol table + * stmt_list - the GPU functions' statement list + */ +static void cudaBindConstantVar(constant_memory_mapping* constant, VarDefs* arr_def, SgGlobal* globals, SgStatementPtrList* stmt_list) { + SgName cudaMemcpyToSymbol_name("cudaMemcpyToSymbol"); + SgFunctionDeclaration* cudaMemcpyToSymbol_decl = buildNondefiningFunctionDeclaration( + cudaMemcpyToSymbol_name, buildVoidType(), buildFunctionParameterList(), globals); + SgExprListExp* args = buildExprListExp(); + args->append_expression(buildCastExp(constant->get_mapped_symbol_exp(arr_def->original_name.c_str()), + buildPointerType(buildVoidType()))); + args->append_expression(buildVarRefExp(arr_def->in_data)); + args->append_expression(arr_def->size_expr); + stmt_list->push_back(buildExprStatement( + buildFunctionCallExp(buildFunctionRefExp(cudaMemcpyToSymbol_decl), args))); +} + +static void consmapArrayRefs(constant_memory_mapping* constant, std::vector<IR_ArrayRef*>* refs, SgGlobal* globals, IR_Code* ir, CG_roseBuilder* ocg) { + // if constant mapping is not being used, ignore this function + if(constant == NULL) return; + for(int i = 0; i < refs->size(); i++) { + IR_ArrayRef* aref = (*refs)[i]; + if(constant->is_array_mapped(aref->name().c_str())) { + // get array reference dimensions 
+ int dims = aref->symbol()->n_dim(); + if(dims > 2) { + printf(" \n CHiLL does not handle constant memory mapping for more than 2D arrays.\n"); + return; + } + + SgExpression* varexp = constant->get_mapped_symbol_exp(aref->name().c_str()); + SgExpression* index_exp; + // build index expression + if(dims == 1) { + index_exp = static_cast<omega::CG_roseRepr*>(aref->index(0)->clone())->GetExpression(); + } + if(dims == 2) { + VarDefs* arr_def = constant->get_vardef(aref->name().c_str()); + CG_outputRepr* i0 = aref->index(0)->clone(); + CG_outputRepr* i1 = aref->index(1)->clone(); + CG_outputRepr* sz = new CG_roseRepr(buildIntVal(arr_def->size_multi_dim[0])); + CG_outputRepr* exp = ocg->CreatePlus(ocg->CreateTimes(sz->clone(), i0), i1); + index_exp = static_cast<omega::CG_roseRepr*>(exp->clone())->GetExpression(); + } + ir->ReplaceExpression(aref, new CG_roseRepr(buildPntrArrRefExp(varexp, index_exp))); + } + } +} + +/* + * setupTexmappingVar + * handles texture variable declaration + * and adds a global texture object + * parameters: + * texture - the texture_memory_mapping object + * arr_def - the VarDefs object for the mapped variable + * globals - Rose Global variables + * i - an index to keep the new variable names unique + * devptr_sym - the devptr that the original variable is associated with + * symtab - GPU function symbol table + */ +static void setupTexmappingVar(texture_memory_mapping* texture, VarDefs* arr_def, SgGlobal* globals, int i, SgVariableSymbol* devptr_sym, SgSymbolTable* symtab) { + char* buf1 = new char[32]; + snprintf(buf1, 32, "tex%dRef", i+1); + arr_def->secondName = buf1; + + char buf2[64]; + // single-dimensional + snprintf(buf2, 64, "texture<float, %d, cudaReadModeElementType>", 1); + // multi-dimensional + // snprintf(buf2, 64, "texture<float, %d, cudaReadModeElemetType>", (int)(arr_def->size_multi_dim.size())); //*/ + + SgVariableDeclaration* texvar_decl = buildVariableDeclaration(SgName(std::string(buf1)), buildOpaqueType(buf2, 
globals)); + + SgInitializedNamePtrList& variables = texvar_decl->get_variables(); + SgInitializedNamePtrList::const_iterator j = variables.begin(); + SgInitializedName* initializedName = *j; + SgVariableSymbol* texvar_sym = new SgVariableSymbol(initializedName); + prependStatement(texvar_decl, globals); + + texvar_sym->set_parent(symtab); + symtab->insert(SgName(buf1), texvar_sym); + + texture->set_mapped_symbol(arr_def->original_name.c_str(), texvar_sym); + texture->set_devptr_symbol(arr_def->original_name.c_str(), devptr_sym); + texture->set_vardef(arr_def->original_name.c_str(), arr_def); +} + + +/* + * One dimensional version of cudaBindTexture + * see cudaBindTexture for details + */ +static SgFunctionCallExp* cudaBindTexture1D(texture_memory_mapping* texture, VarDefs* arr_def, SgGlobal* globals) { + SgName cudaBindTexture_name("cudaBindTexture"); + SgFunctionDeclaration* cudaBindTexture_decl = buildNondefiningFunctionDeclaration( + cudaBindTexture_name, buildVoidType(), buildFunctionParameterList(), globals); + + SgExprListExp* args = buildExprListExp(); + args->append_expression(buildIntVal(0)); + args->append_expression(texture->get_mapped_symbol_exp(arr_def->original_name.c_str())); + args->append_expression(texture->get_devptr_symbol_exp(arr_def->original_name.c_str())); + args->append_expression(arr_def->size_expr); + return buildFunctionCallExp(buildFunctionRefExp(cudaBindTexture_decl), args); +} + +/* + * Two dimensional version of cudaBindTexture + * see cudaBindTexture for details + */ +//static SgFunctionCallExp* cudaBindTexture2D(texture_memory_mapping* texture, VarDefs* arr_def, SgGlobal* globals) { +// SgName cudaBindTexture_name("cudaBindTexture2D"); +// SgFunctionDeclaration* cudaBindTexture_decl = buildNondefiningFunctionDeclaration( +// cudaBindTexture_name, buildVoidType(), buildFunctionParameterList(), globals); +// +// SgExprListExp* args = buildExprListExp(); +// args->append_expression(buildIntVal(0)); +// 
args->append_expression(texture->get_tex_mapped_symbol_exp(arr_def->original_name.c_str())); +// args->append_expression(texture->get_devptr_symbol_exp(arr_def->original_name.c_str())); +// args->append_expression(buildIntVal(texture->get_dim_length(arr_def->original_name.c_str(), 0))); +// args->append_expression(buildIntVal(texture->get_dim_length(arr_def->original_name.c_str(), 1))); +// args->append_expression(arr_def->size_expr); +// return buildFunctionCallExp(buildFunctionRefExp(cudaBindTexture_decl), args); +//} + +/* + * cudaBindTexture + * binds a variable to a texture + * parameters: + * texture - the texture mapping object + * arr_def - the VarDefs object + * globals - global symbol table + * stmt_list - the GPU functions' statement list + * notes: + * only supports binding 1D textures, may need to consider cudaBindTexture2D for 2D textures + */ +static void cudaBindTexture(texture_memory_mapping* texture, VarDefs* arr_def, SgGlobal* globals, SgStatementPtrList* stmt_list) { + //int dims = (int)(arr_def->size_multi_dim.size()); + //int dims = texture->get_dims(arr_def->original_name.c_str()); + //if(dims == 1) + stmt_list->push_back( + buildExprStatement(cudaBindTexture1D(texture, arr_def, globals))); + //if(dims == 2) + // stmt_list->push_back( + // buildExprStatement(cudaBindTexture2D(texture, arr_def, globals))); +} + +/* + * texmapArrayRefs + * maps array reference expresions of texture mapped variables to the tex1D function + * parameters: + * texture - the texture mapping object + * refs - a list of all array read operations + * globals - global symbol table + * ir - handles IR_Code operations + * ocg - handles CG_roseBuilder operations +**/ +static void texmapArrayRefs(texture_memory_mapping* texture, std::vector<IR_ArrayRef*>* refs, SgGlobal* globals, IR_Code* ir, CG_roseBuilder *ocg) { + // if texture mapping is not being used, ignore this function + if(texture == NULL) return; + for(int i = 0; i < refs->size(); i++) { + IR_ArrayRef* aref = 
(*refs)[i]; + if(texture->is_array_mapped(aref->name().c_str())) { + + // get array dimensions + VarDefs* arr_def = texture->get_vardef(aref->name().c_str()); + int dims = aref->symbol()->n_dim(); + if(dims > 2) { + printf(" \n CHiLL does not handle texture mapping for more than 2D arrays.\n"); + // TODO throw some sort of error. or handle in texture_copy function + return; + } + + // build texture lookup function declaration + char texNDfetch_strName[16]; + sprintf(texNDfetch_strName, "tex%dDfetch", 1); // for now, only support tex1Dfetch + //sprintf(texNDfetch_strName, "tex%dDfetch", dims); + SgFunctionDeclaration* fetch_decl = buildNondefiningFunctionDeclaration( + SgName(texNDfetch_strName), buildFloatType(), buildFunctionParameterList(), globals); + + // build args + SgExprListExp* args = buildExprListExp(); + args->append_expression(texture->get_mapped_symbol_exp(aref->name().c_str())); + + // set indexing args + //for(int i = 0; i < dims; i++) { + // args->append_expression((static_cast<omega::CG_roseRepr*>(aref->index(i)->clone()))->GetExpression()); + //} + if(dims == 1) { + args->append_expression(static_cast<omega::CG_roseRepr*>(aref->index(0)->clone())->GetExpression()); + } + else if(dims == 2) { + CG_outputRepr* i0 = aref->index(0)->clone(); + CG_outputRepr* i1 = aref->index(1)->clone(); + CG_outputRepr* sz = new CG_roseRepr(buildIntVal(arr_def->size_multi_dim[0])); + CG_outputRepr* expr = ocg->CreatePlus(ocg->CreateTimes(sz->clone(), i0), i1); + args->append_expression(static_cast<omega::CG_roseRepr*>(expr->clone())->GetExpression()); + } + + // build function call and replace original array ref + SgFunctionCallExp* fetch_call = buildFunctionCallExp(buildFunctionRefExp(fetch_decl), args); + ir->ReplaceExpression(aref, new CG_roseRepr(fetch_call)); + } + } +} + +SgNode* LoopCuda::cudaize_codegen_v2() { + if(cudaDebug) + printf("cudaize codegen V2\n"); + CG_roseBuilder *ocg = dynamic_cast<CG_roseBuilder*>(ir->builder()); + if (!ocg) + return false; + + 
//protonu--adding an annote to track texture memory type + //ANNOTE(k_cuda_texture_memory, "cuda texture memory", TRUE); + //ANNOTE(k_cuda_constant_memory, "cuda constant memory", TRUE); + int tex_mem_on = 0; + int cons_mem_on = 0; + + + + CG_outputRepr* repr; + std::vector<VarDefs> arrayVars; + std::vector<VarDefs> localScopedVars; + + std::vector<IR_ArrayRef *> ro_refs; + std::vector<IR_ArrayRef *> wo_refs; + std::set<std::string> uniqueRefs; + std::set<std::string> uniqueWoRefs; + std::set<const SgVariableSymbol *> syms; + std::set<const SgVariableSymbol *> psyms; + std::set<const SgVariableSymbol *> pdSyms; + SgStatementPtrList* replacement_list = new SgStatementPtrList; + + for (int j = 0; j < stmt.size(); j++) { + std::vector<IR_ArrayRef *> refs = ir->FindArrayRef(stmt[j].code); + for (int i = 0; i < refs.size(); i++) { + //printf("ref %s wo %d\n", static_cast<const char*>(refs[i]->name()), refs[i]->is_write()); + SgVariableSymbol* var = body_symtab->find_variable( + SgName((char*) refs[i]->name().c_str())); + SgVariableSymbol* var2 = parameter_symtab->find_variable( + SgName((char*) refs[i]->name().c_str())); + + //If the array is not a parameter, then it's a local array and we + //want to recreate it as a stack variable in the kernel as opposed to + //passing it in. 
+ if (var != NULL) { + //anand-- needs modification, if variable is parameter it wont be part of the + // block's symbol table but the functiond definition's symbol table + + continue; + } + if (uniqueRefs.find(refs[i]->name()) == uniqueRefs.end()) { + + uniqueRefs.insert(refs[i]->name()); + if (refs[i]->is_write()) { + uniqueWoRefs.insert(refs[i]->name()); + wo_refs.push_back(refs[i]); + } else + ro_refs.push_back(refs[i]); + } + if (refs[i]->is_write() + && uniqueWoRefs.find(refs[i]->name()) + == uniqueWoRefs.end()) { + uniqueWoRefs.insert(refs[i]->name()); + wo_refs.push_back(refs[i]); + //printf("adding %s to wo\n", static_cast<const char*>(refs[i]->name())); + } + pdSyms.insert((const SgVariableSymbol*) var2); + } + } + + if (cudaDebug) { + printf("reading from array "); + for (int i = 0; i < ro_refs.size(); i++) + printf("'%s' ", ro_refs[i]->name().c_str()); + printf("and writing to array "); + for (int i = 0; i < wo_refs.size(); i++) + printf("'%s' ", wo_refs[i]->name().c_str()); + printf("\n"); + } + const char* gridName = "dimGrid"; + const char* blockName = "dimBlock"; + + //TODO: Could allow for array_dims_vars to be a mapping from array + //references to to variable names that define their length. 
+ SgVariableSymbol* dim1 = 0; + SgVariableSymbol* dim2 = 0; + + for (int i = 0; i < wo_refs.size(); i++) { + //TODO: Currently assume all arrays are floats of one or two dimentions + SgVariableSymbol* outArray = 0; + std::string name = wo_refs[i]->name(); + outArray = body_symtab->find_variable(SgName((char*) name.c_str())); + int size_n_d; + if (outArray == NULL) + outArray = parameter_symtab->find_variable( + SgName((char*) name.c_str())); + + VarDefs v; + v.size_multi_dim = std::vector<int>(); + char buf[32]; + snprintf(buf, 32, "devO%dPtr", i + 1); + v.name = buf; + if (isSgPointerType(outArray->get_type())) { + if (isSgArrayType( + isSgNode( + isSgPointerType(outArray->get_type())->get_base_type()))) { + // v.type = ((array_type *)(((ptr_type *)(outArray->type()))->ref_type()))->elem_type(); + SgType* t = + isSgPointerType(outArray->get_type())->get_base_type(); + /* SgExprListExp* dimList = t->get_dim_info(); + SgExpressionPtrList::iterator j= dimList->get_expressions().begin(); + SgExpression* expr=NULL; + for (; j != dimList->get_expressions().end(); j++) + expr = *j; + */ + while (isSgArrayType(t)) + t = isSgArrayType(t)->get_base_type(); + + if (!isSgType(t)) { + char buf[1024]; + sprintf(buf, "CudaizeCodeGen: Array type undetected!"); + throw std::runtime_error(buf); + + } + + v.type = t; + } else + v.type = isSgPointerType(outArray->get_type())->get_base_type(); + } else if (isSgArrayType(outArray->get_type())) { + if (isSgArrayType( + isSgNode( + isSgArrayType(outArray->get_type())->get_base_type()))) { + // v.type = ((array_type *)(((ptr_type *)(outArray->type()))->ref_type()))->elem_type(); + SgType* t = + isSgArrayType(outArray->get_type())->get_base_type(); + /* SgExprListExp* dimList = t->get_dim_info(); + SgExpressionPtrList::iterator j= dimList->get_expressions().begin(); + SgExpression* expr=NULL; + for (; j != dimList->get_expressions().end(); j++) + expr = *j; + */ + while (isSgArrayType(t)) + t = isSgArrayType(t)->get_base_type(); + + if 
(!isSgType(t)) { + char buf[1024]; + sprintf(buf, "CudaizeCodeGen: Array type undetected!"); + throw std::runtime_error(buf); + + } + + v.type = t; + } else + v.type = isSgArrayType(outArray->get_type())->get_base_type(); + } else + v.type = buildFloatType(); + v.tex_mapped = false; + v.cons_mapped = false; + v.original_name = wo_refs[i]->name(); + //Size of the array = dim1 * dim2 * num bytes of our array type + + //If our input array is 2D (non-linearized), we want the actual + //dimentions of the array + CG_outputRepr* size; + //Lookup in array_dims + std::map<std::string, int>::iterator it = array_dims.find(name.c_str()); + if (isSgPointerType(outArray->get_type()) + && isSgArrayType( + isSgNode( + isSgPointerType(outArray->get_type())->get_base_type()))) { + SgType* t = isSgPointerType(outArray->get_type())->get_base_type(); + /* SgExprListExp* dimList = t->get_dim_info(); + SgExpressionPtrList::iterator j= dimList->get_expressions().begin(); + SgExpression* expr=NULL; + for (; j != dimList->get_expressions().end(); j++) + expr = *j; + */ + if (isSgIntVal(isSgArrayType(t)->get_index())) + size_n_d = + (int) (isSgIntVal(isSgArrayType(t)->get_index())->get_value()); + else if (isSgUnsignedIntVal(isSgArrayType(t)->get_index())) + size_n_d = (int) (isSgUnsignedIntVal( + isSgArrayType(t)->get_index())->get_value()); + else if (isSgUnsignedLongVal(isSgArrayType(t)->get_index())) + size_n_d = (int) (isSgUnsignedLongVal( + isSgArrayType(t)->get_index())->get_value()); + else if (isSgLongIntVal(isSgArrayType(t)->get_index())) + size_n_d = + (int) (isSgLongIntVal(isSgArrayType(t)->get_index())->get_value()); + else if (isSgLongLongIntVal(isSgArrayType(t)->get_index())) + size_n_d = (int) (isSgLongLongIntVal( + isSgArrayType(t)->get_index())->get_value()); + else if (isSgLongIntVal(isSgArrayType(t)->get_index())) + size_n_d = + (int) (isSgLongIntVal(isSgArrayType(t)->get_index())->get_value()); + else if (isSgUnsignedLongLongIntVal(isSgArrayType(t)->get_index())) + 
size_n_d = (int) (isSgUnsignedLongLongIntVal( + isSgArrayType(t)->get_index())->get_value()); + else if (isSgAddOp(isSgArrayType(t)->get_index())) { + SgAddOp *op_add = isSgAddOp(isSgArrayType(t)->get_index()); + + SgExpression *lhs = op_add->get_lhs_operand(); + SgExpression *rhs = op_add->get_rhs_operand(); + + if (isSgIntVal(lhs)) + size_n_d = (int) isSgIntVal(lhs)->get_value() + (int) (isSgIntVal(rhs)->get_value()); + else if (isSgUnsignedIntVal(lhs)) + size_n_d = (int) isSgUnsignedIntVal(lhs)->get_value() + + (int) isSgUnsignedIntVal(rhs)->get_value(); + else if (isSgUnsignedLongVal(lhs)) + size_n_d = (int) (isSgUnsignedLongVal(lhs)->get_value() + + isSgUnsignedLongVal(rhs)->get_value()); + else if (isSgLongIntVal(lhs)) + size_n_d = (int) (isSgUnsignedLongVal(lhs)->get_value() + + isSgUnsignedLongVal(rhs)->get_value()); + else if (isSgLongLongIntVal(lhs)) + size_n_d = (int) (isSgLongLongIntVal(lhs)->get_value() + + isSgUnsignedLongVal(rhs)->get_value()); + else if (isSgLongIntVal(lhs)) + size_n_d = (int) (isSgLongIntVal(lhs)->get_value() + + isSgLongIntVal(rhs)->get_value()); + else if (isSgUnsignedLongLongIntVal(lhs)) + size_n_d = + (int) (isSgUnsignedLongLongIntVal(lhs)->get_value() + + isSgUnsignedLongLongIntVal(rhs)->get_value()); + + } + t = isSgArrayType(t)->get_base_type(); + while (isSgArrayType(t)) { + int dim; + if (isSgIntVal(isSgArrayType(t)->get_index())) + dim = + (int) (isSgIntVal(isSgArrayType(t)->get_index())->get_value()); + else if (isSgUnsignedIntVal(isSgArrayType(t)->get_index())) + dim = (int) (isSgUnsignedIntVal( + isSgArrayType(t)->get_index())->get_value()); + else if (isSgUnsignedLongVal(isSgArrayType(t)->get_index())) + dim = (int) (isSgUnsignedLongVal( + isSgArrayType(t)->get_index())->get_value()); + else if (isSgLongIntVal(isSgArrayType(t)->get_index())) + dim = (int) (isSgLongIntVal( + isSgArrayType(t)->get_index())->get_value()); + else if (isSgLongLongIntVal(isSgArrayType(t)->get_index())) + dim = (int) (isSgLongLongIntVal( + 
isSgArrayType(t)->get_index())->get_value()); + else if (isSgLongIntVal(isSgArrayType(t)->get_index())) + dim = (int) (isSgLongIntVal( + isSgArrayType(t)->get_index())->get_value()); + else if (isSgUnsignedLongLongIntVal( + isSgArrayType(t)->get_index())) + dim = (int) (isSgUnsignedLongLongIntVal( + isSgArrayType(t)->get_index())->get_value()); + else if (isSgAddOp(isSgArrayType(t)->get_index())) { + SgAddOp *op_add = isSgAddOp(isSgArrayType(t)->get_index()); + + SgExpression *lhs = op_add->get_lhs_operand(); + SgExpression *rhs = op_add->get_rhs_operand(); + + if (isSgIntVal(lhs)) + dim = (int) isSgIntVal(lhs)->get_value() + + (int) (isSgIntVal(rhs)->get_value()); + else if (isSgUnsignedIntVal(lhs)) + dim = (int) isSgUnsignedIntVal(lhs)->get_value() + + (int) isSgUnsignedIntVal(rhs)->get_value(); + else if (isSgUnsignedLongVal(lhs)) + dim = (int) (isSgUnsignedLongVal(lhs)->get_value() + + isSgUnsignedLongVal(rhs)->get_value()); + else if (isSgLongIntVal(lhs)) + dim = (int) (isSgUnsignedLongVal(lhs)->get_value() + + isSgUnsignedLongVal(rhs)->get_value()); + else if (isSgLongLongIntVal(lhs)) + dim = (int) (isSgLongLongIntVal(lhs)->get_value() + + isSgUnsignedLongVal(rhs)->get_value()); + else if (isSgLongIntVal(lhs)) + dim = (int) (isSgLongIntVal(lhs)->get_value() + + isSgLongIntVal(rhs)->get_value()); + else if (isSgUnsignedLongLongIntVal(lhs)) + dim = + (int) (isSgUnsignedLongLongIntVal(lhs)->get_value() + + isSgUnsignedLongLongIntVal(rhs)->get_value()); + + } + size_n_d *= dim; + v.size_multi_dim.push_back(dim); + t = isSgArrayType(t)->get_base_type(); + } + //v.size_2d = (int) (isSgIntVal(t->get_index())->get_value()); + + if (cudaDebug) + printf("Detected Multi-dimensional array sized of %d for %s\n", + size_n_d, (char*) wo_refs[i]->name().c_str()); + size = ocg->CreateInt(size_n_d); + } else if (isSgArrayType(outArray->get_type()) + && isSgArrayType( + isSgNode( + isSgArrayType(outArray->get_type())->get_base_type()))) { + SgType* t = outArray->get_type(); + 
/* SgExprListExp* dimList = t->get_dim_info(); + SgExpressionPtrList::iterator j= dimList->get_expressions().begin(); + SgExpression* expr=NULL; + for (; j != dimList->get_expressions().end(); j++) + expr = *j; + */ + + if (isSgIntVal(isSgArrayType(t)->get_index())) + size_n_d = + (int) (isSgIntVal(isSgArrayType(t)->get_index())->get_value()); + else if (isSgUnsignedIntVal(isSgArrayType(t)->get_index())) + size_n_d = (int) (isSgUnsignedIntVal( + isSgArrayType(t)->get_index())->get_value()); + else if (isSgUnsignedLongVal(isSgArrayType(t)->get_index())) + size_n_d = (int) (isSgUnsignedLongVal( + isSgArrayType(t)->get_index())->get_value()); + else if (isSgLongIntVal(isSgArrayType(t)->get_index())) + size_n_d = + (int) (isSgLongIntVal(isSgArrayType(t)->get_index())->get_value()); + else if (isSgLongLongIntVal(isSgArrayType(t)->get_index())) + size_n_d = (int) (isSgLongLongIntVal( + isSgArrayType(t)->get_index())->get_value()); + else if (isSgLongIntVal(isSgArrayType(t)->get_index())) + size_n_d = + (int) (isSgLongIntVal(isSgArrayType(t)->get_index())->get_value()); + else if (isSgUnsignedLongLongIntVal(isSgArrayType(t)->get_index())) + size_n_d = (int) (isSgUnsignedLongLongIntVal( + isSgArrayType(t)->get_index())->get_value()); + else if (isSgAddOp(isSgArrayType(t)->get_index())) { + SgAddOp *op_add = isSgAddOp(isSgArrayType(t)->get_index()); + + SgExpression *lhs = op_add->get_lhs_operand(); + SgExpression *rhs = op_add->get_rhs_operand(); + + if (isSgIntVal(lhs)) + size_n_d = (int) isSgIntVal(lhs)->get_value() + (int) (isSgIntVal(rhs)->get_value()); + else if (isSgUnsignedIntVal(lhs)) + size_n_d = (int) isSgUnsignedIntVal(lhs)->get_value() + + (int) isSgUnsignedIntVal(rhs)->get_value(); + else if (isSgUnsignedLongVal(lhs)) + size_n_d = (int) (isSgUnsignedLongVal(lhs)->get_value() + + isSgUnsignedLongVal(rhs)->get_value()); + else if (isSgLongIntVal(lhs)) + size_n_d = (int) (isSgUnsignedLongVal(lhs)->get_value() + + isSgUnsignedLongVal(rhs)->get_value()); + else if 
(isSgLongLongIntVal(lhs)) + size_n_d = (int) (isSgLongLongIntVal(lhs)->get_value() + + isSgUnsignedLongVal(rhs)->get_value()); + else if (isSgLongIntVal(lhs)) + size_n_d = (int) (isSgLongIntVal(lhs)->get_value() + + isSgLongIntVal(rhs)->get_value()); + else if (isSgUnsignedLongLongIntVal(lhs)) + size_n_d = + (int) (isSgUnsignedLongLongIntVal(lhs)->get_value() + + isSgUnsignedLongLongIntVal(rhs)->get_value()); + + } + t = isSgArrayType(t)->get_base_type(); + while (isSgArrayType(t)) { + int dim; + if (isSgIntVal(isSgArrayType(t)->get_index())) + dim = + (int) (isSgIntVal(isSgArrayType(t)->get_index())->get_value()); + else if (isSgUnsignedIntVal(isSgArrayType(t)->get_index())) + dim = (int) (isSgUnsignedIntVal( + isSgArrayType(t)->get_index())->get_value()); + else if (isSgUnsignedLongVal(isSgArrayType(t)->get_index())) + dim = (int) (isSgUnsignedLongVal( + isSgArrayType(t)->get_index())->get_value()); + else if (isSgLongIntVal(isSgArrayType(t)->get_index())) + dim = (int) (isSgLongIntVal( + isSgArrayType(t)->get_index())->get_value()); + else if (isSgLongLongIntVal(isSgArrayType(t)->get_index())) + dim = (int) (isSgLongLongIntVal( + isSgArrayType(t)->get_index())->get_value()); + else if (isSgLongIntVal(isSgArrayType(t)->get_index())) + dim = (int) (isSgLongIntVal( + isSgArrayType(t)->get_index())->get_value()); + else if (isSgUnsignedLongLongIntVal( + isSgArrayType(t)->get_index())) + dim = (int) (isSgUnsignedLongLongIntVal( + isSgArrayType(t)->get_index())->get_value()); + else if (isSgAddOp(isSgArrayType(t)->get_index())) { + SgAddOp *op_add = isSgAddOp(isSgArrayType(t)->get_index()); + + SgExpression *lhs = op_add->get_lhs_operand(); + SgExpression *rhs = op_add->get_rhs_operand(); + + if (isSgIntVal(lhs)) + dim = (int) isSgIntVal(lhs)->get_value() + + (int) (isSgIntVal(rhs)->get_value()); + else if (isSgUnsignedIntVal(lhs)) + dim = (int) isSgUnsignedIntVal(lhs)->get_value() + + (int) isSgUnsignedIntVal(rhs)->get_value(); + else if (isSgUnsignedLongVal(lhs)) + 
dim = (int) (isSgUnsignedLongVal(lhs)->get_value() + + isSgUnsignedLongVal(rhs)->get_value()); + else if (isSgLongIntVal(lhs)) + dim = (int) (isSgUnsignedLongVal(lhs)->get_value() + + isSgUnsignedLongVal(rhs)->get_value()); + else if (isSgLongLongIntVal(lhs)) + dim = (int) (isSgLongLongIntVal(lhs)->get_value() + + isSgUnsignedLongVal(rhs)->get_value()); + else if (isSgLongIntVal(lhs)) + dim = (int) (isSgLongIntVal(lhs)->get_value() + + isSgLongIntVal(rhs)->get_value()); + else if (isSgUnsignedLongLongIntVal(lhs)) + dim = + (int) (isSgUnsignedLongLongIntVal(lhs)->get_value() + + isSgUnsignedLongLongIntVal(rhs)->get_value()); + + } + size_n_d *= dim; + v.size_multi_dim.push_back(dim); + t = isSgArrayType(t)->get_base_type(); + } + + //v.size_2d = (int) (isSgIntVal(t->get_index())->get_value()); + + if (cudaDebug) + printf("Detected Multi-Dimensional array sized of %d for %s\n", + size_n_d, (char*) wo_refs[i]->name().c_str()); + size = ocg->CreateInt(size_n_d); + } else if (it != array_dims.end()) { + int ref_size = it->second; + //size = + // ocg->CreateInt( + // isSgIntVal( + // isSgArrayType(outArray->get_type())->get_index())->get_value()); + //v.size_2d = isSgArrayType(outArray->get_type())->get_rank(); + //v.var_ref_size = ref_size; + size = ocg->CreateInt(ref_size); + + } else { + if (dim1) { + size = ocg->CreateTimes( + new CG_roseRepr(isSgExpression(buildVarRefExp(dim1))), + new CG_roseRepr(isSgExpression(buildVarRefExp(dim2)))); + } else { + char buf[1024]; + sprintf(buf, + "CudaizeCodeGen: Array reference %s does not have a " + "detectable size or specififed dimentions", + name.c_str()); + throw std::runtime_error(buf); + } + } + + v.size_expr = + static_cast<CG_roseRepr*>(ocg->CreateTimes(size, + new omega::CG_roseRepr( + isSgExpression(buildSizeOfOp(v.type)))))->GetExpression(); + + v.in_data = 0; + v.out_data = outArray; + //Check for in ro_refs and remove it at this point + std::vector<IR_ArrayRef *>::iterator it_; + for (it_ = ro_refs.begin(); it_ != 
ro_refs.end(); it_++) { + if ((*it_)->name() == wo_refs[i]->name()) { + break; + } + } + if (it_ != ro_refs.end()) { + v.in_data = outArray; + ro_refs.erase(it_); + } + + arrayVars.push_back(v); + + } + + //protonu-- assuming that all texture mapped memories were originally read only mems + //there should be safety checks for that, will implement those later + + for (int i = 0; i < ro_refs.size(); i++) { + SgVariableSymbol* inArray = 0; + std::string name = ro_refs[i]->name(); + inArray = body_symtab->find_variable(SgName((char*) name.c_str())); + if (inArray == NULL) + inArray = parameter_symtab->find_variable( + SgName((char*) name.c_str())); + + VarDefs v; + v.size_multi_dim = std::vector<int>(); + char buf[32]; + snprintf(buf, 32, "devI%dPtr", i + 1); + v.name = buf; + int size_n_d; + if (isSgPointerType(inArray->get_type())) { + if (isSgArrayType( + isSgNode( + isSgPointerType(inArray->get_type())->get_base_type()))) { + + SgType* t = + isSgPointerType(inArray->get_type())->get_base_type(); + + while (isSgArrayType(t)) + t = isSgArrayType(t)->get_base_type(); + + if (!isSgType(t)) { + char buf[1024]; + sprintf(buf, "CudaizeCodeGen: Array type undetected!"); + throw std::runtime_error(buf); + + } + v.type = t; + } else + v.type = isSgPointerType(inArray->get_type())->get_base_type(); + } else if (isSgArrayType(inArray->get_type())) { + if (isSgArrayType( + isSgNode( + isSgArrayType(inArray->get_type())->get_base_type()))) { + + SgType* t = inArray->get_type(); + while (isSgArrayType(t)) + t = isSgArrayType(t)->get_base_type(); + + if (!isSgType(t)) { + char buf[1024]; + sprintf(buf, "CudaizeCodeGen: Array type undetected!"); + throw std::runtime_error(buf); + + } + v.type = t; + } else + v.type = isSgArrayType(inArray->get_type())->get_base_type(); + } + + else + v.type = buildFloatType(); + + v.tex_mapped = false; + v.cons_mapped = false; + v.original_name = ro_refs[i]->name(); + + //derick -- adding texture and constant mapping + if ( texture != NULL) + 
v.tex_mapped = (texture->is_array_mapped(name.c_str()))? true:false; //protonu-track tex mapped vars + if (v.tex_mapped){ + printf("this variable %s is mapped to texture memory", name.c_str()); + } + //derick -- this is commented out until constant memory is implemeted + if ( constant_mem != NULL) + v.cons_mapped = (constant_mem->is_array_mapped(name.c_str()))? true:false; //protonu-track tex mapped vars + if (v.cons_mapped){ + printf("this variable %s is mapped to constant memory", name.c_str()); + } + + //Size of the array = dim1 * dim2 * num bytes of our array type + //If our input array is 2D (non-linearized), we want the actual + //dimentions of the array (as it might be less than cu_n + CG_outputRepr* size; + //Lookup in array_dims + std::map<std::string, int>::iterator it = array_dims.find(name.c_str()); + if (isSgPointerType(inArray->get_type()) + && isSgArrayType( + isSgPointerType(inArray->get_type())->get_base_type())) { + //SgArrayType* t = isSgArrayType(isSgArrayType(inArray->get_type())->get_base_type()); + //v.size_2d = t->get_rank(); + SgType* t = isSgPointerType(inArray->get_type())->get_base_type(); + /* SgExprListExp* dimList = t->get_dim_info(); + SgExpressionPtrList::iterator j= dimList->get_expressions().begin(); + SgExpression* expr=NULL; + for (; j != dimList->get_expressions().end(); j++) + expr = *j; + */ + //v.size_2d = 1; + if (isSgIntVal(isSgArrayType(t)->get_index())) + size_n_d = + (int) (isSgIntVal(isSgArrayType(t)->get_index())->get_value()); + else if (isSgUnsignedIntVal(isSgArrayType(t)->get_index())) + size_n_d = (int) (isSgUnsignedIntVal( + isSgArrayType(t)->get_index())->get_value()); + else if (isSgUnsignedLongVal(isSgArrayType(t)->get_index())) + size_n_d = (int) (isSgUnsignedLongVal( + isSgArrayType(t)->get_index())->get_value()); + else if (isSgLongIntVal(isSgArrayType(t)->get_index())) + size_n_d = + (int) (isSgLongIntVal(isSgArrayType(t)->get_index())->get_value()); + else if 
(isSgLongLongIntVal(isSgArrayType(t)->get_index())) + size_n_d = (int) (isSgLongLongIntVal( + isSgArrayType(t)->get_index())->get_value()); + else if (isSgLongIntVal(isSgArrayType(t)->get_index())) + size_n_d = + (int) (isSgLongIntVal(isSgArrayType(t)->get_index())->get_value()); + else if (isSgUnsignedLongLongIntVal(isSgArrayType(t)->get_index())) + size_n_d = (int) (isSgUnsignedLongLongIntVal( + isSgArrayType(t)->get_index())->get_value()); + else if (isSgAddOp(isSgArrayType(t)->get_index())) { + SgAddOp *op_add = isSgAddOp(isSgArrayType(t)->get_index()); + + SgExpression *lhs = op_add->get_lhs_operand(); + SgExpression *rhs = op_add->get_rhs_operand(); + + if (isSgIntVal(lhs)) + size_n_d = (int) isSgIntVal(lhs)->get_value() + (int) (isSgIntVal(rhs)->get_value()); + else if (isSgUnsignedIntVal(lhs)) + size_n_d = (int) isSgUnsignedIntVal(lhs)->get_value() + + (int) isSgUnsignedIntVal(rhs)->get_value(); + else if (isSgUnsignedLongVal(lhs)) + size_n_d = (int) (isSgUnsignedLongVal(lhs)->get_value() + + isSgUnsignedLongVal(rhs)->get_value()); + else if (isSgLongIntVal(lhs)) + size_n_d = (int) (isSgUnsignedLongVal(lhs)->get_value() + + isSgUnsignedLongVal(rhs)->get_value()); + else if (isSgLongLongIntVal(lhs)) + size_n_d = (int) (isSgLongLongIntVal(lhs)->get_value() + + isSgUnsignedLongVal(rhs)->get_value()); + else if (isSgLongIntVal(lhs)) + size_n_d = (int) (isSgLongIntVal(lhs)->get_value() + + isSgLongIntVal(rhs)->get_value()); + else if (isSgUnsignedLongLongIntVal(lhs)) + size_n_d = + (int) (isSgUnsignedLongLongIntVal(lhs)->get_value() + + isSgUnsignedLongLongIntVal(rhs)->get_value()); + + } + t = isSgArrayType(t)->get_base_type(); + while (isSgArrayType(t)) { + int dim; + if (isSgIntVal(isSgArrayType(t)->get_index())) + dim = + (int) (isSgIntVal(isSgArrayType(t)->get_index())->get_value()); + else if (isSgUnsignedIntVal(isSgArrayType(t)->get_index())) + dim = (int) (isSgUnsignedIntVal( + isSgArrayType(t)->get_index())->get_value()); + else if 
(isSgUnsignedLongVal(isSgArrayType(t)->get_index())) + dim = (int) (isSgUnsignedLongVal( + isSgArrayType(t)->get_index())->get_value()); + else if (isSgLongIntVal(isSgArrayType(t)->get_index())) + dim = (int) (isSgLongIntVal( + isSgArrayType(t)->get_index())->get_value()); + else if (isSgLongLongIntVal(isSgArrayType(t)->get_index())) + dim = (int) (isSgLongLongIntVal( + isSgArrayType(t)->get_index())->get_value()); + else if (isSgLongIntVal(isSgArrayType(t)->get_index())) + dim = (int) (isSgLongIntVal( + isSgArrayType(t)->get_index())->get_value()); + else if (isSgUnsignedLongLongIntVal( + isSgArrayType(t)->get_index())) + dim = (int) (isSgUnsignedLongLongIntVal( + isSgArrayType(t)->get_index())->get_value()); + else if (isSgAddOp(isSgArrayType(t)->get_index())) { + SgAddOp *op_add = isSgAddOp(isSgArrayType(t)->get_index()); + + SgExpression *lhs = op_add->get_lhs_operand(); + SgExpression *rhs = op_add->get_rhs_operand(); + + if (isSgIntVal(lhs)) + dim = (int) isSgIntVal(lhs)->get_value() + + (int) (isSgIntVal(rhs)->get_value()); + else if (isSgUnsignedIntVal(lhs)) + dim = (int) isSgUnsignedIntVal(lhs)->get_value() + + (int) isSgUnsignedIntVal(rhs)->get_value(); + else if (isSgUnsignedLongVal(lhs)) + dim = (int) (isSgUnsignedLongVal(lhs)->get_value() + + isSgUnsignedLongVal(rhs)->get_value()); + else if (isSgLongIntVal(lhs)) + dim = (int) (isSgUnsignedLongVal(lhs)->get_value() + + isSgUnsignedLongVal(rhs)->get_value()); + else if (isSgLongLongIntVal(lhs)) + dim = (int) (isSgLongLongIntVal(lhs)->get_value() + + isSgUnsignedLongVal(rhs)->get_value()); + else if (isSgLongIntVal(lhs)) + dim = (int) (isSgLongIntVal(lhs)->get_value() + + isSgLongIntVal(rhs)->get_value()); + else if (isSgUnsignedLongLongIntVal(lhs)) + dim = + (int) (isSgUnsignedLongLongIntVal(lhs)->get_value() + + isSgUnsignedLongLongIntVal(rhs)->get_value()); + + } + size_n_d *= dim; + v.size_multi_dim.push_back(dim); + t = isSgArrayType(t)->get_base_type(); + } + if (cudaDebug) + printf("Detected 
Multi-dimensional array sized of %d for %s\n", + size_n_d, (char*) ro_refs[i]->name().c_str()); + size = ocg->CreateInt(size_n_d); + } else if (isSgArrayType(inArray->get_type()) + && isSgArrayType( + isSgArrayType(inArray->get_type())->get_base_type())) { + //SgArrayType* t = isSgArrayType(isSgArrayType(inArray->get_type())->get_base_type()); + //v.size_2d = t->get_rank(); + SgType* t = inArray->get_type(); + /* SgExprListExp* dimList = t->get_dim_info(); + SgExpressionPtrList::iterator j= dimList->get_expressions().begin(); + SgExpression* expr=NULL; + for (; j != dimList->get_expressions().end(); j++) + expr = *j; + */ + + if (isSgIntVal(isSgArrayType(t)->get_index())) + size_n_d = + (int) (isSgIntVal(isSgArrayType(t)->get_index())->get_value()); + else if (isSgUnsignedIntVal(isSgArrayType(t)->get_index())) + size_n_d = (int) (isSgUnsignedIntVal( + isSgArrayType(t)->get_index())->get_value()); + else if (isSgUnsignedLongVal(isSgArrayType(t)->get_index())) + size_n_d = (int) (isSgUnsignedLongVal( + isSgArrayType(t)->get_index())->get_value()); + else if (isSgLongIntVal(isSgArrayType(t)->get_index())) + size_n_d = + (int) (isSgLongIntVal(isSgArrayType(t)->get_index())->get_value()); + else if (isSgLongLongIntVal(isSgArrayType(t)->get_index())) + size_n_d = (int) (isSgLongLongIntVal( + isSgArrayType(t)->get_index())->get_value()); + else if (isSgLongIntVal(isSgArrayType(t)->get_index())) + size_n_d = + (int) (isSgLongIntVal(isSgArrayType(t)->get_index())->get_value()); + else if (isSgUnsignedLongLongIntVal(isSgArrayType(t)->get_index())) + size_n_d = (int) (isSgUnsignedLongLongIntVal( + isSgArrayType(t)->get_index())->get_value()); + else if (isSgAddOp(isSgArrayType(t)->get_index())) { + SgAddOp *op_add = isSgAddOp(isSgArrayType(t)->get_index()); + + SgExpression *lhs = op_add->get_lhs_operand(); + SgExpression *rhs = op_add->get_rhs_operand(); + + if (isSgIntVal(lhs)) + size_n_d = (int) isSgIntVal(lhs)->get_value() + (int) (isSgIntVal(rhs)->get_value()); + else if 
(isSgUnsignedIntVal(lhs)) + size_n_d = (int) isSgUnsignedIntVal(lhs)->get_value() + + (int) isSgUnsignedIntVal(rhs)->get_value(); + else if (isSgUnsignedLongVal(lhs)) + size_n_d = (int) (isSgUnsignedLongVal(lhs)->get_value() + + isSgUnsignedLongVal(rhs)->get_value()); + else if (isSgLongIntVal(lhs)) + size_n_d = (int) (isSgUnsignedLongVal(lhs)->get_value() + + isSgUnsignedLongVal(rhs)->get_value()); + else if (isSgLongLongIntVal(lhs)) + size_n_d = (int) (isSgLongLongIntVal(lhs)->get_value() + + isSgUnsignedLongVal(rhs)->get_value()); + else if (isSgLongIntVal(lhs)) + size_n_d = (int) (isSgLongIntVal(lhs)->get_value() + + isSgLongIntVal(rhs)->get_value()); + else if (isSgUnsignedLongLongIntVal(lhs)) + size_n_d = + (int) (isSgUnsignedLongLongIntVal(lhs)->get_value() + + isSgUnsignedLongLongIntVal(rhs)->get_value()); + + } + t = isSgArrayType(t)->get_base_type(); + while (isSgArrayType(t)) { + int dim; + if (isSgIntVal(isSgArrayType(t)->get_index())) + dim = + (int) (isSgIntVal(isSgArrayType(t)->get_index())->get_value()); + else if (isSgUnsignedIntVal(isSgArrayType(t)->get_index())) + dim = (int) (isSgUnsignedIntVal( + isSgArrayType(t)->get_index())->get_value()); + else if (isSgUnsignedLongVal(isSgArrayType(t)->get_index())) + dim = (int) (isSgUnsignedLongVal( + isSgArrayType(t)->get_index())->get_value()); + else if (isSgLongIntVal(isSgArrayType(t)->get_index())) + dim = (int) (isSgLongIntVal( + isSgArrayType(t)->get_index())->get_value()); + else if (isSgLongLongIntVal(isSgArrayType(t)->get_index())) + dim = (int) (isSgLongLongIntVal( + isSgArrayType(t)->get_index())->get_value()); + else if (isSgLongIntVal(isSgArrayType(t)->get_index())) + dim = (int) (isSgLongIntVal( + isSgArrayType(t)->get_index())->get_value()); + else if (isSgUnsignedLongLongIntVal( + isSgArrayType(t)->get_index())) + dim = (int) (isSgUnsignedLongLongIntVal( + isSgArrayType(t)->get_index())->get_value()); + else if (isSgAddOp(isSgArrayType(t)->get_index())) { + SgAddOp *op_add = 
isSgAddOp(isSgArrayType(t)->get_index()); + + SgExpression *lhs = op_add->get_lhs_operand(); + SgExpression *rhs = op_add->get_rhs_operand(); + + if (isSgIntVal(lhs)) + dim = (int) isSgIntVal(lhs)->get_value() + + (int) (isSgIntVal(rhs)->get_value()); + else if (isSgUnsignedIntVal(lhs)) + dim = (int) isSgUnsignedIntVal(lhs)->get_value() + + (int) isSgUnsignedIntVal(rhs)->get_value(); + else if (isSgUnsignedLongVal(lhs)) + dim = (int) (isSgUnsignedLongVal(lhs)->get_value() + + isSgUnsignedLongVal(rhs)->get_value()); + else if (isSgLongIntVal(lhs)) + dim = (int) (isSgUnsignedLongVal(lhs)->get_value() + + isSgUnsignedLongVal(rhs)->get_value()); + else if (isSgLongLongIntVal(lhs)) + dim = (int) (isSgLongLongIntVal(lhs)->get_value() + + isSgUnsignedLongVal(rhs)->get_value()); + else if (isSgLongIntVal(lhs)) + dim = (int) (isSgLongIntVal(lhs)->get_value() + + isSgLongIntVal(rhs)->get_value()); + else if (isSgUnsignedLongLongIntVal(lhs)) + dim = + (int) (isSgUnsignedLongLongIntVal(lhs)->get_value() + + isSgUnsignedLongLongIntVal(rhs)->get_value()); + + } + size_n_d *= dim; + v.size_multi_dim.push_back(dim); + t = isSgArrayType(t)->get_base_type(); + } + if (cudaDebug) + printf("Detected Multi-Dimensional array sized of %d for %s\n", + size_n_d, (char*) ro_refs[i]->name().c_str()); + size = ocg->CreateInt(size_n_d); + } + + else if (it != array_dims.end()) { + int ref_size = it->second; + // v.var_ref_size = ref_size; + size = ocg->CreateInt(ref_size); + } else { + if (dim1) { + size = ocg->CreateTimes( + new CG_roseRepr(isSgExpression(buildVarRefExp(dim1))), + new CG_roseRepr(isSgExpression(buildVarRefExp(dim2)))); + } else { + char buf[1024]; + sprintf(buf, + "CudaizeCodeGen: Array reference %s does not have a " + "detectable size or specififed dimentions", + name.c_str()); + throw std::runtime_error(buf); + } + } + v.size_expr = + static_cast<CG_roseRepr*>(ocg->CreateTimes(size, + new omega::CG_roseRepr( + isSgExpression(buildSizeOfOp(v.type)))))->GetExpression(); + + 
v.in_data = inArray; + v.out_data = 0; + arrayVars.push_back(v); + } + + if (arrayVars.size() < 2) { + fprintf(stderr, + "cudaize error: Did not find two arrays being accessed\n"); + return false; + } + + //protonu--debugging tool--the printf statement + //tex_mem_on signals use of tex mem + /* derick -- texmapping near malloc mcopy + for(int i=0; i<arrayVars.size(); i++) + { + //printf("var name %s, tex_mem used %s\n", arrayVars[i].name.c_str(), (arrayVars[i].tex_mapped)?"true":"false"); + if (arrayVars[i].tex_mapped ) tex_mem_on ++; + //if (arrayVars[i].cons_mapped ) cons_mem_on ++; + } + */ + + //Add our mallocs (and input array memcpys) + for (int i = 0; i < arrayVars.size(); i++) { + if(arrayVars[i].cons_mapped) { + setupConstantVar(constant_mem, &arrayVars[i], globals, i, symtab); + SgStatementPtrList *tnl = new SgStatementPtrList; + cudaBindConstantVar(constant_mem, &arrayVars[i], globals, tnl); + setup_code = ocg->StmtListAppend(setup_code, new CG_roseRepr(tnl)); + } + else { + SgVariableDeclaration* defn = buildVariableDeclaration( + SgName(arrayVars[i].name.c_str()), + buildPointerType(arrayVars[i].type)); + SgInitializedNamePtrList& variables = defn->get_variables(); + SgInitializedNamePtrList::const_iterator j = variables.begin(); + SgInitializedName* initializedName = *j; + SgVariableSymbol* dvs = new SgVariableSymbol(initializedName); + prependStatement(defn, func_body); + + dvs->set_parent(body_symtab); + body_symtab->insert(SgName(arrayVars[i].name.c_str()), dvs); + +// SgVariableSymbol* dvs = body_symtab->find_variable(SgName(arrayVars[i].name.c_str())); + + // if(dvs == NULL) + // dvs = parameter_symtab->find_variable(SgName(arrayVars[i].name.c_str())); + + //cudaMalloc args + // SgBasicBlock* block = buildBasicBlock(); + SgName name_cuda_malloc("cudaMalloc"); + SgFunctionDeclaration * decl_cuda_malloc = + buildNondefiningFunctionDeclaration(name_cuda_malloc, + buildVoidType(), buildFunctionParameterList(), globals); + + SgName 
name_cuda_copy("cudaMemcpy"); + SgFunctionDeclaration * decl_cuda_copy = + buildNondefiningFunctionDeclaration(name_cuda_copy, + buildVoidType(), buildFunctionParameterList(), globals); + + SgExprListExp* args = buildExprListExp(); + args->append_expression( + buildCastExp(buildAddressOfOp(buildVarRefExp(dvs)), + buildPointerType(buildPointerType(buildVoidType())))); + args->append_expression(arrayVars[i].size_expr); + +// decl_cuda_malloc->get_parameterList()->append_arg + SgFunctionCallExp *the_call = buildFunctionCallExp( + buildFunctionRefExp(decl_cuda_malloc), args); + + SgExprStatement* stmt = buildExprStatement(the_call); + + // (*replacement_list).push_back (stmt); + + SgStatementPtrList* tnl = new SgStatementPtrList; + (*tnl).push_back(stmt); + setup_code = ocg->StmtListAppend(setup_code, new CG_roseRepr(tnl)); + if (arrayVars[i].in_data) { + + SgExprListExp * cuda_copy_in_args = buildExprListExp(); + cuda_copy_in_args->append_expression( + isSgExpression(buildVarRefExp(dvs))); + cuda_copy_in_args->append_expression( + isSgExpression(buildVarRefExp(arrayVars[i].in_data))); + CG_roseRepr* size_exp = new CG_roseRepr(arrayVars[i].size_expr); + cuda_copy_in_args->append_expression( + static_cast<CG_roseRepr*>(size_exp->clone())->GetExpression()); + cuda_copy_in_args->append_expression( + buildOpaqueVarRefExp("cudaMemcpyHostToDevice", globals)); + +// cuda_copy_in_args->append_expression( +// new SgVarRefExp(sourceLocation, ) +// ); + SgFunctionCallExp * cuda_copy_in_func_call = buildFunctionCallExp( + buildFunctionRefExp(decl_cuda_copy), cuda_copy_in_args); + + SgExprStatement* stmt = buildExprStatement(cuda_copy_in_func_call); + + SgStatementPtrList *tnl = new SgStatementPtrList; + (*tnl).push_back(stmt); + setup_code = ocg->StmtListAppend(setup_code, new CG_roseRepr(tnl)); + + if(arrayVars[i].tex_mapped) { + setupTexmappingVar(texture, &arrayVars[i], globals, i, dvs, symtab); + SgStatementPtrList *tnl = new SgStatementPtrList; + cudaBindTexture(texture, 
&arrayVars[i], globals, tnl); + setup_code = ocg->StmtListAppend(setup_code, new CG_roseRepr(tnl)); + } + } + } + } + + //Build dimGrid dim3 variables based on loop dimentions and ti/tj + char blockD1[120]; + char blockD2[120]; + if (dim1) { + snprintf(blockD1, 120, "%s/%d", + dim1->get_declaration()->get_name().getString().c_str(), cu_tx); + snprintf(blockD2, 120, "%s/%d", + dim2->get_declaration()->get_name().getString().c_str(), cu_ty); + } else { + snprintf(blockD1, 120, "%d", cu_bx); + snprintf(blockD2, 120, "%d", cu_by); + //snprintf(blockD1, 120, "%d/%d", cu_nx, cu_tx); + //snprintf(blockD2, 120, "%d/%d", cu_ny, cu_ty); + } + + SgInitializedName* arg1 = buildInitializedName("i", buildIntType()); + SgInitializedName* arg2 = buildInitializedName("j", buildIntType()); + SgInitializedName* arg3 = buildInitializedName("k", buildIntType()); + SgName type_name("dim3"); + //SgClassSymbol * type_symbol = globalScope->lookup_class_symbol(type_name); + + //ROSE_ASSERT(type_symbol != NULL); + + //SgClassDeclaration * dim3classdecl = isSgClassDeclaration( + // type_symbol->get_declaration()); + + SgFunctionDeclaration * funcdecl = buildNondefiningFunctionDeclaration( + SgName("dim3"), buildOpaqueType("dim3", globalScope), + //isSgType(dim3classdecl->get_type()), + buildFunctionParameterList(arg1, arg2, arg3), globalScope); + + if (cu_bx && cu_by) + repr = ocg->CreateDim3((const char*) gridName, ocg->CreateInt(cu_bx), + ocg->CreateInt(cu_by)); + else if (cu_bx_repr && cu_by_repr) + repr = ocg->CreateDim3((const char*) gridName, cu_bx_repr, cu_by_repr); + else if (cu_bx_repr) + repr = ocg->CreateDim3((const char*) gridName, cu_bx_repr, + ocg->CreateInt(1)); + setup_code = ocg->StmtListAppend(setup_code, repr); + //SgStatementPtrList* dimList = static_cast<CG_roseRepr *>(repr)->GetList(); + + //for(SgStatementPtrList::iterator it = (*dimList).begin(); it != (*dimList).end(); it++) + // (*replacement_list).push_back (*it); + + // repr = ocg->CreateDim3((const 
char*)blockName, cu_tx,cu_ty); + + if (cu_tz > 1 || cu_tz_repr) { + + if (cu_tx && cu_ty && cu_tz) + repr = ocg->CreateDim3((char*) blockName, ocg->CreateInt(cu_tx), + ocg->CreateInt(cu_ty), ocg->CreateInt(cu_tz)); + else if (cu_tx_repr && cu_ty_repr && cu_tz_repr) + repr = ocg->CreateDim3((char*) blockName, cu_tx_repr, cu_ty_repr, + cu_tz_repr); + // SgStatementPtrList* dimList = static_cast<CG_roseRepr *>(repr)->GetList(); + + // for(SgStatementPtrList::iterator it = (*dimList).begin(); it != (*dimList).end(); it++) + // (*replacement_list).push_back (*it); + + } else { + if (cu_tx && cu_ty) + repr = ocg->CreateDim3((char*) blockName, ocg->CreateInt(cu_tx), + ocg->CreateInt(cu_ty)); + else if (cu_tx_repr && cu_ty_repr) + repr = ocg->CreateDim3((char*) blockName, cu_tx_repr, cu_ty_repr); + //SgStatementPtrList* dimList = static_cast<CG_roseRepr *>(repr)->GetList(); + + //for(SgStatementPtrList::iterator it = (*dimList).begin(); it != (*dimList).end(); it++) + // (*replacement_list).push_back (*it); + + } + + setup_code = ocg->StmtListAppend(setup_code, repr); + + SgCudaKernelExecConfig* config = new SgCudaKernelExecConfig( + buildVarRefExp(gridName), buildVarRefExp(blockName), NULL, NULL); + //SgCudaKernelExecConfig* config = new SgCudaKernelExecConfig(buildIntVal(cu_bx), , NULL, NULL); + SgExprListExp* iml = new SgExprListExp(); + SgCastExp* dim_s; + + //Creating Kernel function + SgBasicBlock* bb = new SgBasicBlock(TRANSFORMATION_FILE_INFO); + SgFunctionDefinition* kernel_defn = new SgFunctionDefinition( + TRANSFORMATION_FILE_INFO, bb); + SgFunctionDeclaration* kernel_decl_ = new SgFunctionDeclaration( + TRANSFORMATION_FILE_INFO, SgName((char*)cu_kernel_name.c_str()),buildFunctionType(buildVoidType(), buildFunctionParameterList()), kernel_defn); + SgFunctionDeclaration* kernel_decl = new SgFunctionDeclaration( + TRANSFORMATION_FILE_INFO, SgName((char*)cu_kernel_name.c_str()),buildFunctionType(buildVoidType(), buildFunctionParameterList()), kernel_defn); + + 
//((kernel_decl->get_declarationModifier()).get_storageModifier()).setStatic(); + + kernel_decl->set_definingDeclaration(kernel_decl); + kernel_defn->set_parent(kernel_decl); + bb->set_parent(kernel_defn); + bb->set_endOfConstruct(TRANSFORMATION_FILE_INFO); + bb->get_endOfConstruct()->set_parent(bb); + + //SgFunctionSymbol* functionSymbol = new SgFunctionSymbol(kernel_decl_); + //globals->insert_symbol(SgName((char*) cu_kernel_name.c_str()), + // functionSymbol); + SgFunctionSymbol* functionSymbol2 = new SgFunctionSymbol(kernel_decl); + + globals->insert_symbol(SgName((char*) cu_kernel_name.c_str()), + functionSymbol2); + + kernel_decl_->set_parent(globals); + + kernel_decl_->set_scope(globals); + + kernel_decl_->setForward(); + + globals->prepend_declaration(kernel_decl_); + + kernel_decl->set_endOfConstruct(TRANSFORMATION_FILE_INFO); + kernel_decl->get_endOfConstruct()->set_parent(kernel_decl); + + kernel_decl->set_parent(globals); + kernel_decl->set_scope(globals); + + kernel_decl->get_definition()->set_endOfConstruct(TRANSFORMATION_FILE_INFO); + kernel_decl->get_definition()->get_endOfConstruct()->set_parent( + kernel_decl->get_definition()); + + globals->append_statement(kernel_decl); + + //printf("%s %s\n", static_cast<const char*>(cu_kernel_name), dims); + //--derick - kernel function parameters + for (int i = 0; i < arrayVars.size(); i++) + //Throw in a type cast if our kernel takes 2D array notation + //like (float(*) [1024]) + { + //protonu--throwing in another hack to stop the caller from passing tex mapped + //vars to the kernel. 
+ if (arrayVars[i].tex_mapped == true || arrayVars[i].cons_mapped) + continue; + if (!(arrayVars[i].size_multi_dim.empty())) { + //snprintf(dims,120,"(float(*) [%d])%s", arrayVars[i].size_2d, + // const_cast<char*>(arrayVars[i].name.c_str())); + + SgType* t = arrayVars[i].type; + for (int k = arrayVars[i].size_multi_dim.size() - 1; k >= 0; k--) { + t = buildArrayType(t, + buildIntVal(arrayVars[i].size_multi_dim[k])); + } + SgVariableSymbol* temp = body_symtab->find_variable( + SgName((char*) arrayVars[i].name.c_str())); + if (temp == NULL) + temp = parameter_symtab->find_variable( + SgName((char*) arrayVars[i].name.c_str())); + + dim_s = buildCastExp(buildVarRefExp(temp), buildPointerType(t), + SgCastExp::e_C_style_cast); + + //printf("%d %s\n", i, dims); + iml->append_expression(dim_s); + + SgInitializedName* id = buildInitializedName( + (char*) arrayVars[i].original_name.c_str(), + buildPointerType(t)); + kernel_decl->get_parameterList()->append_arg(id); + kernel_decl_->get_parameterList()->append_arg(id); + id->set_file_info(TRANSFORMATION_FILE_INFO); + + // DQ (9/8/2007): We now test this, so it has to be set explicitly. + id->set_scope(kernel_decl->get_definition()); + + // DQ (9/8/2007): Need to add variable symbol to global scope! 
+ //printf ("Fixing up the symbol table in scope = %p = %s for SgInitializedName = %p = %s \n",globalScope,globalScope->class_name().c_str(),var1_init_name,var1_init_name->get_name().str()); + SgVariableSymbol *var_symbol = new SgVariableSymbol(id); + kernel_decl->get_definition()->insert_symbol(id->get_name(), + var_symbol); + + // if(kernel_decl->get_definition()->get_symbol_table()->find((const) id) == NULL) + + } else { + //printf("%d %s\n", i, static_cast<const char*>(arrayVars[i].name)); + SgVariableSymbol* temp = body_symtab->find_variable( + SgName((char*) arrayVars[i].name.c_str())); + if (temp == NULL) + temp = parameter_symtab->find_variable( + SgName((char*) arrayVars[i].name.c_str())); + iml->append_expression(buildVarRefExp(temp)); + SgInitializedName* id = buildInitializedName( + (char*) arrayVars[i].original_name.c_str(), + buildPointerType(arrayVars[i].type)); + kernel_decl->get_parameterList()->append_arg(id); + kernel_decl_->get_parameterList()->append_arg(id); + id->set_file_info(TRANSFORMATION_FILE_INFO); + + // DQ (9/8/2007): We now test this, so it has to be set explicitly. + id->set_scope(kernel_decl->get_definition()); + + // DQ (9/8/2007): Need to add variable symbol to global scope! 
+ //printf ("Fixing up the symbol table in scope = %p = %s for SgInitializedName = %p = %s \n"$ + SgVariableSymbol *var_symbol = new SgVariableSymbol(id); + kernel_decl->get_definition()->insert_symbol(id->get_name(), + var_symbol); + + } + + } + if (dim1) { + iml->append_expression(buildVarRefExp(dim1)); + SgInitializedName* id = buildInitializedName( + dim1->get_name().getString().c_str(), dim1->get_type()); + kernel_decl->get_parameterList()->append_arg(id); + + iml->append_expression(buildVarRefExp(dim2)); + SgInitializedName* id2 = buildInitializedName( + dim2->get_name().getString().c_str(), dim2->get_type()); + + kernel_decl->get_parameterList()->append_arg(id); + kernel_decl_->get_parameterList()->append_arg(id); + } + + kernel_decl->get_functionModifier().setCudaKernel(); + kernel_decl_->get_functionModifier().setCudaKernel(); + SgCudaKernelCallExp * cuda_call_site = new SgCudaKernelCallExp( + TRANSFORMATION_FILE_INFO, buildFunctionRefExp(kernel_decl), iml,config); + + // SgStatementPtrList *tnl2 = new SgStatementPtrList; + + (*replacement_list).push_back(buildExprStatement(cuda_call_site)); + + setup_code = ocg->StmtListAppend(setup_code, + new CG_roseRepr(replacement_list)); + + //cuda free variables + for (int i = 0; i < arrayVars.size(); i++) { + if (arrayVars[i].out_data) { + + SgName name_cuda_copy("cudaMemcpy"); + SgFunctionDeclaration * decl_cuda_copyout = + buildNondefiningFunctionDeclaration(name_cuda_copy, + buildVoidType(), buildFunctionParameterList(), + globals); + + SgExprListExp* args = buildExprListExp(); + SgExprListExp * cuda_copy_out_args = buildExprListExp(); + cuda_copy_out_args->append_expression( + isSgExpression(buildVarRefExp(arrayVars[i].out_data))); + cuda_copy_out_args->append_expression( + isSgExpression(buildVarRefExp(arrayVars[i].name))); + CG_roseRepr* size_exp = new CG_roseRepr(arrayVars[i].size_expr); + cuda_copy_out_args->append_expression( + static_cast<CG_roseRepr*>(size_exp->clone())->GetExpression()); + 
cuda_copy_out_args->append_expression( + buildOpaqueVarRefExp("cudaMemcpyDeviceToHost", globals)); + +// cuda_copy_in_args->append_expression( +// new SgVarRefExp(sourceLocation, ) +// ); + SgFunctionCallExp * cuda_copy_out_func_call = buildFunctionCallExp( + buildFunctionRefExp(decl_cuda_copyout), cuda_copy_out_args); + + SgFunctionCallExp *the_call = buildFunctionCallExp( + buildFunctionRefExp(decl_cuda_copyout), cuda_copy_out_args); + + SgExprStatement* stmt = buildExprStatement(the_call); + + SgStatementPtrList* tnl3 = new SgStatementPtrList; + + (*tnl3).push_back(stmt); + + // tree_node_list* tnl = new tree_node_list; + // tnl->append(new tree_instr(the_call)); + setup_code = ocg->StmtListAppend(setup_code, new CG_roseRepr(tnl3)); + + } + if(!arrayVars[i].cons_mapped) { + SgName name_cuda_free("cudaFree"); + SgFunctionDeclaration * decl_cuda_free = + buildNondefiningFunctionDeclaration(name_cuda_free, + buildVoidType(), buildFunctionParameterList(), globals); + + SgExprListExp* args3 = buildExprListExp(); + + SgVariableSymbol* tmp = body_symtab->find_variable( + SgName(arrayVars[i].name.c_str())); + if (tmp == NULL) + tmp = parameter_symtab->find_variable( + SgName(arrayVars[i].name.c_str())); + + args3->append_expression(buildVarRefExp(tmp)); + + SgFunctionCallExp *the_call2 = buildFunctionCallExp( + buildFunctionRefExp(decl_cuda_free), args3); + + SgExprStatement* stmt2 = buildExprStatement(the_call2); + + SgStatementPtrList* tnl4 = new SgStatementPtrList; + + (*tnl4).push_back(stmt2); + //(*replacement_list).push_back (stmt2); + + setup_code = ocg->StmtListAppend(setup_code, new CG_roseRepr(tnl4)); + } + } + + // --------------- + // BUILD THE KERNEL + // --------------- + + //Extract out kernel body + SgNode* code = getCode(); + //Create kernel function body + //Add Params + std::map<std::string, SgVariableSymbol*> loop_vars; + //In-Out arrays + for (int i = 0; i < arrayVars.size(); i++) { + /* if(arrayVars[i].in_data) + fptr = 
arrayVars[i].in_data->type()->clone(); + else + fptr = arrayVars[i].out_data->type()->clone(); + */ + + // fptr = new_proc_syms->install_type(fptr); + std::string name = + arrayVars[i].in_data ? + arrayVars[i].in_data->get_declaration()->get_name().getString() : + arrayVars[i].out_data->get_declaration()->get_name().getString(); + //SgVariableSymbol* sym = new var_sym(fptr, arrayVars[i].in_data ? arrayVars[i].in_data->name() : arrayVars[i].out_data->name()); + + SgVariableSymbol* sym = + kernel_decl->get_definition()->get_symbol_table()->find_variable( + (const char*) name.c_str()); + /* SgVariableDeclaration* defn = buildVariableDeclaration(SgName(name.c_str()), buildFloatType()); + SgInitializedNamePtrList& variables = defn->get_variables(); + SgInitializedNamePtrList::const_iterator i = variables.begin(); + SgInitializedName* initializedName = *i; + SgVariableSymbol* sym = new SgVariableSymbol(initializedName); + prependStatement(defn, isSgScopeStatement(root_)); + + vs->set_parent(symtab2_); + symtab2_->insert(SgName(_s.c_str()), vs); + */ + + if (sym != NULL) + loop_vars.insert( + std::pair<std::string, SgVariableSymbol*>(std::string(name), + sym)); + } + + //Figure out which loop variables will be our thread and block dimention variables + std::vector<SgVariableSymbol *> loop_syms; + //Get our indexes + std::vector<const char*> indexes; // = get_loop_indexes(code,cu_num_reduce); + int threadsPos = 0; + + CG_outputRepr *body = NULL; + SgFunctionDefinition* func_d = func_definition; + //std::vector<SgVariableSymbol *> symbols = recursiveFindRefs(code); + + SgName name_sync("__syncthreads"); + SgFunctionDeclaration * decl_sync = buildNondefiningFunctionDeclaration( + name_sync, buildVoidType(), buildFunctionParameterList(), + globalScope); + + recursiveFindRefs(code, syms, func_d); + + //SgFunctionDeclaration* func = Outliner::generateFunction (code, (char*)cu_kernel_name.c_str(), syms, pdSyms, psyms, NULL, globalScope); + + if (cu_bx > 1 || cu_bx_repr) { + 
indexes.push_back("bx"); + SgName type_name("blockIdx.x"); + SgClassSymbol * type_symbol = globalScope->lookup_class_symbol( + type_name); + SgVariableDeclaration * var_decl = buildVariableDeclaration("bx", + buildIntType(), NULL, + isSgScopeStatement(kernel_decl->get_definition()->get_body())); + SgStatementPtrList *tnl = new SgStatementPtrList; + // (*tnl).push_back(isSgStatement(var_decl)); + appendStatement(var_decl, kernel_decl->get_definition()->get_body()); + + SgVariableSymbol* bx = + kernel_decl->get_definition()->get_body()->lookup_variable_symbol( + SgName("bx")); + SgStatement* assign = isSgStatement( + buildAssignStatement(buildVarRefExp(bx), + buildOpaqueVarRefExp("blockIdx.x", + kernel_decl->get_definition()->get_body()))); + (*tnl).push_back(assign); + // body = ocg->StmtListAppend(body, + // new CG_roseRepr(tnl)); + appendStatement(assign, kernel_decl->get_definition()->get_body()); + + } + if (cu_by > 1 || cu_by_repr) { + indexes.push_back("by"); + SgName type_name("blockIdx.y"); + SgClassSymbol * type_symbol = globalScope->lookup_class_symbol( + type_name); + SgVariableDeclaration * var_decl = buildVariableDeclaration("by", + buildIntType(), NULL, + isSgScopeStatement(kernel_decl->get_definition()->get_body())); + // SgStatementPtrList *tnl = new SgStatementPtrList; + // (*tnl).push_back(isSgStatement(var_decl)); + appendStatement(var_decl, kernel_decl->get_definition()->get_body()); + + SgVariableSymbol* by = + kernel_decl->get_definition()->get_body()->lookup_variable_symbol( + SgName("by")); + SgStatement* assign = isSgStatement( + buildAssignStatement(buildVarRefExp(by), + buildOpaqueVarRefExp("blockIdx.y", + kernel_decl->get_definition()->get_body()))); + //(*tnl).push_back(assign); + // body = ocg->StmtListAppend(body, + // new CG_roseRepr(tnl)); + appendStatement(assign, kernel_decl->get_definition()->get_body()); + + } + if (cu_tx_repr || cu_tx > 1) { + threadsPos = indexes.size(); + indexes.push_back("tx"); + SgName 
type_name("threadIdx.x"); + SgClassSymbol * type_symbol = globalScope->lookup_class_symbol( + type_name); + SgVariableDeclaration * var_decl = buildVariableDeclaration("tx", + buildIntType(), NULL, + isSgScopeStatement(kernel_decl->get_definition()->get_body())); + // SgStatementPtrList *tnl = new SgStatementPtrList; + // (*tnl).push_back(isSgStatement(var_decl)); + appendStatement(var_decl, kernel_decl->get_definition()->get_body()); + + SgVariableSymbol* tx = + kernel_decl->get_definition()->get_body()->lookup_variable_symbol( + SgName("tx")); + SgStatement* assign = isSgStatement( + buildAssignStatement(buildVarRefExp(tx), + buildOpaqueVarRefExp("threadIdx.x", + kernel_decl->get_definition()->get_body()))); + //(*tnl).push_back(assign); + // body = ocg->StmtListAppend(body, + // new CG_roseRepr(tnl)); + appendStatement(assign, kernel_decl->get_definition()->get_body()); + + } + if (cu_ty_repr || cu_ty > 1) { + indexes.push_back("ty"); + SgName type_name("threadIdx.y"); + SgClassSymbol * type_symbol = globalScope->lookup_class_symbol( + type_name); + SgVariableDeclaration * var_decl = buildVariableDeclaration("ty", + buildIntType(), NULL, + isSgScopeStatement(kernel_decl->get_definition()->get_body())); + appendStatement(var_decl, kernel_decl->get_definition()->get_body()); + + // SgStatementPtrList *tnl = new SgStatementPtrList; + // (*tnl).push_back(isSgStatement(var_decl)); + SgVariableSymbol* ty = + kernel_decl->get_definition()->get_body()->lookup_variable_symbol( + SgName("ty")); + SgStatement* assign = isSgStatement( + buildAssignStatement(buildVarRefExp(ty), + buildOpaqueVarRefExp("threadIdx.y", + kernel_decl->get_definition()->get_body()))); + // (*tnl).push_back(assign); + // body = ocg->StmtListAppend(body, + // new CG_roseRepr(tnl)); + appendStatement(assign, kernel_decl->get_definition()->get_body()); + + } + if (cu_tz_repr || cu_tz > 1) { + indexes.push_back("tz"); + SgName type_name("threadIdx.z"); + SgClassSymbol * type_symbol = 
globalScope->lookup_class_symbol( + type_name); + SgVariableDeclaration * var_decl = buildVariableDeclaration("tz", + buildIntType(), NULL, + isSgScopeStatement(kernel_decl->get_definition()->get_body())); + // SgStatementPtrList *tnl = new SgStatementPtrList; + // (*tnl).push_back(isSgStatement(var_decl)); + appendStatement(var_decl, kernel_decl->get_definition()->get_body()); + + SgVariableSymbol* tz = + kernel_decl->get_definition()->get_body()->lookup_variable_symbol( + "tz"); + SgStatement* assign = isSgStatement( + buildAssignStatement(buildVarRefExp(tz), + buildOpaqueVarRefExp("threadIdx.z", + kernel_decl->get_definition()->get_body()))); + // (*tnl).push_back(assign); + // body = ocg->StmtListAppend(body, + // new CG_roseRepr(tnl)); + appendStatement(assign, kernel_decl->get_definition()->get_body()); + + } + + std::map<std::string, SgVariableSymbol*> loop_idxs; //map from idx names to their new syms + + SgNode* swapped_ = swapVarReferences(code, syms, + kernel_decl->get_definition()->get_symbol_table(), + kernel_decl->get_definition()->get_body()->get_symbol_table(), + kernel_decl->get_definition()->get_body()); + + //std::cout << swapped_->unparseToString() << std::endl << std::endl; + + SgNode *swapped = recursiveFindReplacePreferedIdxs(swapped_, + kernel_decl->get_definition()->get_body()->get_symbol_table(), + kernel_decl->get_definition()->get_symbol_table(), + kernel_decl->get_definition()->get_body(), loop_idxs, globalScope); //in-place swapping + //swapped->print(); + + if (!isSgBasicBlock(swapped)) { + appendStatement(isSgStatement(swapped), + kernel_decl->get_definition()->get_body()); + swapped->set_parent( + isSgNode(kernel_decl->get_definition()->get_body())); + } else { + + for (SgStatementPtrList::iterator it = + isSgBasicBlock(swapped)->get_statements().begin(); + it != isSgBasicBlock(swapped)->get_statements().end(); it++) { + appendStatement(*it, kernel_decl->get_definition()->get_body()); + (*it)->set_parent( + 
isSgNode(kernel_decl->get_definition()->get_body())); + + } + + } + + for (int i = 0; i < indexes.size(); i++) { + std::vector<SgForStatement*> tfs = findCommentedFors(indexes[i], + swapped); + for (int k = 0; k < tfs.size(); k++) { + //printf("replacing %p tfs for index %s\n", tfs[k], indexes[i]); + SgNode* newBlock = forReduce(tfs[k], loop_idxs[indexes[i]], + kernel_decl->get_definition()); + //newBlock->print(); + swap_node_for_node_list(tfs[k], newBlock); + //printf("AFTER SWAP\n"); newBlock->print(); + } + } + + //--derick replace array refs of texture mapped vars here + body = new CG_roseRepr(kernel_decl->get_definition()->get_body()); + std::vector<IR_ArrayRef*> refs = ir->FindArrayRef(body); + texmapArrayRefs(texture, &refs, globals, ir, ocg); + // do the same for constant mapped vars + consmapArrayRefs(constant_mem, &refs, globals, ir, ocg); + + return swapped; +} + +//Order taking out dummy variables +std::vector<std::string> cleanOrder(std::vector<std::string> idxNames) { + std::vector<std::string> results; + for (int j = 0; j < idxNames.size(); j++) { + if (idxNames[j].length() != 0) + results.push_back(idxNames[j]); + } + return results; +} + +//First non-dummy level in ascending order +int LoopCuda::nonDummyLevel(int stmt, int level) { + //level comes in 1-basd and should leave 1-based + for (int j = level - 1; j < idxNames[stmt].size(); j++) { + if (idxNames[stmt][j].length() != 0) { + //printf("found non dummy level of %d with idx: %s when searching for %d\n", j+1, (const char*) idxNames[stmt][j], level); + return j + 1; + } + } + char buf[128]; + sprintf(buf, "%d", level); + throw std::runtime_error( + std::string("Unable to find a non-dummy level starting from ") + + std::string(buf)); +} + +int LoopCuda::findCurLevel(int stmt, std::string idx) { + for (int j = 0; j < idxNames[stmt].size(); j++) { + if (strcmp(idxNames[stmt][j].c_str(), idx.c_str()) == 0) + return j + 1; + } + throw std::runtime_error( + std::string("Unable to find index ") + idx 
+ + std::string(" in current list of indexes")); +} + +void LoopCuda::permute_cuda(int stmt, + const std::vector<std::string>& curOrder) { + //printf("curOrder: "); + //printVs(curOrder); + //printf("idxNames: "); + //printVS(idxNames[stmt]); + std::vector<std::string> cIdxNames = cleanOrder(idxNames[stmt]); + bool same = true; + std::vector<int> pi; + for (int i = 0; i < curOrder.size(); i++) { + bool found = false; + for (int j = 0; j < cIdxNames.size(); j++) { + if (strcmp(cIdxNames[j].c_str(), curOrder[i].c_str()) == 0) { + pi.push_back(j + 1); + found = true; + if (j != i) + same = false; + } + } + if (!found) { + throw std::runtime_error( + "One of the indexes in the permute order were not " + "found in the current set of indexes."); + } + } + for (int i = curOrder.size(); i < cIdxNames.size(); i++) { + pi.push_back(i); + } + if (same) + return; + permute(stmt, pi); + //Set old indexe names as new + for (int i = 0; i < curOrder.size(); i++) { + idxNames[stmt][i] = curOrder[i].c_str(); //what about sibling stmts? 
+ } +} + +bool LoopCuda::permute(int stmt_num, const std::vector<int> &pi) { +// check for sanity of parameters + if (stmt_num >= stmt.size() || stmt_num < 0) + throw std::invalid_argument("invalid statement " + to_string(stmt_num)); + const int n = stmt[stmt_num].xform.n_out(); + if (pi.size() > (n - 1) / 2) + throw std::invalid_argument( + "iteration space dimensionality does not match permute dimensionality"); + int first_level = 0; + int last_level = 0; + for (int i = 0; i < pi.size(); i++) { + if (pi[i] > (n - 1) / 2 || pi[i] <= 0) + throw std::invalid_argument( + "invalid loop level " + to_string(pi[i]) + + " in permuation"); + + if (pi[i] != i + 1) { + if (first_level == 0) + first_level = i + 1; + last_level = i + 1; + } + } + if (first_level == 0) + return true; + + std::vector<int> lex = getLexicalOrder(stmt_num); + std::set<int> active = getStatements(lex, 2 * first_level - 2); + Loop::permute(active, pi); +} + +void LoopCuda::tile_cuda(int stmt, int level, int outer_level) { + tile_cuda(stmt, level, 1, outer_level, "", "", CountedTile); +} +void LoopCuda::tile_cuda(int level, int tile_size, int outer_level, + std::string idxName, std::string ctrlName, TilingMethodType method) { + tile_cuda(0, level, tile_size, outer_level, idxName, ctrlName, method); +} + +void LoopCuda::tile_cuda(int stmt, int level, int tile_size, int outer_level, + std::string idxName, std::string ctrlName, TilingMethodType method) { + //Do regular tile but then update the index and control loop variable + //names as well as the idxName to reflect the current state of things. 
+ //printf("tile(%d,%d,%d,%d)\n", stmt, level, tile_size, outer_level); + //printf("idxNames before: "); + //printVS(idxNames[stmt]); + + tile(stmt, level, tile_size, outer_level, method); + + if (idxName.size()) + idxNames[stmt][level - 1] = idxName.c_str(); + if (tile_size == 1) { + //potentially rearrange loops + if (outer_level < level) { + std::string tmp = idxNames[stmt][level - 1]; + for (int i = level - 1; i > outer_level - 1; i--) { + if (i - 1 >= 0) + idxNames[stmt][i] = idxNames[stmt][i - 1]; + } + idxNames[stmt][outer_level - 1] = tmp; + } + //TODO: even with a tile size of one, you need a insert (of a dummy loop) + idxNames[stmt].insert(idxNames[stmt].begin() + (level), ""); + } else { + if (!ctrlName.size()) + throw std::runtime_error("No ctrl loop name for tile"); + //insert + idxNames[stmt].insert(idxNames[stmt].begin() + (outer_level - 1), + ctrlName.c_str()); + } + + //printf("idxNames after: "); + //printVS(idxNames[stmt]); +} + +bool LoopCuda::datacopy_privatized_cuda(int stmt_num, int level, + const std::string &array_name, + const std::vector<int> &privatized_levels, bool allow_extra_read, + int fastest_changing_dimension, int padding_stride, + int padding_alignment, bool cuda_shared) { + int old_stmts = stmt.size(); + // printf("before datacopy_privatized:\n"); + printIS(); + //datacopy_privatized(stmt_num, level, array_name, privatized_levels, allow_extra_read, fastest_changing_dimension, padding_stride, padding_alignment, cuda_shared); + if (cuda_shared) + datacopy_privatized(stmt_num, level, array_name, privatized_levels, + allow_extra_read, fastest_changing_dimension, padding_stride, + padding_alignment, 1); + else + datacopy_privatized(stmt_num, level, array_name, privatized_levels, + allow_extra_read, fastest_changing_dimension, padding_stride, + padding_alignment, 0); + // printf("after datacopy_privatized:\n"); + printIS(); + + //Adjust idxNames to reflect updated state + std::vector<std::string> cIdxNames = 
cleanOrder(idxNames[stmt_num]); + int new_stmts = stmt.size(); + for (int i = old_stmts; i < new_stmts; i++) { + //printf("fixing up statement %d\n", i); + std::vector<std::string> idxs; + + //protonu-making sure the vector of nonSplitLevels grows along with + //the statement structure + stmt_nonSplitLevels.push_back(std::vector<int>()); + + //Indexes up to level will be the same + for (int j = 0; j < level - 1; j++) + idxs.push_back(cIdxNames[j]); + + //Expect privatized_levels to match + for (int j = 0; j < privatized_levels.size(); j++) + idxs.push_back(cIdxNames[privatized_levels[j] - 1]);//level is one-based + + //all further levels should match order they are in originally + if (privatized_levels.size()) { + int last_privatized = privatized_levels.back(); + int top_level = last_privatized + + (stmt[i].IS.n_set() - idxs.size()); + //printf("last privatized_levels: %d top_level: %d\n", last_privatized, top_level); + for (int j = last_privatized; j < top_level; j++) { + idxs.push_back(cIdxNames[j]); + //printf("pushing back: %s\n", (const char*)cIdxNames[j]); + } + } + idxNames.push_back(idxs); + } +} + +bool LoopCuda::datacopy_cuda(int stmt_num, int level, + const std::string &array_name, + const std::vector<std::string> new_idxs, + bool allow_extra_read, int fastest_changing_dimension, + int padding_stride, int padding_alignment, bool cuda_shared) { + + int old_stmts = stmt.size(); + //datacopy(stmt_num,level,array_name,allow_extra_read,fastest_changing_dimension,padding_stride,padding_alignment,cuda_shared); + // printf("before datacopy:\n"); + // printIS(); + if (cuda_shared) + datacopy(stmt_num, level, array_name, allow_extra_read, + fastest_changing_dimension, padding_stride, padding_alignment, + 1); + else + datacopy(stmt_num, level, array_name, allow_extra_read, + fastest_changing_dimension, padding_stride, padding_alignment, + 0); + // printf("after datacopy:\n"); + printIS(); + + //Adjust idxNames to reflect updated state + std::vector<std::string> 
cIdxNames = cleanOrder(idxNames[stmt_num]); + int new_stmts = stmt.size(); + for (int i = old_stmts; i < new_stmts; i++) { + //printf("fixing up statement %d\n", i); + std::vector<std::string> idxs; + + //protonu-making sure the vector of nonSplitLevels grows along with + //the statement structure + stmt_nonSplitLevels.push_back(std::vector<int>()); + + //Indexes up to level will be the same + for (int j = 0; j < level - 1; j++) + idxs.push_back(cIdxNames[j]); + + //all further levels should get names from new_idxs + int top_level = stmt[i].IS.n_set(); + //printf("top_level: %d level: %d\n", top_level, level); + if (new_idxs.size() < top_level - level + 1) + throw std::runtime_error( + "Need more new index names for new datacopy loop levels"); + + for (int j = level - 1; j < top_level; j++) { + idxs.push_back(new_idxs[j - level + 1].c_str()); + //printf("pushing back: %s\n", new_idxs[j-level+1].c_str()); + } + idxNames.push_back(idxs); + } +} + +bool LoopCuda::unroll_cuda(int stmt_num, int level, int unroll_amount) { + int old_stmts = stmt.size(); + //bool b= unroll(stmt_num, , unroll_amount); + + int dim = 2 * level - 1; + std::vector<int> lex = getLexicalOrder(stmt_num); + std::set<int> same_loop = getStatements(lex, dim - 1); + + level = nonDummyLevel(stmt_num, level); + //printf("unrolling %d at level %d\n", stmt_num,level); + + //protonu--using the new version of unroll, which returns + //a set of ints instead of a bool. 
To keep Gabe's logic + //I'll check the size of the set, if it's 0 return true + //bool b= unroll(stmt_num, level, unroll_amount); + std::set<int> b_set = unroll(stmt_num, level, unroll_amount, idxNames); + bool b = false; + if (b_set.size() == 0) + b = true; + //end--protonu + + //Adjust idxNames to reflect updated state + std::vector<std::string> cIdxNames = cleanOrder(idxNames[stmt_num]); + std::vector<std::string> origSource = idxNames[stmt_num]; + ; + //Drop index names at level + if (unroll_amount == 0) { + //For all statements that were in this unroll together, drop index name for unrolled level + idxNames[stmt_num][level - 1] = ""; + for (std::set<int>::iterator i = same_loop.begin(); + i != same_loop.end(); i++) { + //printf("in same loop as %d is %d\n", stmt_num, (*i)); + //idxNames[(*i)][level-1] = ""; + idxNames[(*i)] = idxNames[stmt_num]; + } + } + + lex = getLexicalOrder(stmt_num); + same_loop = getStatements(lex, dim - 1); + + bool same_as_source = false; + int new_stmts = stmt.size(); + for (int i = old_stmts; i < new_stmts; i++) { + //Check whether we had a sync for the statement we are unrolling, if + //so, propogate that to newly created statements so that if they are + //in a different loop structure, they will also get a syncthreads + int size = syncs.size(); + for (int j = 0; j < size; j++) { + if (syncs[j].first == stmt_num) + syncs.push_back(make_pair(i, syncs[j].second)); + } + + //protonu-making sure the vector of nonSplitLevels grows along with + //the statement structure + stmt_nonSplitLevels.push_back(std::vector<int>()); + + //We expect that new statements have a constant for the variable in + //stmt[i].IS at level (as seen with print_with_subs), otherwise there + //will be a for loop at level and idxNames should match stmt's + //idxNames pre-unrolled + Relation IS = stmt[i].IS; + //Ok, if you know how the hell to get anything out of a Relation, you + //should probably be able to do this more elegantly. But for now, I'm + //hacking it. 
+ std::string s = IS.print_with_subs_to_string(); + //s looks looks like + //{[_t49,8,_t51,_t52,128]: 0 <= _t52 <= 3 && 0 <= _t51 <= 15 && 0 <= _t49 && 64_t49+16_t52+_t51 <= 128} + //where level == 5, you see a integer in the input set + + //If that's not an integer and this is the first new statement, then + //we think codegen will have a loop at that level. It's not perfect, + //not sure if it can be determined without round-tripping to codegen. + int sIdx = 0; + int eIdx = 0; + for (int j = 0; j < level - 1; j++) { + sIdx = s.find(",", sIdx + 1); + if (sIdx < 0) + break; + } + if (sIdx > 0) { + eIdx = s.find("]"); + int tmp = s.find(",", sIdx + 1); + if (tmp > 0 && tmp < eIdx) + eIdx = tmp; //", before ]" + if (eIdx > 0) { + sIdx++; + std::string var = s.substr(sIdx, eIdx - sIdx); + //printf("%s\n", s.c_str()); + //printf("set var for stmt %d at level %d is %s\n", i, level, var.c_str()); + if (atoi(var.c_str()) == 0 && i == old_stmts) { + //TODO:Maybe do see if this new statement would be in the same + //group as the original and if it would, don't say + //same_as_source + if (same_loop.find(i) == same_loop.end()) { + printf( + "stmt %d level %d, newly created unroll statement should have same level indexes as source\n", + i, level); + same_as_source = true; + } + } + } + } + + //printf("fixing up statement %d n_set %d with %d levels\n", i, stmt[i].IS.n_set(), level-1); + if (same_as_source) + idxNames.push_back(origSource); + else + idxNames.push_back(idxNames[stmt_num]); + } + + return b; +} + +void LoopCuda::copy_to_texture(const char *array_name) { + //protonu--placeholder for now + //set the bool for using cuda memory as true + //in a vector of strings, put the names of arrays to tex mapped + if (!texture) + texture = new texture_memory_mapping(true, array_name); + else + texture->add(array_name); + +} + +//void LoopCuda::copy_to_texture_2d(const char *array_name, int width, int height) { +// if (!texture) +// texture = new texture_memory_mapping(true, 
array_name, width, height); +// else +// texture->add(array_name, width, height); +//} + +void LoopCuda::copy_to_constant(const char *array_name) { + if(!constant_mem) + constant_mem = new constant_memory_mapping(true, array_name); + else + constant_mem->add(array_name); +} + +//protonu--moving this from Loop +SgNode* LoopCuda::codegen() { + if (code_gen_flags & GenCudaizeV2) + return cudaize_codegen_v2(); + //Do other flagged codegen methods, return plain vanilla generated code + return getCode(); +} + +//These three are in Omega code_gen.cc and are used as a massive hack to +//get out some info from MMGenerateCode. Yea for nasty side-effects. +namespace omega { + extern int checkLoopLevel; + extern int stmtForLoopCheck; + extern int upperBoundForLevel; + extern int lowerBoundForLevel; +} + +CG_outputRepr* LoopCuda::extractCudaUB(int stmt_num, int level, + int &outUpperBound, int &outLowerBound) { + // check for sanity of parameters + const int m = stmt.size(); + if (stmt_num >= m || stmt_num < 0) + throw std::invalid_argument("invalid statement " + to_string(stmt_num)); + const int n = stmt[stmt_num].xform.n_out(); + if (level > (n - 1) / 2 || level <= 0) + throw std::invalid_argument("invalid loop level " + to_string(level)); + + int dim = 2 * level - 1; + + std::vector<int> lex = getLexicalOrder(stmt_num); + std::set<int> same_loop = getStatements(lex, dim - 1); + + // extract the intersection of the iteration space to be considered + Relation hull; + { + hull = Relation::True(n); + for (std::set<int>::iterator i = same_loop.begin(); + i != same_loop.end(); i++) { + Relation r = getNewIS(*i); + for (int j = dim + 2; j <= r.n_set(); j++) + r = Project(r, r.set_var(j)); + hull = Intersection(hull, r); + hull.simplify(2, 4); + } + + for (int i = 2; i <= dim + 1; i += 2) { + //std::string name = std::string("_t") + to_string(t_counter++); + std::string name = std::string("_t") + + to_string(tmp_loop_var_name_counter++); + hull.name_set_var(i, name); + } + 
hull.setup_names(); + } + + // extract the exact loop bound of the dimension to be unrolled + if (is_single_iteration(hull, dim)) { + throw std::runtime_error( + "No loop availabe at level to extract upper bound."); + } + Relation bound = get_loop_bound(hull, dim); + if (!bound.has_single_conjunct() || !bound.is_satisfiable() + || bound.is_tautology()) + throw loop_error( + "loop error: unable to extract loop bound for cudaize"); + + // extract the loop stride + EQ_Handle stride_eq; + /*int stride = 1; + { + bool simple_stride = true; + int strides = countStrides(bound.query_DNF()->single_conjunct(), + bound.set_var(dim + 1), stride_eq, simple_stride); + if (strides > 1) + throw loop_error("loop error: too many strides"); + else if (strides == 1) { + int sign = stride_eq.get_coef(bound.set_var(dim + 1)); + // assert(sign == 1 || sign == -1); + Constr_Vars_Iter it(stride_eq, true); + stride = abs((*it).coef / sign); + } + } + */ + int stride = 1; + { + + coef_t stride; + std::pair<EQ_Handle, Variable_ID> result = find_simplest_stride(bound, + bound.set_var(dim + 1)); + if (result.second == NULL) + stride = 1; + else + stride = abs(result.first.get_coef(result.second)) + / gcd(abs(result.first.get_coef(result.second)), + abs(result.first.get_coef(bound.set_var(dim + 1)))); + + if (stride > 1) + throw loop_error("loop error: too many strides"); + /*else if (stride == 1) { + int sign = result.first.get_coef(bound.set_var(dim+1)); + assert(sign == 1 || sign == -1); + } */ + } + + if (stride != 1) { + char buf[1024]; + sprintf(buf, "Cudaize: Loop at level %d has non-one stride of %d", + level, stride); + throw std::runtime_error(buf); + } + + //Use code generation system to build tell us our bound information. We + //need a hard upper bound a 0 lower bound. 
+ + checkLoopLevel = level * 2; + stmtForLoopCheck = stmt_num; + upperBoundForLevel = -1; + lowerBoundForLevel = -1; + printCode(1, false); + checkLoopLevel = 0; + + outUpperBound = upperBoundForLevel; + outLowerBound = lowerBoundForLevel; + + if (outUpperBound == -1) { + + CG_result* temp = last_compute_cgr_; + + while (temp) { + CG_loop * loop; + if (loop = dynamic_cast<CG_loop*>(temp)) { + if (loop->level_ == 2 * level) { + Relation bound = copy(loop->bounds_); + Variable_ID v = bound.set_var(2 * level); + for (GEQ_Iterator e( + const_cast<Relation &>(bound).single_conjunct()->GEQs()); + e; e++) { + if ((*e).get_coef(v) < 0 + && (*e).is_const_except_for_global(v)) + return output_upper_bound_repr(ir->builder(), *e, v, + bound, + std::vector<std::pair<CG_outputRepr *, int> >( + bound.n_set(), + std::make_pair( + static_cast<CG_outputRepr *>(NULL), + 0))); + } + } + if (loop->level_ > 2 * level) + break; + else + temp = loop->body_; + } else + break; + } + } + + return NULL; +} + +void LoopCuda::printCode(int effort, bool actuallyPrint) const { + const int m = stmt.size(); + if (m == 0) + return; + const int n = stmt[0].xform.n_out(); + + /*or (int i = 0; i < m; i++) { + IS[i + 1] = stmt[i].IS; + xform[i + 1] = stmt[i].xform; + + //nonSplitLevels[i+1] = stmt[i].nonSplitLevels; + } + */ + + // invalidate saved codegen computation + if (last_compute_cgr_ != NULL) { + delete last_compute_cgr_; + last_compute_cgr_ = NULL; + } + + if (last_compute_cg_ != NULL) { + delete last_compute_cg_; + last_compute_cg_ = NULL; + } + + //Relation known = Extend_Set(copy(this->known), n - this->known.n_set()); + /*CG_stringBuilder *ocg = new CG_stringBuilder(); + Tuple<CG_outputRepr *> nameInfo; + for (int i = 1; i <= m; i++) + nameInfo.append(new CG_stringRepr("s" + to_string(i))); + */ + + // -- replacing MMGenerateCode + // -- formally CG_outputRepr* repr = MMGenerateCode(ocg, xform, IS, nameInfo, known, nonSplitLevels, syncs, idxTupleNames, effort); + // -- in the future, these 
if statements need to be cleaned up. + // -- something like check_lastComputeCG might be a decent protected member function + // -- and/or something that returns a std::vector<CG_outputRepr*> that also checks last_compute_cg_ + //if (last_compute_cg_ == NULL) { + std::vector<Relation> IS(m); + std::vector<Relation> xforms(m); + std::vector<std::vector<int> > nonSplitLevels(m); + + /* std::vector < std::vector <std::string> > idxTupleNames; + if (useIdxNames) { + for (int i = 0; i < idxNames.size(); i++) { + Tuple<std::string> idxs; + for (int j = 0; j < idxNames[i].size(); j++) + idxs.append(idxNames[i][j]); + idxTupleNames.append(idxs); + } + } + */ + for (int i = 0; i < m; i++) { + IS[i] = stmt[i].IS; + xforms[i] = stmt[i].xform; + nonSplitLevels[i] = stmt_nonSplitLevels[i]; + } + Relation known = Extend_Set(copy(this->known), n - this->known.n_set()); + + last_compute_cg_ = new CodeGen(xforms, IS, known, nonSplitLevels, idxNames, + syncs); + + delete last_compute_cgr_; // this was just done above? 
+ last_compute_cgr_ = NULL; + //} + + if (last_compute_cgr_ == NULL || last_compute_effort_ != effort) { + delete last_compute_cgr_; + last_compute_cgr_ = last_compute_cg_->buildAST(effort); + last_compute_effort_ = effort; + } + + //std::vector<CG_outputRepr *> stmts(m); + //for (int i = 0; i < m; i++) + // stmts[i] = stmt[i].code; + //CG_outputRepr* repr = last_compute_cgr_->printRepr(ocg, stmts); + // -- end replacing MMGenerateCode + std::string repr = last_compute_cgr_->printString(); + + if (actuallyPrint) + std::cout << repr << std::endl; + //std::cout << static_cast<CG_stringRepr*>(repr)->GetString(); + /* + for (int i = 1; i <= m; i++) + delete nameInfo[i]; + */ + + //delete ocg; +} + +void LoopCuda::printRuntimeInfo() const { + for (int i = 0; i < stmt.size(); i++) { + Relation IS = stmt[i].IS; + Relation xform = stmt[i].xform; + printf("stmt[%d]\n", i); + printf("IS\n"); + IS.print_with_subs(); + + printf("xform[%d]\n", i); + xform.print_with_subs(); + + } +} + +void LoopCuda::printIndexes() const { + for (int i = 0; i < stmt.size(); i++) { + printf("stmt %d nset %d ", i, stmt[i].IS.n_set()); + + for (int j = 0; j < idxNames[i].size(); j++) { + if (j > 0) + printf(","); + printf("%s", idxNames[i][j].c_str()); + } + printf("\n"); + } +} + +SgNode* LoopCuda::getCode(int effort) const { + const int m = stmt.size(); + if (m == 0) + return new SgNode; + const int n = stmt[0].xform.n_out(); + /* + Tuple<CG_outputRepr *> ni(m); + Tuple < Relation > IS(m); + Tuple < Relation > xform(m); + vector < vector <int> > nonSplitLevels(m); + for (int i = 0; i < m; i++) { + ni[i + 1] = stmt[i].code; + IS[i + 1] = stmt[i].IS; + xform[i + 1] = stmt[i].xform; + nonSplitLevels[i + 1] = stmt_nonSplitLevels[i]; + + //nonSplitLevels[i+1] = stmt[i].nonSplitLevels; + } + */ + //Relation known = Extend_Set(copy(this->known), n - this->known.n_set()); +//#ifdef DEBUG +//#endif + //std::cout << GetString(MMGenerateCode(new CG_stringBuilder(), xform, IS, ni, known, + // 
nonSplitLevels, syncs, idxTupleNames, effort)); + if (last_compute_cgr_ != NULL) { + delete last_compute_cgr_; + last_compute_cgr_ = NULL; + } + + if (last_compute_cg_ != NULL) { + delete last_compute_cg_; + last_compute_cg_ = NULL; + } + + CG_outputBuilder *ocg = ir->builder(); + // -- replacing MMGenerateCode + // -- formally CG_outputRepr* repr = MMGenerateCode(ocg, xform, IS, nameInfo, known, nonSplitLevels, syncs, idxTupleNames, effort); + // -- in the future, these if statements need to be cleaned up. + // -- something like check_lastComputeCG might be a decent protected member function + // -- and/or something that returns a std::vector<CG_outputRepr*> that also checks last_compute_cg_ + //if (last_compute_cg_ == NULL) { + std::vector<Relation> IS(m); + std::vector<Relation> xforms(m); + std::vector<std::vector<int> > nonSplitLevels(m); + for (int i = 0; i < m; i++) { + IS[i] = stmt[i].IS; + xforms[i] = stmt[i].xform; + nonSplitLevels[i] = stmt_nonSplitLevels[i]; + } + + /*std::vector < std::vector<std::string> > idxTupleNames; + if (useIdxNames) { + for (int i = 0; i < idxNames.size(); i++) { + std::vector<std::string> idxs; + for (int j = 0; j < idxNames[i].size(); j++) + idxs.push_back(idxNames[i][j]); + idxTupleNames.push_back(idxs); + } + } + */ + Relation known = Extend_Set(copy(this->known), n - this->known.n_set()); + + last_compute_cg_ = new CodeGen(xforms, IS, known, nonSplitLevels, idxNames, + syncs); + delete last_compute_cgr_; + last_compute_cgr_ = NULL; + //} + + if (last_compute_cgr_ == NULL || last_compute_effort_ != effort) { + delete last_compute_cgr_; + last_compute_cgr_ = last_compute_cg_->buildAST(effort); + last_compute_effort_ = effort; + } + + std::vector<CG_outputRepr *> stmts(m); + for (int i = 0; i < m; i++) + stmts[i] = stmt[i].code; + CG_outputRepr* repr = last_compute_cgr_->printRepr(ocg, stmts); + // -- end replacing MMGenerateCode + + //CG_outputRepr *overflow_initialization = ocg->CreateStmtList(); + CG_outputRepr 
*overflow_initialization = ocg->StmtListAppend(NULL, NULL); + for (std::map<int, std::vector<Free_Var_Decl *> >::const_iterator i = + overflow.begin(); i != overflow.end(); i++) + for (std::vector<Free_Var_Decl *>::const_iterator j = i->second.begin(); + j != i->second.end(); j++) + //overflow_initialization = ocg->StmtListAppend(overflow_initialization, ocg->CreateStmtList(ocg->CreateAssignment(0, ocg->CreateIdent((*j)->base_name()), ocg->CreateInt(0)))); + overflow_initialization = ocg->StmtListAppend( + overflow_initialization, + ocg->StmtListAppend( + ocg->CreateAssignment(0, + ocg->CreateIdent((*j)->base_name()), + ocg->CreateInt(0)), NULL)); + + repr = ocg->StmtListAppend(overflow_initialization, repr); + SgNode *tnl = static_cast<CG_roseRepr *>(repr)->GetCode(); + SgStatementPtrList *list = static_cast<CG_roseRepr *>(repr)->GetList(); + + if (tnl != NULL) + return tnl; + else if (tnl == NULL && list != NULL) { + SgBasicBlock* bb2 = buildBasicBlock(); + + for (SgStatementPtrList::iterator it = (*list).begin(); + it != (*list).end(); it++) + bb2->append_statement(*it); + + tnl = isSgNode(bb2); + } else + throw loop_error("codegen failed"); + + delete repr; + /* + for (int i = 1; i <= m; i++) + delete ni[i]; + */ + return tnl; + +} + +//protonu--adding constructors for the new derived class +LoopCuda::LoopCuda() : + Loop(), code_gen_flags(GenInit) { +} + +LoopCuda::LoopCuda(IR_Control *irc, int loop_num) : + Loop(irc) { + setup_code = NULL; + teardown_code = NULL; + code_gen_flags = 0; + cu_bx = cu_by = cu_tx = cu_ty = cu_tz = 1; + cu_bx_repr = NULL; + cu_tx_repr = NULL; + cu_by_repr = NULL; + cu_ty_repr = NULL; + cu_tz_repr = NULL; + + cu_num_reduce = 0; + cu_mode = GlobalMem; + texture = NULL; + constant_mem = NULL; + + int m = stmt.size(); + //printf("\n the size of stmt(initially) is: %d\n", stmt.size()); + for (int i = 0; i < m; i++) + stmt_nonSplitLevels.push_back(std::vector<int>()); + + globals = ((IR_cudaroseCode *) ir)->gsym_; + globalScope = 
((IR_cudaroseCode *) ir)->first_scope; + parameter_symtab = ((IR_cudaroseCode *) ir)->parameter; + body_symtab = ((IR_cudaroseCode *) ir)->body; + func_body = ((IR_cudaroseCode *) ir)->defn; + func_definition = ((IR_cudaroseCode *) ir)->func_defn; + std::vector<SgForStatement *> tf = ((IR_cudaroseCode *) ir)->get_loops(); + + symtab = tf[loop_num]->get_symbol_table(); + + std::vector<SgForStatement *> deepest = find_deepest_loops( + isSgNode(tf[loop_num])); + + for (int i = 0; i < deepest.size(); i++) { + SgVariableSymbol* vs; + SgForInitStatement* list = deepest[i]->get_for_init_stmt(); + SgStatementPtrList& initStatements = list->get_init_stmt(); + SgStatementPtrList::const_iterator j = initStatements.begin(); + if (SgExprStatement *expr = isSgExprStatement(*j)) + if (SgAssignOp* op = isSgAssignOp(expr->get_expression())) + if (SgVarRefExp* var_ref = isSgVarRefExp(op->get_lhs_operand())) + vs = var_ref->get_symbol(); + + index.push_back(vs->get_name().getString().c_str()); //reflects original code index names + } + + for (int i = 0; i < stmt.size(); i++) + idxNames.push_back(index); //refects prefered index names (used as handles in cudaize v2) + useIdxNames = false; + +} + +void LoopCuda::printIS() { + if (!cudaDebug) return; + int k = stmt.size(); + for (int i = 0; i < k; i++) { + printf(" printing statement:%d\n", i); + stmt[i].IS.print(); + } +} + diff --git a/loop_cuda_rose.hh b/loop_cuda_rose.hh new file mode 100644 index 0000000..5260035 --- /dev/null +++ b/loop_cuda_rose.hh @@ -0,0 +1,132 @@ +#ifndef LOOP_CUDA_HH +#define LOOP_CUDA_HH + +#include "loop.hh" +#include "mem_mapping_utils.hh" +#include <string.h> +#include "rose.h" +//#include <suif1.h> + +using namespace omega; +using namespace SageBuilder; +enum MemoryMode { GlobalMem, SharedMem, TexMem }; + +struct VarDefs { + std::string name; + std::string secondName; + SgExpression* size_expr; //array size as an expression (can be a product of other variables etc) + SgType* type; + SgVariableSymbol* 
in_data; //Variable of array to copy data in from (before kernel call) + SgVariableSymbol* out_data; //Variable of array to copy data out to (after kernel call) + std::vector<int> size_multi_dim; //-1 if linearized, the constant size N, of a NxN 2D array otherwise + bool tex_mapped; //protonu-- true if this variable will be texture mapped, so no need to pass it as a argument + bool cons_mapped; + std::string original_name; //this is such a hack, to store the original name, to store a table to textures used +}; + + +class LoopCuda: public Loop{ + +public: + //std::vector<proc_sym*> new_procs; //Need adding to a fse + std::vector< std::vector<std::string> > idxNames; + std::vector< std::pair<int, std::string> > syncs; + bool useIdxNames; + std::vector<std::string> index; + + SgSymbolTable* symtab; + SgSymbolTable* parameter_symtab; + SgSymbolTable* body_symtab; + SgGlobal* globals; + SgGlobal* globalScope; + SgScopeStatement* func_body; + SgFunctionDefinition* func_definition; + //protonu--inserting this here, Gabe's implementation had it + //the struct statment as nonSplitLevels + std::vector<std::vector<int> > stmt_nonSplitLevels; + + texture_memory_mapping *texture; //protonu + constant_memory_mapping *constant_mem; + std::map<std::string, int> array_dims; + omega::CG_outputRepr *setup_code; + omega::CG_outputRepr *teardown_code; + + unsigned int code_gen_flags; + enum CodeGenFlags { + GenInit = 0x00, + GenCudaizeV2 = 0x02, + }; + + + //varibles used by cudaize_codegen + //block x, y sizes, N and num_red + int cu_bx, cu_by, cu_n, cu_num_reduce; + //block statement and level + int cu_block_stmt, cu_block_level; + //thread x, y, z + int cu_tx, cu_ty, cu_tz; + + //Anand: Adding CG_outputRepr* representations of cu_bx, cu_by, cu_tx, cu_ty + //and cu_tz for non constant loop bounds + + CG_outputRepr *cu_bx_repr, *cu_by_repr, *cu_tx_repr, *cu_ty_repr, *cu_tz_repr; + + //tile statements, and loop-levels (cudaize v1) + std::vector< std::vector<int> > cu_thread_loop; + 
std::vector<int> cu_thread_sync; + MemoryMode cu_mode; + + std::string cu_nx_name, cu_ny_name, cu_kernel_name; + int nonDummyLevel(int stmt, int level); + bool symbolExists(std::string s); + void addSync(int stmt, std::string idx); + void renameIndex(int stmt, std::string idx, std::string newName); + bool validIndexes(int stmt, const std::vector<std::string>& idxs); + CG_outputRepr* extractCudaUB(int stmt_num, int level, int &outUpperBound, int &outLowerBound); + + void printCode(int effort=1, bool actuallyPrint=true) const; + void printRuntimeInfo() const; + void printIndexes() const; + SgNode* getCode(int effort = 1) const; + void printIS(); + + + void permute_cuda(int stmt, const std::vector<std::string>& curOrder); + //protonu-writing a wrapper for the Chun's new permute function + bool permute(int stmt_num, const std::vector<int> &pi); + //end--protonu. + void tile_cuda(int stmt, int level, int outer_level); + void tile_cuda(int level, int tile_size, int outer_level, std::string idxName, std::string ctrlName, TilingMethodType method=StridedTile); + void tile_cuda(int stmt, int level, int tile_size, int outer_level, std::string idxName, std::string ctrlName, TilingMethodType method=StridedTile); + bool datacopy_privatized_cuda(int stmt_num, int level, const std::string &array_name, const std::vector<int> &privatized_levels, bool allow_extra_read = false, int fastest_changing_dimension = -1, int padding_stride = 1, int padding_alignment = 1, bool cuda_shared=false); + bool datacopy_cuda(int stmt_num, int level, const std::string &array_name, std::vector<std::string> new_idxs, bool allow_extra_read = false, int fastest_changing_dimension = -1, int padding_stride = 1, int padding_alignment = 4, bool cuda_shared=false); + bool unroll_cuda(int stmt_num, int level, int unroll_amount); + //protonu--using texture memory + void copy_to_texture(const char *array_name); + void copy_to_constant(const char *array_name); + int findCurLevel(int stmt, std::string idx); + /** + 
* + * @param kernel_name Name of the GPU generated kernel + * @param nx Iteration space over the x dimention + * @param ny Iteration space over the y dimention + * @param tx Tile dimention over x dimention + * @param ty Tile dimention over the y dimention + * @param num_reduce The number of dimentions to reduce by mapping to the GPU implicit blocks/threads + */ + //stmnt_num is referenced from the perspective of being inside the cudize block loops + bool cudaize_v2(std::string kernel_name, std::map<std::string, int> array_dims, + std::vector<std::string> blockIdxs, std::vector<std::string> threadIdxs); + SgNode* cudaize_codegen_v2(); + SgNode* codegen(); + + //protonu--have to add the constructors for the new class + //and maybe destructors (?) + LoopCuda(); + //LoopCuda(IR_Code *ir, tree_for *tf, global_symtab* gsym); + LoopCuda(IR_Control *ir_c, int loop_num);//protonu-added so as to not change ir_suif + ~LoopCuda(); + +}; + +#endif diff --git a/loop_datacopy.cc b/loop_datacopy.cc new file mode 100644 index 0000000..36acb01 --- /dev/null +++ b/loop_datacopy.cc @@ -0,0 +1,2166 @@ +/***************************************************************************** + Copyright (C) 2008 University of Southern California + Copyright (C) 2009-2010 University of Utah + All Rights Reserved. + + Purpose: + Various data copy schemes. + + Notes: + + History: + 02/20/09 Created by Chun Chen by splitting original datacopy from loop.cc +*****************************************************************************/ + +#include <codegen.h> +#include <code_gen/CG_utils.h> +#include "loop.hh" +#include "omegatools.hh" +#include "ir_code.hh" +#include "chill_error.hh" + +using namespace omega; + +// +// data copy function by referring arrays by numbers. +// e.g. 
A[i] = A[i-1] + B[i] +// parameter array_ref_num=[0,2] means to copy data touched by A[i-1] and A[i] +// +bool Loop::datacopy(const std::vector<std::pair<int, std::vector<int> > > &array_ref_nums, int level, + bool allow_extra_read, int fastest_changing_dimension, int padding_stride, int padding_alignment, int memory_type) { + // check for sanity of parameters + std::set<int> same_loop; + for (int i = 0; i < array_ref_nums.size(); i++) { + int stmt_num = array_ref_nums[i].first; + if (stmt_num < 0 || stmt_num >= stmt.size()) + throw std::invalid_argument("invalid statement number " + to_string(stmt_num)); + if (level <= 0 || level > stmt[stmt_num].loop_level.size()) + throw std::invalid_argument("invalid loop level " + to_string(level)); + if (i == 0) { + std::vector<int> lex = getLexicalOrder(stmt_num); + same_loop = getStatements(lex, 2*level-2); + } + else if (same_loop.find(stmt_num) == same_loop.end()) + throw std::invalid_argument("array references for data copy must be located in the same subloop"); + } + + // convert array reference numbering scheme to actual array references + std::vector<std::pair<int, std::vector<IR_ArrayRef *> > > selected_refs; + for (int i = 0; i < array_ref_nums.size(); i++) { + if (array_ref_nums[i].second.size() == 0) + continue; + + int stmt_num = array_ref_nums[i].first; + selected_refs.push_back(std::make_pair(stmt_num, std::vector<IR_ArrayRef *>())); + std::vector<IR_ArrayRef *> refs = ir->FindArrayRef(stmt[stmt_num].code); + std::vector<bool> selected(refs.size(), false); + for (int j = 0; j < array_ref_nums[i].second.size(); j++) { + int ref_num = array_ref_nums[i].second[j]; + if (ref_num < 0 || ref_num >= refs.size()) { + for (int k = 0; k < refs.size(); k++) + delete refs[k]; + throw std::invalid_argument("invalid array reference number " + to_string(ref_num) + " in statement " + to_string(stmt_num)); + } + selected_refs[selected_refs.size()-1].second.push_back(refs[ref_num]); + selected[ref_num] = true; + } + for (int j = 
0; j < refs.size(); j++) + if (!selected[j]) + delete refs[j]; + } + if (selected_refs.size() == 0) + throw std::invalid_argument("found no array references to copy"); + + // do the copy + return datacopy_privatized(selected_refs, level, std::vector<int>(), allow_extra_read, fastest_changing_dimension, padding_stride, padding_alignment, memory_type); +} + +// +// data copy function by referring arrays by name. +// e.g. A[i] = A[i-1] + B[i] +// parameter array_name=A means to copy data touched by A[i-1] and A[i] +// +bool Loop::datacopy(int stmt_num, int level, const std::string &array_name, + bool allow_extra_read, int fastest_changing_dimension, int padding_stride, int padding_alignment, int memory_type) { + // check for sanity of parameters + if (stmt_num < 0 || stmt_num >= stmt.size()) + throw std::invalid_argument("invalid statement number " + to_string(stmt_num)); + if (level <= 0 || level > stmt[stmt_num].loop_level.size()) + throw std::invalid_argument("invalid loop level " + to_string(level)); + + // collect array references by name + std::vector<int> lex = getLexicalOrder(stmt_num); + int dim = 2*level - 1; + std::set<int> same_loop = getStatements(lex, dim-1); + + std::vector<std::pair<int, std::vector<IR_ArrayRef *> > > selected_refs; + for (std::set<int>::iterator i = same_loop.begin(); i != same_loop.end(); i++) { + std::vector<IR_ArrayRef *> t; + std::vector<IR_ArrayRef *> refs = ir->FindArrayRef(stmt[*i].code); + for (int j = 0; j < refs.size(); j++) + if (refs[j]->name() == array_name) + t.push_back(refs[j]); + else + delete refs[j]; + if (t.size() != 0) + selected_refs.push_back(std::make_pair(*i, t)); + } + if (selected_refs.size() == 0) + throw std::invalid_argument("found no array references with name " + to_string(array_name) + " to copy"); + + // do the copy + return datacopy_privatized(selected_refs, level, std::vector<int>(), allow_extra_read, fastest_changing_dimension, padding_stride, padding_alignment, memory_type); +} + + +bool 
Loop::datacopy_privatized(int stmt_num, int level, const std::string &array_name, const std::vector<int> &privatized_levels, + bool allow_extra_read, int fastest_changing_dimension, int padding_stride, int padding_alignment, int memory_type) { + // check for sanity of parameters + if (stmt_num < 0 || stmt_num >= stmt.size()) + throw std::invalid_argument("invalid statement number " + to_string(stmt_num)); + if (level <= 0 || level > stmt[stmt_num].loop_level.size()) + throw std::invalid_argument("invalid loop level " + to_string(level)); + + // collect array references by name + std::vector<int> lex = getLexicalOrder(stmt_num); + int dim = 2*level - 1; + std::set<int> same_loop = getStatements(lex, dim-1); + + std::vector<std::pair<int, std::vector<IR_ArrayRef *> > > selected_refs; + for (std::set<int>::iterator i = same_loop.begin(); i != same_loop.end(); i++) { + selected_refs.push_back(std::make_pair(*i, std::vector<IR_ArrayRef *>())); + + std::vector<IR_ArrayRef *> refs = ir->FindArrayRef(stmt[*i].code); + for (int j = 0; j < refs.size(); j++) + if (refs[j]->name() == array_name) + selected_refs[selected_refs.size()-1].second.push_back(refs[j]); + else + delete refs[j]; + } + if (selected_refs.size() == 0) + throw std::invalid_argument("found no array references with name " + to_string(array_name) + " to copy"); + + // do the copy + return datacopy_privatized(selected_refs, level, privatized_levels, allow_extra_read, fastest_changing_dimension, padding_stride, padding_alignment, memory_type); +} + + +bool Loop::datacopy_privatized(const std::vector<std::pair<int, std::vector<int> > > &array_ref_nums, int level, const std::vector<int> &privatized_levels, bool allow_extra_read, int fastest_changing_dimension, int padding_stride, int padding_alignment, int memory_type) { + // check for sanity of parameters + std::set<int> same_loop; + for (int i = 0; i < array_ref_nums.size(); i++) { + int stmt_num = array_ref_nums[i].first; + if (stmt_num < 0 || stmt_num >= 
stmt.size()) + throw std::invalid_argument("invalid statement number " + to_string(stmt_num)); + if (level <= 0 || level > stmt[stmt_num].loop_level.size()) + throw std::invalid_argument("invalid loop level " + to_string(level)); + if (i == 0) { + std::vector<int> lex = getLexicalOrder(stmt_num); + same_loop = getStatements(lex, 2*level-2); + } + else if (same_loop.find(stmt_num) == same_loop.end()) + throw std::invalid_argument("array references for data copy must be located in the same subloop"); + } + + // convert array reference numbering scheme to actual array references + std::vector<std::pair<int, std::vector<IR_ArrayRef *> > > selected_refs; + for (int i = 0; i < array_ref_nums.size(); i++) { + if (array_ref_nums[i].second.size() == 0) + continue; + + int stmt_num = array_ref_nums[i].first; + selected_refs.push_back(std::make_pair(stmt_num, std::vector<IR_ArrayRef *>())); + std::vector<IR_ArrayRef *> refs = ir->FindArrayRef(stmt[stmt_num].code); + std::vector<bool> selected(refs.size(), false); + for (int j = 0; j < array_ref_nums[i].second.size(); j++) { + int ref_num = array_ref_nums[i].second[j]; + if (ref_num < 0 || ref_num >= refs.size()) { + for (int k = 0; k < refs.size(); k++) + delete refs[k]; + throw std::invalid_argument("invalid array reference number " + to_string(ref_num) + " in statement " + to_string(stmt_num)); + } + selected_refs[selected_refs.size()-1].second.push_back(refs[ref_num]); + selected[ref_num] = true; + } + for (int j = 0; j < refs.size(); j++) + if (!selected[j]) + delete refs[j]; + } + if (selected_refs.size() == 0) + throw std::invalid_argument("found no array references to copy"); + + // do the copy + return datacopy_privatized(selected_refs, level, privatized_levels, allow_extra_read, fastest_changing_dimension, padding_stride, padding_alignment, memory_type); +} + + +// +// Implement low level datacopy function with lots of options. 
+// +/*bool Loop::datacopy_privatized(const std::vector<std::pair<int, std::vector<IR_ArrayRef *> > > &stmt_refs, int level, + const std::vector<int> &privatized_levels, + bool allow_extra_read, int fastest_changing_dimension, + int padding_stride, int padding_alignment, int memory_type) { + if (stmt_refs.size() == 0) + return true; + + // check for sanity of parameters + IR_ArraySymbol *sym = NULL; + std::vector<int> lex; + std::set<int> active; + if (level <= 0) + throw std::invalid_argument("invalid loop level " + to_string(level)); + for (int i = 0; i < privatized_levels.size(); i++) { + if (i == 0) { + if (privatized_levels[i] < level) + throw std::invalid_argument("privatized loop levels must be no less than level " + to_string(level)); + } + else if (privatized_levels[i] <= privatized_levels[i-1]) + throw std::invalid_argument("privatized loop levels must be in ascending order"); + } + for (int i = 0; i < stmt_refs.size(); i++) { + int stmt_num = stmt_refs[i].first; + active.insert(stmt_num); + if (stmt_num < 0 || stmt_num >= stmt.size()) + throw std::invalid_argument("invalid statement number " + to_string(stmt_num)); + if (privatized_levels.size() != 0) { + if (privatized_levels[privatized_levels.size()-1] > stmt[stmt_num].loop_level.size()) + throw std::invalid_argument("invalid loop level " + to_string(privatized_levels[privatized_levels.size()-1]) + " for statement " + to_string(stmt_num)); + } + else { + if (level > stmt[stmt_num].loop_level.size()) + throw std::invalid_argument("invalid loop level " + to_string(level) + " for statement " + to_string(stmt_num)); + } + for (int j = 0; j < stmt_refs[i].second.size(); j++) { + if (sym == NULL) { + sym = stmt_refs[i].second[j]->symbol(); + lex = getLexicalOrder(stmt_num); + } + else { + IR_ArraySymbol *t = stmt_refs[i].second[j]->symbol(); + if (t->name() != sym->name()) { + delete t; + delete sym; + throw std::invalid_argument("try to copy data from different arrays"); + } + delete t; + } + } + } + if 
(!(fastest_changing_dimension >= -1 && fastest_changing_dimension < sym->n_dim())) + throw std::invalid_argument("invalid fastest changing dimension for the array to be copied"); + if (padding_stride < 0) + throw std::invalid_argument("invalid temporary array stride requirement"); + if (padding_alignment == -1 || padding_alignment == 0) + throw std::invalid_argument("invalid temporary array alignment requirement"); + + int dim = 2*level - 1; + int n_dim = sym->n_dim(); + + if (fastest_changing_dimension == -1) + switch (sym->layout_type()) { + case IR_ARRAY_LAYOUT_ROW_MAJOR: + fastest_changing_dimension = n_dim - 1; + break; + case IR_ARRAY_LAYOUT_COLUMN_MAJOR: + fastest_changing_dimension = 0; + break; + default: + throw loop_error("unsupported array layout"); + } + + + // build iteration spaces for all reads and for all writes separately + apply_xform(active); + bool has_write_refs = false; + bool has_read_refs = false; + Relation wo_copy_is = Relation::False(level-1+privatized_levels.size()+n_dim); + Relation ro_copy_is = Relation::False(level-1+privatized_levels.size()+n_dim); + for (int i = 0; i < stmt_refs.size(); i++) { + int stmt_num = stmt_refs[i].first; + + for (int j = 0; j < stmt_refs[i].second.size(); j++) { + Relation mapping(stmt[stmt_num].IS.n_set(), level-1+privatized_levels.size()+n_dim); + for (int k = 1; k <= mapping.n_inp(); k++) + mapping.name_input_var(k, stmt[stmt_num].IS.set_var(k)->name()); + mapping.setup_names(); + F_And *f_root = mapping.add_and(); + for (int k = 1; k <= level-1; k++) { + EQ_Handle h = f_root->add_EQ(); + h.update_coef(mapping.input_var(k), 1); + h.update_coef(mapping.output_var(k), -1); + } + for (int k = 0; k < privatized_levels.size(); k++) { + EQ_Handle h = f_root->add_EQ(); + h.update_coef(mapping.input_var(privatized_levels[k]), 1); + h.update_coef(mapping.output_var(level+k), -1); + } + for (int k = 0; k < n_dim; k++) { + CG_outputRepr *repr = stmt_refs[i].second[j]->index(k); + exp2formula(ir, mapping, f_root, 
freevar, repr, mapping.output_var(level-1+privatized_levels.size()+k+1), 'w', IR_COND_EQ, false); + repr->clear(); + delete repr; + } + Relation r = Range(Restrict_Domain(mapping, Intersection(copy(stmt[stmt_num].IS), Extend_Set(copy(this->known), stmt[stmt_num].IS.n_set() - this->known.n_set())))); + if (stmt_refs[i].second[j]->is_write()) { + has_write_refs = true; + wo_copy_is = Union(wo_copy_is, r); + wo_copy_is.simplify(2, 4); + } + else { + has_read_refs = true; + //protonu--removing the next line for now + ro_copy_is = Union(ro_copy_is, r); + ro_copy_is.simplify(2, 4); + //ro_copy_is = ConvexRepresentation(Union(ro_copy_is, r)); + + } + } + } + + if (allow_extra_read) { + Relation t = DecoupledConvexHull(copy(ro_copy_is)); + if (t.number_of_conjuncts() > 1) + ro_copy_is = RectHull(ro_copy_is); + else + ro_copy_is = t; + } + else { + Relation t = ConvexRepresentation(copy(ro_copy_is)); + if (t.number_of_conjuncts() > 1) + ro_copy_is = RectHull(ro_copy_is); + else + ro_copy_is = t; + } + wo_copy_is = ConvexRepresentation(wo_copy_is); + + if (allow_extra_read) { + Tuple<Relation> Rs; + Tuple<int> active; + for (DNF_Iterator di(ro_copy_is.query_DNF()); di; di++) { + Rs.append(Relation(ro_copy_is, di.curr())); + active.append(1); + } + Relation the_gcs = Relation::True(ro_copy_is.n_set()); + for (int i = level-1+privatized_levels.size()+1; i <= level-1+privatized_levels.size()+n_dim; i++) { + Relation r = greatest_common_step(Rs, active, i, Relation::Null()); + the_gcs = Intersection(the_gcs, r); + } + + ro_copy_is = Approximate(ro_copy_is); + ro_copy_is = ConvexRepresentation(ro_copy_is); + ro_copy_is = Intersection(ro_copy_is, the_gcs); + ro_copy_is.simplify(); + } + + + + for (int i = 1; i < level; i++) { + std::string s = stmt[*active.begin()].IS.input_var(i)->name(); + wo_copy_is.name_set_var(i, s); + ro_copy_is.name_set_var(i, s); + } + for (int i = 0; i < privatized_levels.size(); i++) { + std::string s = 
stmt[*active.begin()].IS.input_var(privatized_levels[i])->name(); + wo_copy_is.name_set_var(level+i, s); + ro_copy_is.name_set_var(level+i, s); + } + for (int i = level+privatized_levels.size(); i < level+privatized_levels.size()+n_dim; i++) { + std::string s = tmp_loop_var_name_prefix + to_string(tmp_loop_var_name_counter+i-level-privatized_levels.size()); + wo_copy_is.name_set_var(i, s); + ro_copy_is.name_set_var(i, s); + } + tmp_loop_var_name_counter += n_dim; + + //protonu--end change + + wo_copy_is.setup_names(); + ro_copy_is.setup_names(); + + // build merged iteration space for calculating temporary array size + bool already_use_recthull = false; + Relation untampered_copy_is = ConvexRepresentation(Union(copy(wo_copy_is), copy(ro_copy_is))); + Relation copy_is = untampered_copy_is; + if (copy_is.number_of_conjuncts() > 1) { + try { + copy_is = ConvexHull(copy(untampered_copy_is)); + } + catch (const std::overflow_error &e) { + copy_is = RectHull(copy(untampered_copy_is)); + already_use_recthull = true; + } + } + + + Retry_copy_is: + // extract temporary array information + CG_outputBuilder *ocg = ir->builder(); + std::vector<CG_outputRepr *> index_lb(n_dim); // initialized to NULL + std::vector<coef_t> index_stride(n_dim, 1); + std::vector<bool> is_index_eq(n_dim, false); + std::vector<std::pair<int, CG_outputRepr *> > index_sz(0); + Relation reduced_copy_is = copy(copy_is); + + for (int i = 0; i < n_dim; i++) { + if (i != 0) + reduced_copy_is = Project(reduced_copy_is, level-1+privatized_levels.size()+i, Set_Var); + Relation bound = get_loop_bound(reduced_copy_is, level-1+privatized_levels.size()+i); + + // extract stride + EQ_Handle stride_eq; + { + bool simple_stride = true; + int strides = countStrides(bound.query_DNF()->single_conjunct(), bound.set_var(level-1+privatized_levels.size()+i+1), stride_eq, simple_stride); + if (strides > 1) { + throw loop_error("too many strides"); + } + else if (strides == 1) { + int sign = 
stride_eq.get_coef(bound.set_var(level-1+privatized_levels.size()+i+1)); + Constr_Vars_Iter it(stride_eq, true); + index_stride[i] = abs((*it).coef/sign); + } + } + + // check if this arary index requires loop + Conjunct *c = bound.query_DNF()->single_conjunct(); + for (EQ_Iterator ei(c->EQs()); ei; ei++) { + if ((*ei).has_wildcards()) + continue; + + int coef = (*ei).get_coef(bound.set_var(level-1+privatized_levels.size()+i+1)); + if (coef != 0) { + int sign = 1; + if (coef < 0) { + coef = -coef; + sign = -1; + } + + CG_outputRepr *op = NULL; + for (Constr_Vars_Iter ci(*ei); ci; ci++) { + switch ((*ci).var->kind()) { + case Input_Var: + { + if ((*ci).var != bound.set_var(level-1+privatized_levels.size()+i+1)) + if ((*ci).coef*sign == 1) + op = ocg->CreateMinus(op, ocg->CreateIdent((*ci).var->name())); + else if ((*ci).coef*sign == -1) + op = ocg->CreatePlus(op, ocg->CreateIdent((*ci).var->name())); + else if ((*ci).coef*sign > 1) + op = ocg->CreateMinus(op, ocg->CreateTimes(ocg->CreateInt(abs((*ci).coef)), ocg->CreateIdent((*ci).var->name()))); + else // (*ci).coef*sign < -1 + op = ocg->CreatePlus(op, ocg->CreateTimes(ocg->CreateInt(abs((*ci).coef)), ocg->CreateIdent((*ci).var->name()))); + break; + } + case Global_Var: + { + Global_Var_ID g = (*ci).var->get_global_var(); + if ((*ci).coef*sign == 1) + op = ocg->CreateMinus(op, ocg->CreateIdent(g->base_name())); + else if ((*ci).coef*sign == -1) + op = ocg->CreatePlus(op, ocg->CreateIdent(g->base_name())); + else if ((*ci).coef*sign > 1) + op = ocg->CreateMinus(op, ocg->CreateTimes(ocg->CreateInt(abs((*ci).coef)), ocg->CreateIdent(g->base_name()))); + else // (*ci).coef*sign < -1 + op = ocg->CreatePlus(op, ocg->CreateTimes(ocg->CreateInt(abs((*ci).coef)), ocg->CreateIdent(g->base_name()))); + break; + } + default: + throw loop_error("unsupported array index expression"); + } + } + if ((*ei).get_const() != 0) + op = ocg->CreatePlus(op, ocg->CreateInt(-sign*((*ei).get_const()))); + if (coef != 1) + op = 
ocg->CreateIntegerDivide(op, ocg->CreateInt(coef)); + + index_lb[i] = op; + is_index_eq[i] = true; + break; + } + } + if (is_index_eq[i]) + continue; + + // seperate lower and upper bounds + std::vector<GEQ_Handle> lb_list, ub_list; + for (GEQ_Iterator gi(c->GEQs()); gi; gi++) { + int coef = (*gi).get_coef(bound.set_var(level-1+privatized_levels.size()+i+1)); + if (coef != 0 && (*gi).has_wildcards()) { + bool clean_bound = true; + GEQ_Handle h; + for (Constr_Vars_Iter cvi(*gi, true); gi; gi++) + if (!findFloorInequality(bound, (*cvi).var, h, bound.set_var(level-1+privatized_levels.size()+i+1))) { + clean_bound = false; + break; + } + if (!clean_bound) + continue; + } + + if (coef > 0) + lb_list.push_back(*gi); + else if (coef < 0) + ub_list.push_back(*gi); + } + if (lb_list.size() == 0 || ub_list.size() == 0) + if (already_use_recthull) + throw loop_error("failed to calcuate array footprint size"); + else { + copy_is = RectHull(copy(untampered_copy_is)); + already_use_recthull = true; + goto Retry_copy_is; + } + + // build lower bound representation + Tuple<CG_outputRepr *> lb_repr_list; + for (int j = 0; j < lb_list.size(); j++) + lb_repr_list.append(outputLBasRepr(ocg, lb_list[j], bound, + bound.set_var(level-1+privatized_levels.size()+i+1), + index_stride[i], stride_eq, Relation::True(bound.n_set()), + std::vector<CG_outputRepr *>(bound.n_set()))); + + if (lb_repr_list.size() > 1) + index_lb[i] = ocg->CreateInvoke("max", lb_repr_list); + else if (lb_repr_list.size() == 1) + index_lb[i] = lb_repr_list[1]; + + // build temporary array size representation + { + Relation cal(copy_is.n_set(), 1); + F_And *f_root = cal.add_and(); + for (int j = 0; j < ub_list.size(); j++) + for (int k = 0; k < lb_list.size(); k++) { + GEQ_Handle h = f_root->add_GEQ(); + + for (Constr_Vars_Iter ci(ub_list[j]); ci; ci++) { + switch ((*ci).var->kind()) { + case Input_Var: + { + int pos = (*ci).var->get_position(); + h.update_coef(cal.input_var(pos), (*ci).coef); + break; + } + case 
Global_Var: + { + Global_Var_ID g = (*ci).var->get_global_var(); + Variable_ID v; + if (g->arity() == 0) + v = cal.get_local(g); + else + v = cal.get_local(g, (*ci).var->function_of()); + h.update_coef(v, (*ci).coef); + break; + } + default: + throw loop_error("cannot calculate temporay array size statically"); + } + } + h.update_const(ub_list[j].get_const()); + + for (Constr_Vars_Iter ci(lb_list[k]); ci; ci++) { + switch ((*ci).var->kind()) { + case Input_Var: + { + int pos = (*ci).var->get_position(); + h.update_coef(cal.input_var(pos), (*ci).coef); + break; + } + case Global_Var: + { + Global_Var_ID g = (*ci).var->get_global_var(); + Variable_ID v; + if (g->arity() == 0) + v = cal.get_local(g); + else + v = cal.get_local(g, (*ci).var->function_of()); + h.update_coef(v, (*ci).coef); + break; + } + default: + throw loop_error("cannot calculate temporay array size statically"); + } + } + h.update_const(lb_list[k].get_const()); + + h.update_const(1); + h.update_coef(cal.output_var(1), -1); + } + + cal = Restrict_Domain(cal, copy(copy_is)); + for (int j = 1; j <= cal.n_inp(); j++) + cal = Project(cal, j, Input_Var); + cal.simplify(); + + // pad temporary array size + // TODO: for variable array size, create padding formula + Conjunct *c = cal.query_DNF()->single_conjunct(); + bool is_index_bound_const = false; + for (GEQ_Iterator gi(c->GEQs()); gi && !is_index_bound_const; gi++) + if ((*gi).is_const(cal.output_var(1))) { + coef_t size = (*gi).get_const() / (-(*gi).get_coef(cal.output_var(1))); + if (padding_stride != 0) { + size = (size + index_stride[i] - 1) / index_stride[i]; + if (i == fastest_changing_dimension) + size = size * padding_stride; + } + if (i == fastest_changing_dimension) { + if (padding_alignment > 1) { // align to boundary for data packing + int residue = size % padding_alignment; + if (residue) + size = size+padding_alignment-residue; + } + else if (padding_alignment < -1) { // un-alignment for memory bank conflicts + while (gcd(size, 
static_cast<coef_t>(-padding_alignment)) != 1) + size++; + } + } + index_sz.push_back(std::make_pair(i, ocg->CreateInt(size))); + is_index_bound_const = true; + } + + if (!is_index_bound_const) { + for (GEQ_Iterator gi(c->GEQs()); gi && !is_index_bound_const; gi++) { + int coef = (*gi).get_coef(cal.output_var(1)); + if (coef < 0) { + CG_outputRepr *op = NULL; + for (Constr_Vars_Iter ci(*gi); ci; ci++) { + if ((*ci).var != cal.output_var(1)) { + switch((*ci).var->kind()) { + case Global_Var: + { + Global_Var_ID g = (*ci).var->get_global_var(); + if ((*ci).coef == 1) + op = ocg->CreatePlus(op, ocg->CreateIdent(g->base_name())); + else if ((*ci).coef == -1) + op = ocg->CreateMinus(op, ocg->CreateIdent(g->base_name())); + else if ((*ci).coef > 1) + op = ocg->CreatePlus(op, ocg->CreateTimes(ocg->CreateInt((*ci).coef), ocg->CreateIdent(g->base_name()))); + else // (*ci).coef < -1 + op = ocg->CreateMinus(op, ocg->CreateTimes(ocg->CreateInt(-(*ci).coef), ocg->CreateIdent(g->base_name()))); + break; + } + default: + throw loop_error("failed to generate array index bound code"); + } + } + } + int c = (*gi).get_const(); + if (c > 0) + op = ocg->CreatePlus(op, ocg->CreateInt(c)); + else if (c < 0) + op = ocg->CreateMinus(op, ocg->CreateInt(-c)); + if (padding_stride != 0) { + if (i == fastest_changing_dimension) { + coef_t g = gcd(index_stride[i], static_cast<coef_t>(padding_stride)); + coef_t t1 = index_stride[i] / g; + if (t1 != 1) + op = ocg->CreateIntegerDivide(ocg->CreatePlus(op, ocg->CreateInt(t1-1)), ocg->CreateInt(t1)); + coef_t t2 = padding_stride / g; + if (t2 != 1) + op = ocg->CreateTimes(op, ocg->CreateInt(t2)); + } + else if (index_stride[i] != 1) { + op = ocg->CreateIntegerDivide(ocg->CreatePlus(op, ocg->CreateInt(index_stride[i]-1)), ocg->CreateInt(index_stride[i])); + } + } + + index_sz.push_back(std::make_pair(i, op)); + break; + } + } + } + } + } + + // change the temporary array index order + for (int i = 0; i < index_sz.size(); i++) + if (index_sz[i].first 
== fastest_changing_dimension) + switch (sym->layout_type()) { + case IR_ARRAY_LAYOUT_ROW_MAJOR: + std::swap(index_sz[index_sz.size()-1], index_sz[i]); + break; + case IR_ARRAY_LAYOUT_COLUMN_MAJOR: + std::swap(index_sz[0], index_sz[i]); + break; + default: + throw loop_error("unsupported array layout"); + } + + // declare temporary array or scalar + IR_Symbol *tmp_sym; + if (index_sz.size() == 0) { + tmp_sym = ir->CreateScalarSymbol(sym, memory_type); + } + else { + std::vector<CG_outputRepr *> tmp_array_size(index_sz.size()); + for (int i = 0; i < index_sz.size(); i++) + tmp_array_size[i] = index_sz[i].second->clone(); + tmp_sym = ir->CreateArraySymbol(sym, tmp_array_size, memory_type); + } + + // create temporary array read initialization code + CG_outputRepr *copy_code_read; + if (has_read_refs) + if (index_sz.size() == 0) { + IR_ScalarRef *tmp_scalar_ref = ir->CreateScalarRef(static_cast<IR_ScalarSymbol *>(tmp_sym)); + + std::vector<CG_outputRepr *> rhs_index(n_dim); + for (int i = 0; i < index_lb.size(); i++) + if (is_index_eq[i]) + rhs_index[i] = index_lb[i]->clone(); + else + rhs_index[i] = ir->builder()->CreateIdent(copy_is.set_var(level-1+privatized_levels.size()+i+1)->name()); + IR_ArrayRef *copied_array_ref = ir->CreateArrayRef(sym, rhs_index); + + copy_code_read = ir->builder()->CreateAssignment(0, tmp_scalar_ref->convert(), copied_array_ref->convert()); + } + else { + std::vector<CG_outputRepr *> lhs_index(index_sz.size()); + for (int i = 0; i < index_sz.size(); i++) { + int cur_index_num = index_sz[i].first; + CG_outputRepr *cur_index_repr = ocg->CreateMinus(ocg->CreateIdent(copy_is.set_var(level-1+privatized_levels.size()+cur_index_num+1)->name()), index_lb[cur_index_num]->clone()); + if (padding_stride != 0) { + if (i == n_dim-1) { + coef_t g = gcd(index_stride[cur_index_num], static_cast<coef_t>(padding_stride)); + coef_t t1 = index_stride[cur_index_num] / g; + if (t1 != 1) + cur_index_repr = ocg->CreateIntegerDivide(cur_index_repr, 
ocg->CreateInt(t1)); + coef_t t2 = padding_stride / g; + if (t2 != 1) + cur_index_repr = ocg->CreateTimes(cur_index_repr, ocg->CreateInt(t2)); + } + else if (index_stride[cur_index_num] != 1) { + cur_index_repr = ocg->CreateIntegerDivide(cur_index_repr, ocg->CreateInt(index_stride[cur_index_num])); + } + } + + if (ir->ArrayIndexStartAt() != 0) + cur_index_repr = ocg->CreatePlus(cur_index_repr, ocg->CreateInt(ir->ArrayIndexStartAt())); + lhs_index[i] = cur_index_repr; + } + + IR_ArrayRef *tmp_array_ref = ir->CreateArrayRef(static_cast<IR_ArraySymbol *>(tmp_sym), lhs_index); + + std::vector<CG_outputRepr *> rhs_index(n_dim); + for (int i = 0; i < index_lb.size(); i++) + if (is_index_eq[i]) + rhs_index[i] = index_lb[i]->clone(); + else + rhs_index[i] = ir->builder()->CreateIdent(copy_is.set_var(level-1+privatized_levels.size()+i+1)->name()); + IR_ArrayRef *copied_array_ref = ir->CreateArrayRef(sym, rhs_index); + + copy_code_read = ir->builder()->CreateAssignment(0, tmp_array_ref->convert(), copied_array_ref->convert()); + } + + // create temporary array write back code + CG_outputRepr *copy_code_write; + if (has_write_refs) + if (index_sz.size() == 0) { + IR_ScalarRef *tmp_scalar_ref = ir->CreateScalarRef(static_cast<IR_ScalarSymbol *>(tmp_sym)); + + std::vector<CG_outputRepr *> rhs_index(n_dim); + for (int i = 0; i < index_lb.size(); i++) + if (is_index_eq[i]) + rhs_index[i] = index_lb[i]->clone(); + else + rhs_index[i] = ir->builder()->CreateIdent(copy_is.set_var(level-1+privatized_levels.size()+i+1)->name()); + IR_ArrayRef *copied_array_ref = ir->CreateArrayRef(sym, rhs_index); + + copy_code_write = ir->builder()->CreateAssignment(0, copied_array_ref->convert(), tmp_scalar_ref->convert()); + } + else { + std::vector<CG_outputRepr *> lhs_index(n_dim); + for (int i = 0; i < index_lb.size(); i++) + if (is_index_eq[i]) + lhs_index[i] = index_lb[i]->clone(); + else + lhs_index[i] = 
ir->builder()->CreateIdent(copy_is.set_var(level-1+privatized_levels.size()+i+1)->name()); + IR_ArrayRef *copied_array_ref = ir->CreateArrayRef(sym, lhs_index); + + std::vector<CG_outputRepr *> rhs_index(index_sz.size()); + for (int i = 0; i < index_sz.size(); i++) { + int cur_index_num = index_sz[i].first; + CG_outputRepr *cur_index_repr = ocg->CreateMinus(ocg->CreateIdent(copy_is.set_var(level-1+privatized_levels.size()+cur_index_num+1)->name()), index_lb[cur_index_num]->clone()); + if (padding_stride != 0) { + if (i == n_dim-1) { + coef_t g = gcd(index_stride[cur_index_num], static_cast<coef_t>(padding_stride)); + coef_t t1 = index_stride[cur_index_num] / g; + if (t1 != 1) + cur_index_repr = ocg->CreateIntegerDivide(cur_index_repr, ocg->CreateInt(t1)); + coef_t t2 = padding_stride / g; + if (t2 != 1) + cur_index_repr = ocg->CreateTimes(cur_index_repr, ocg->CreateInt(t2)); + } + else if (index_stride[cur_index_num] != 1) { + cur_index_repr = ocg->CreateIntegerDivide(cur_index_repr, ocg->CreateInt(index_stride[cur_index_num])); + } + } + + if (ir->ArrayIndexStartAt() != 0) + cur_index_repr = ocg->CreatePlus(cur_index_repr, ocg->CreateInt(ir->ArrayIndexStartAt())); + rhs_index[i] = cur_index_repr; + } + IR_ArrayRef *tmp_array_ref = ir->CreateArrayRef(static_cast<IR_ArraySymbol *>(tmp_sym), rhs_index); + + copy_code_write = ir->builder()->CreateAssignment(0, copied_array_ref->convert(), tmp_array_ref->convert()); + } + + // now we can remove those loops for array indexes that are + // dependent on others + if (!(index_sz.size() == n_dim && (sym->layout_type() == IR_ARRAY_LAYOUT_ROW_MAJOR || n_dim <= 1))) { + Relation mapping(level-1+privatized_levels.size()+n_dim, level-1+privatized_levels.size()+index_sz.size()); + F_And *f_root = mapping.add_and(); + for (int i = 1; i <= level-1+privatized_levels.size(); i++) { + EQ_Handle h = f_root->add_EQ(); + h.update_coef(mapping.input_var(i), 1); + h.update_coef(mapping.output_var(i), -1); + } + + int cur_index = 0; + 
std::vector<int> mapped_index(index_sz.size()); + for (int i = 0; i < n_dim; i++) + if (!is_index_eq[i]) { + EQ_Handle h = f_root->add_EQ(); + h.update_coef(mapping.input_var(level-1+privatized_levels.size()+i+1), 1); + switch (sym->layout_type()) { + case IR_ARRAY_LAYOUT_COLUMN_MAJOR: { + h.update_coef(mapping.output_var(level-1+privatized_levels.size()+index_sz.size()-cur_index), -1); + mapped_index[index_sz.size()-cur_index-1] = i; + break; + } + case IR_ARRAY_LAYOUT_ROW_MAJOR: { + h.update_coef(mapping.output_var(level-1+privatized_levels.size()+cur_index+1), -1); + mapped_index[cur_index] = i; + break; + } + default: + throw loop_error("unsupported array layout"); + } + cur_index++; + } + + wo_copy_is = Range(Restrict_Domain(copy(mapping), wo_copy_is)); + ro_copy_is = Range(Restrict_Domain(copy(mapping), ro_copy_is)); + + // protonu--replacing Chun's old code + for (int i = 1; i <= level-1+privatized_levels.size(); i++) { + wo_copy_is.name_set_var(i, copy_is.set_var(i)->name()); + ro_copy_is.name_set_var(i, copy_is.set_var(i)->name()); + } + + + + for (int i = 0; i < index_sz.size(); i++) { + wo_copy_is.name_set_var(level-1+privatized_levels.size()+i+1, copy_is.set_var(level-1+privatized_levels.size()+mapped_index[i]+1)->name()); + ro_copy_is.name_set_var(level-1+privatized_levels.size()+i+1, copy_is.set_var(level-1+privatized_levels.size()+mapped_index[i]+1)->name()); + } + wo_copy_is.setup_names(); + ro_copy_is.setup_names(); + } + + // insert read copy statement + int old_num_stmt = stmt.size(); + int ro_copy_stmt_num = -1; + if (has_read_refs) { + Relation copy_xform(ro_copy_is.n_set(), 2*ro_copy_is.n_set()+1); + { + F_And *f_root = copy_xform.add_and(); + for (int i = 1; i <= ro_copy_is.n_set(); i++) { + EQ_Handle h = f_root->add_EQ(); + h.update_coef(copy_xform.input_var(i), 1); + h.update_coef(copy_xform.output_var(2*i), -1); + } + for (int i = 1; i <= dim; i+=2) { + EQ_Handle h = f_root->add_EQ(); + h.update_coef(copy_xform.output_var(i), -1); + 
h.update_const(lex[i-1]); + } + for (int i = dim+2; i <= copy_xform.n_out(); i+=2) { + EQ_Handle h = f_root->add_EQ(); + h.update_coef(copy_xform.output_var(i), 1); + } + } + + Statement copy_stmt_read; + copy_stmt_read.IS = ro_copy_is; + copy_stmt_read.xform = copy_xform; + copy_stmt_read.code = copy_code_read; + copy_stmt_read.loop_level = std::vector<LoopLevel>(ro_copy_is.n_set()); + copy_stmt_read.ir_stmt_node = NULL; + for (int i = 0; i < level-1; i++) { + copy_stmt_read.loop_level[i].type = stmt[*(active.begin())].loop_level[i].type; + if (stmt[*(active.begin())].loop_level[i].type == LoopLevelTile && + stmt[*(active.begin())].loop_level[i].payload >= level) { + int j; + for (j = 0; j < privatized_levels.size(); j++) + if (privatized_levels[j] == stmt[*(active.begin())].loop_level[i].payload) + break; + if (j == privatized_levels.size()) + copy_stmt_read.loop_level[i].payload = -1; + else + copy_stmt_read.loop_level[i].payload = level + j; + } + else + copy_stmt_read.loop_level[i].payload = stmt[*(active.begin())].loop_level[i].payload; + copy_stmt_read.loop_level[i].parallel_level = stmt[*(active.begin())].loop_level[i].parallel_level; + } + for (int i = 0; i < privatized_levels.size(); i++) { + copy_stmt_read.loop_level[level-1+i].type = stmt[*(active.begin())].loop_level[privatized_levels[i]].type; + copy_stmt_read.loop_level[level-1+i].payload = stmt[*(active.begin())].loop_level[privatized_levels[i]].payload; + copy_stmt_read.loop_level[level-1+i].parallel_level = stmt[*(active.begin())].loop_level[privatized_levels[i]].parallel_level; + } + int left_num_dim = num_dep_dim - (get_last_dep_dim_before(*(active.begin()), level) + 1); + for (int i = 0; i < min(left_num_dim, static_cast<int>(index_sz.size())); i++) { + copy_stmt_read.loop_level[level-1+privatized_levels.size()+i].type = LoopLevelOriginal; + copy_stmt_read.loop_level[level-1+privatized_levels.size()+i].payload = num_dep_dim-left_num_dim+i; + 
copy_stmt_read.loop_level[level-1+privatized_levels.size()+i].parallel_level = 0; + } + for (int i = min(left_num_dim, static_cast<int>(index_sz.size())); i < index_sz.size(); i++) { + copy_stmt_read.loop_level[level-1+privatized_levels.size()+i].type = LoopLevelUnknown; + copy_stmt_read.loop_level[level-1+privatized_levels.size()+i].payload = -1; + copy_stmt_read.loop_level[level-1+privatized_levels.size()+i].parallel_level = 0; + } + + shiftLexicalOrder(lex, dim-1, 1); + stmt.push_back(copy_stmt_read); + ro_copy_stmt_num = stmt.size() - 1; + dep.insert(); + } + + // insert write copy statement + int wo_copy_stmt_num = -1; + if (has_write_refs) { + Relation copy_xform(wo_copy_is.n_set(), 2*wo_copy_is.n_set()+1); + { + F_And *f_root = copy_xform.add_and(); + for (int i = 1; i <= wo_copy_is.n_set(); i++) { + EQ_Handle h = f_root->add_EQ(); + h.update_coef(copy_xform.input_var(i), 1); + h.update_coef(copy_xform.output_var(2*i), -1); + } + for (int i = 1; i <= dim; i+=2) { + EQ_Handle h = f_root->add_EQ(); + h.update_coef(copy_xform.output_var(i), -1); + h.update_const(lex[i-1]); + } + for (int i = dim+2; i <= copy_xform.n_out(); i+=2) { + EQ_Handle h = f_root->add_EQ(); + h.update_coef(copy_xform.output_var(i), 1); + } + } + + Statement copy_stmt_write; + copy_stmt_write.IS = wo_copy_is; + copy_stmt_write.xform = copy_xform; + copy_stmt_write.code = copy_code_write; + copy_stmt_write.loop_level = std::vector<LoopLevel>(wo_copy_is.n_set()); + copy_stmt_write.ir_stmt_node = NULL; + + for (int i = 0; i < level-1; i++) { + copy_stmt_write.loop_level[i].type = stmt[*(active.begin())].loop_level[i].type; + if (stmt[*(active.begin())].loop_level[i].type == LoopLevelTile && + stmt[*(active.begin())].loop_level[i].payload >= level) { + int j; + for (j = 0; j < privatized_levels.size(); j++) + if (privatized_levels[j] == stmt[*(active.begin())].loop_level[i].payload) + break; + if (j == privatized_levels.size()) + copy_stmt_write.loop_level[i].payload = -1; + else + 
copy_stmt_write.loop_level[i].payload = level + j; + } + else + copy_stmt_write.loop_level[i].payload = stmt[*(active.begin())].loop_level[i].payload; + copy_stmt_write.loop_level[i].parallel_level = stmt[*(active.begin())].loop_level[i].parallel_level; + } + for (int i = 0; i < privatized_levels.size(); i++) { + copy_stmt_write.loop_level[level-1+i].type = stmt[*(active.begin())].loop_level[privatized_levels[i]].type; + copy_stmt_write.loop_level[level-1+i].payload = stmt[*(active.begin())].loop_level[privatized_levels[i]].payload; + copy_stmt_write.loop_level[level-1+i].parallel_level = stmt[*(active.begin())].loop_level[privatized_levels[i]].parallel_level; + } + int left_num_dim = num_dep_dim - (get_last_dep_dim_before(*(active.begin()), level) + 1); + for (int i = 0; i < min(left_num_dim, static_cast<int>(index_sz.size())); i++) { + copy_stmt_write.loop_level[level-1+privatized_levels.size()+i].type = LoopLevelOriginal; + copy_stmt_write.loop_level[level-1+privatized_levels.size()+i].payload = num_dep_dim-left_num_dim+i; + copy_stmt_write.loop_level[level-1+privatized_levels.size()+i].parallel_level = 0; + } + for (int i = min(left_num_dim, static_cast<int>(index_sz.size())); i < index_sz.size(); i++) { + copy_stmt_write.loop_level[level-1+privatized_levels.size()+i].type = LoopLevelUnknown; + copy_stmt_write.loop_level[level-1+privatized_levels.size()+i].payload = -1; + copy_stmt_write.loop_level[level-1+privatized_levels.size()+i].parallel_level = 0; + } + + lex[dim-1]++; + shiftLexicalOrder(lex, dim-1, -2); + stmt.push_back(copy_stmt_write); + wo_copy_stmt_num = stmt.size() - 1; + dep.insert(); + } + + // replace original array accesses with temporary array accesses + for (int i =0; i < stmt_refs.size(); i++) + for (int j = 0; j < stmt_refs[i].second.size(); j++) { + if (index_sz.size() == 0) { + IR_ScalarRef *tmp_scalar_ref = ir->CreateScalarRef(static_cast<IR_ScalarSymbol *>(tmp_sym)); + ir->ReplaceExpression(stmt_refs[i].second[j], 
tmp_scalar_ref->convert()); + } + else { + std::vector<CG_outputRepr *> index_repr(index_sz.size()); + for (int k = 0; k < index_sz.size(); k++) { + int cur_index_num = index_sz[k].first; + + CG_outputRepr *cur_index_repr = ocg->CreateMinus(stmt_refs[i].second[j]->index(cur_index_num), index_lb[cur_index_num]->clone()); + if (padding_stride != 0) { + if (k == n_dim-1) { + coef_t g = gcd(index_stride[cur_index_num], static_cast<coef_t>(padding_stride)); + coef_t t1 = index_stride[cur_index_num] / g; + if (t1 != 1) + cur_index_repr = ocg->CreateIntegerDivide(cur_index_repr, ocg->CreateInt(t1)); + coef_t t2 = padding_stride / g; + if (t2 != 1) + cur_index_repr = ocg->CreateTimes(cur_index_repr, ocg->CreateInt(t2)); + } + else if (index_stride[cur_index_num] != 1) { + cur_index_repr = ocg->CreateIntegerDivide(cur_index_repr, ocg->CreateInt(index_stride[cur_index_num])); + } + } + + if (ir->ArrayIndexStartAt() != 0) + cur_index_repr = ocg->CreatePlus(cur_index_repr, ocg->CreateInt(ir->ArrayIndexStartAt())); + index_repr[k] = cur_index_repr; + } + + IR_ArrayRef *tmp_array_ref = ir->CreateArrayRef(static_cast<IR_ArraySymbol *>(tmp_sym), index_repr); + ir->ReplaceExpression(stmt_refs[i].second[j], tmp_array_ref->convert()); + } + } + + // update dependence graph + int dep_dim = get_last_dep_dim_before(*(active.begin()), level) + 1; + if (ro_copy_stmt_num != -1) { + for (int i = 0; i < old_num_stmt; i++) { + std::vector<std::vector<DependenceVector> > D; + + for (DependenceGraph::EdgeList::iterator j = dep.vertex[i].second.begin(); j != dep.vertex[i].second.end();) { + if (active.find(i) != active.end() && active.find(j->first) == active.end()) { + std::vector<DependenceVector> dvs1, dvs2; + for (int k = 0; k < j->second.size(); k++) { + DependenceVector dv = j->second[k]; + if (dv.sym != NULL && dv.sym->name() == sym->name() && (dv.type == DEP_R2R || dv.type == DEP_R2W)) + dvs1.push_back(dv); + else + dvs2.push_back(dv); + } + j->second = dvs2; + if (dvs1.size() > 0) + 
dep.connect(ro_copy_stmt_num, j->first, dvs1); + } + else if (active.find(i) == active.end() && active.find(j->first) != active.end()) { + std::vector<DependenceVector> dvs1, dvs2; + for (int k = 0; k < j->second.size(); k++) { + DependenceVector dv = j->second[k]; + if (dv.sym != NULL && dv.sym->name() == sym->name() && (dv.type == DEP_R2R || dv.type == DEP_W2R)) + dvs1.push_back(dv); + else + dvs2.push_back(dv); + } + j->second = dvs2; + if (dvs1.size() > 0) + D.push_back(dvs1); + } + + if (j->second.size() == 0) + dep.vertex[i].second.erase(j++); + else + j++; + } + + for (int j = 0; j < D.size(); j++) + dep.connect(i, ro_copy_stmt_num, D[j]); + } + + // insert dependences from copy statement loop to copied statements + DependenceVector dv; + dv.type = DEP_W2R; + dv.sym = tmp_sym->clone(); + dv.lbounds = std::vector<coef_t>(num_dep_dim, 0); + dv.ubounds = std::vector<coef_t>(num_dep_dim, 0); + for (int i = dep_dim; i < num_dep_dim; i++) { + dv.lbounds[i] = -posInfinity; + dv.ubounds[i] = posInfinity; + } + for (std::set<int>::iterator i = active.begin(); i != active.end(); i++) + dep.connect(ro_copy_stmt_num, *i, dv); + } + + if (wo_copy_stmt_num != -1) { + for (int i = 0; i < old_num_stmt; i++) { + std::vector<std::vector<DependenceVector> > D; + + for (DependenceGraph::EdgeList::iterator j = dep.vertex[i].second.begin(); j != dep.vertex[i].second.end();) { + if (active.find(i) != active.end() && active.find(j->first) == active.end()) { + std::vector<DependenceVector> dvs1, dvs2; + for (int k = 0; k < j->second.size(); k++) { + DependenceVector dv = j->second[k]; + if (dv.sym != NULL && dv.sym->name() == sym->name() && (dv.type == DEP_W2R || dv.type == DEP_W2W)) + dvs1.push_back(dv); + else + dvs2.push_back(dv); + } + j->second = dvs2; + if (dvs1.size() > 0) + dep.connect(wo_copy_stmt_num, j->first, dvs1); + } + else if (active.find(i) == active.end() && active.find(j->first) != active.end()) { + std::vector<DependenceVector> dvs1, dvs2; + for (int k = 0; k < 
j->second.size(); k++) { + DependenceVector dv = j->second[k]; + if (dv.sym != NULL && dv.sym->name() == sym->name() && (dv.type == DEP_R2W || dv.type == DEP_W2W)) + dvs1.push_back(dv); + else + dvs2.push_back(dv); + } + j->second = dvs2; + if (dvs1.size() > 0) + D.push_back(dvs1); + } + + if (j->second.size() == 0) + dep.vertex[i].second.erase(j++); + else + j++; + } + + for (int j = 0; j < D.size(); j++) + dep.connect(i, wo_copy_stmt_num, D[j]); + } + + // insert dependences from copied statements to write statements + DependenceVector dv; + dv.type = DEP_W2R; + dv.sym = tmp_sym->clone(); + dv.lbounds = std::vector<coef_t>(num_dep_dim, 0); + dv.ubounds = std::vector<coef_t>(num_dep_dim, 0); + for (int i = dep_dim; i < num_dep_dim; i++) { + dv.lbounds[i] = -posInfinity; + dv.ubounds[i] = posInfinity; + } + for (std::set<int>::iterator i = active.begin(); i != active.end(); i++) + dep.connect(*i, wo_copy_stmt_num, dv); + + } + + // update variable name for dependences among copied statements + for (int i = 0; i < old_num_stmt; i++) { + if (active.find(i) != active.end()) + for (DependenceGraph::EdgeList::iterator j = dep.vertex[i].second.begin(); j != dep.vertex[i].second.end(); j++) + if (active.find(j->first) != active.end()) + for (int k = 0; k < j->second.size(); k++) { + IR_Symbol *s = tmp_sym->clone(); + j->second[k].sym = s; + } + } + + // insert anti-dependence from write statement to read statement + if (ro_copy_stmt_num != -1 && wo_copy_stmt_num != -1) + if (dep_dim >= 0) { + DependenceVector dv; + dv.type = DEP_R2W; + dv.sym = tmp_sym->clone(); + dv.lbounds = std::vector<coef_t>(num_dep_dim, 0); + dv.ubounds = std::vector<coef_t>(num_dep_dim, 0); + for (int k = dep_dim; k < num_dep_dim; k++) { + dv.lbounds[k] = -posInfinity; + dv.ubounds[k] = posInfinity; + } + for (int k = 0; k < dep_dim; k++) { + if (k != 0) { + dv.lbounds[k-1] = 0; + dv.ubounds[k-1] = 0; + } + dv.lbounds[k] = 1; + dv.ubounds[k] = posInfinity; + dep.connect(wo_copy_stmt_num, 
ro_copy_stmt_num, dv); + } + } + + + // cleanup + delete sym; + delete tmp_sym; + for (int i = 0; i < index_lb.size(); i++) { + index_lb[i]->clear(); + delete index_lb[i]; + } + for (int i = 0; i < index_sz.size(); i++) { + index_sz[i].second->clear(); + delete index_sz[i].second; + } + + return true; + } +*/ +bool Loop::datacopy_privatized(const std::vector<std::pair<int, std::vector<IR_ArrayRef *> > > &stmt_refs, int level, + const std::vector<int> &privatized_levels, + bool allow_extra_read, int fastest_changing_dimension, + int padding_stride, int padding_alignment, int memory_type) { + if (stmt_refs.size() == 0) + return true; + + // check for sanity of parameters + IR_ArraySymbol *sym = NULL; + std::vector<int> lex; + std::set<int> active; + if (level <= 0) + throw std::invalid_argument("invalid loop level " + to_string(level)); + for (int i = 0; i < privatized_levels.size(); i++) { + if (i == 0) { + if (privatized_levels[i] < level) + throw std::invalid_argument("privatized loop levels must be no less than level " + to_string(level)); + } + else if (privatized_levels[i] <= privatized_levels[i-1]) + throw std::invalid_argument("privatized loop levels must be in ascending order"); + } + for (int i = 0; i < stmt_refs.size(); i++) { + int stmt_num = stmt_refs[i].first; + active.insert(stmt_num); + if (stmt_num < 0 || stmt_num >= stmt.size()) + throw std::invalid_argument("invalid statement number " + to_string(stmt_num)); + if (privatized_levels.size() != 0) { + if (privatized_levels[privatized_levels.size()-1] > stmt[stmt_num].loop_level.size()) + throw std::invalid_argument("invalid loop level " + to_string(privatized_levels[privatized_levels.size()-1]) + " for statement " + to_string(stmt_num)); + } + else { + if (level > stmt[stmt_num].loop_level.size()) + throw std::invalid_argument("invalid loop level " + to_string(level) + " for statement " + to_string(stmt_num)); + } + for (int j = 0; j < stmt_refs[i].second.size(); j++) { + if (sym == NULL) { + sym = 
stmt_refs[i].second[j]->symbol(); + lex = getLexicalOrder(stmt_num); + } + else { + IR_ArraySymbol *t = stmt_refs[i].second[j]->symbol(); + if (t->name() != sym->name()) { + delete t; + delete sym; + throw std::invalid_argument("try to copy data from different arrays"); + } + delete t; + } + } + } + if (!(fastest_changing_dimension >= -1 && fastest_changing_dimension < sym->n_dim())) + throw std::invalid_argument("invalid fastest changing dimension for the array to be copied"); + if (padding_stride < 0) + throw std::invalid_argument("invalid temporary array stride requirement"); + if (padding_alignment == -1 || padding_alignment == 0) + throw std::invalid_argument("invalid temporary array alignment requirement"); + + int dim = 2*level - 1; + int n_dim = sym->n_dim(); + + + if (fastest_changing_dimension == -1) + switch (sym->layout_type()) { + case IR_ARRAY_LAYOUT_ROW_MAJOR: + fastest_changing_dimension = n_dim - 1; + break; + case IR_ARRAY_LAYOUT_COLUMN_MAJOR: + fastest_changing_dimension = 0; + break; + default: + throw loop_error("unsupported array layout"); + } + + + // invalidate saved codegen computation + delete last_compute_cgr_; + last_compute_cgr_ = NULL; + delete last_compute_cg_; + last_compute_cg_ = NULL; + + // build iteration spaces for all reads and for all writes separately + apply_xform(active); + + bool has_write_refs = false; + bool has_read_refs = false; + Relation wo_copy_is = Relation::False(level-1+privatized_levels.size()+n_dim); + Relation ro_copy_is = Relation::False(level-1+privatized_levels.size()+n_dim); + for (int i = 0; i < stmt_refs.size(); i++) { + int stmt_num = stmt_refs[i].first; + + for (int j = 0; j < stmt_refs[i].second.size(); j++) { + Relation mapping(stmt[stmt_num].IS.n_set(), level-1+privatized_levels.size()+n_dim); + for (int k = 1; k <= mapping.n_inp(); k++) + mapping.name_input_var(k, stmt[stmt_num].IS.set_var(k)->name()); + mapping.setup_names(); + F_And *f_root = mapping.add_and(); + for (int k = 1; k <= level-1; 
k++) { + EQ_Handle h = f_root->add_EQ(); + h.update_coef(mapping.input_var(k), 1); + h.update_coef(mapping.output_var(k), -1); + } + for (int k = 0; k < privatized_levels.size(); k++) { + EQ_Handle h = f_root->add_EQ(); + h.update_coef(mapping.input_var(privatized_levels[k]), 1); + h.update_coef(mapping.output_var(level+k), -1); + } + for (int k = 0; k < n_dim; k++) { + CG_outputRepr *repr = stmt_refs[i].second[j]->index(k); + exp2formula(ir, mapping, f_root, freevar, repr, mapping.output_var(level-1+privatized_levels.size()+k+1), 'w', IR_COND_EQ, false); + repr->clear(); + delete repr; + } + Relation r = Range(Restrict_Domain(mapping, Intersection(copy(stmt[stmt_num].IS), Extend_Set(copy(this->known), stmt[stmt_num].IS.n_set() - this->known.n_set())))); + if (stmt_refs[i].second[j]->is_write()) { + has_write_refs = true; + wo_copy_is = Union(wo_copy_is, r); + wo_copy_is.simplify(2, 4); + + + } + else { + has_read_refs = true; + ro_copy_is = Union(ro_copy_is, r); + ro_copy_is.simplify(2, 4); + + } + } + } + + // simplify read and write footprint iteration space + { + if (allow_extra_read) + ro_copy_is = SimpleHull(ro_copy_is, true, true); + else + ro_copy_is = ConvexRepresentation(ro_copy_is); + + wo_copy_is = ConvexRepresentation(wo_copy_is); + if (wo_copy_is.number_of_conjuncts() > 1) { + Relation t = SimpleHull(wo_copy_is, true, true); + if (Must_Be_Subset(copy(t), copy(ro_copy_is))) + wo_copy_is = t; + else if (Must_Be_Subset(copy(wo_copy_is), copy(ro_copy_is))) + wo_copy_is = ro_copy_is; + } + } + + // make copy statement variable names match the ones in the original statements which + // already have the same names due to apply_xform + { + int ref_stmt = *active.begin(); + for (std::set<int>::iterator i = active.begin(); i != active.end(); i++) + if (stmt[*i].IS.n_set() > stmt[ref_stmt].IS.n_set()) + ref_stmt = *i; + for (int i = 1; i < level; i++) { + std::string s = stmt[ref_stmt].IS.input_var(i)->name(); + wo_copy_is.name_set_var(i, s); + 
ro_copy_is.name_set_var(i, s); + } + for (int i = 0; i < privatized_levels.size(); i++) { + std::string s = stmt[ref_stmt].IS.input_var(privatized_levels[i])->name(); + wo_copy_is.name_set_var(level+i, s); + ro_copy_is.name_set_var(level+i, s); + } + for (int i = level+privatized_levels.size(); i < level+privatized_levels.size()+n_dim; i++) { + std::string s = tmp_loop_var_name_prefix + to_string(tmp_loop_var_name_counter+i-level-privatized_levels.size()); + wo_copy_is.name_set_var(i, s); + ro_copy_is.name_set_var(i, s); + } + tmp_loop_var_name_counter += n_dim; + wo_copy_is.setup_names(); + ro_copy_is.setup_names(); + } + + // build merged footprint iteration space for calculating temporary array size + Relation copy_is = SimpleHull(Union(copy(ro_copy_is), copy(wo_copy_is)), true, true); + + // extract temporary array information + CG_outputBuilder *ocg = ir->builder(); + std::vector<CG_outputRepr *> index_lb(n_dim); // initialized to NULL + std::vector<coef_t> index_stride(n_dim); + std::vector<bool> is_index_eq(n_dim, false); + std::vector<std::pair<int, CG_outputRepr *> > index_sz(0); + Relation reduced_copy_is = copy(copy_is); + + for (int i = 0; i < n_dim; i++) { + if (i != 0) + reduced_copy_is = Project(reduced_copy_is, level-1+privatized_levels.size()+i, Set_Var); + Relation bound = get_loop_bound(reduced_copy_is, level-1+privatized_levels.size()+i); + + // extract stride + std::pair<EQ_Handle, Variable_ID> result = find_simplest_stride(bound, bound.set_var(level-1+privatized_levels.size()+i+1)); + if (result.second != NULL) + index_stride[i] = abs(result.first.get_coef(result.second))/gcd(abs(result.first.get_coef(result.second)), abs(result.first.get_coef(bound.set_var(level-1+privatized_levels.size()+i+1)))); + else + index_stride[i] = 1; + + // check if this arary index requires loop + Conjunct *c = bound.query_DNF()->single_conjunct(); + for (EQ_Iterator ei(c->EQs()); ei; ei++) { + if ((*ei).has_wildcards()) + continue; + + int coef = 
(*ei).get_coef(bound.set_var(level-1+privatized_levels.size()+i+1)); + if (coef != 0) { + int sign = 1; + if (coef < 0) { + coef = -coef; + sign = -1; + } + + CG_outputRepr *op = NULL; + for (Constr_Vars_Iter ci(*ei); ci; ci++) { + switch ((*ci).var->kind()) { + case Input_Var: + { + if ((*ci).var != bound.set_var(level-1+privatized_levels.size()+i+1)) + if ((*ci).coef*sign == 1) + op = ocg->CreateMinus(op, ocg->CreateIdent((*ci).var->name())); + else if ((*ci).coef*sign == -1) + op = ocg->CreatePlus(op, ocg->CreateIdent((*ci).var->name())); + else if ((*ci).coef*sign > 1) + op = ocg->CreateMinus(op, ocg->CreateTimes(ocg->CreateInt(abs((*ci).coef)), ocg->CreateIdent((*ci).var->name()))); + else // (*ci).coef*sign < -1 + op = ocg->CreatePlus(op, ocg->CreateTimes(ocg->CreateInt(abs((*ci).coef)), ocg->CreateIdent((*ci).var->name()))); + break; + } + case Global_Var: + { + Global_Var_ID g = (*ci).var->get_global_var(); + if ((*ci).coef*sign == 1) + op = ocg->CreateMinus(op, ocg->CreateIdent(g->base_name())); + else if ((*ci).coef*sign == -1) + op = ocg->CreatePlus(op, ocg->CreateIdent(g->base_name())); + else if ((*ci).coef*sign > 1) + op = ocg->CreateMinus(op, ocg->CreateTimes(ocg->CreateInt(abs((*ci).coef)), ocg->CreateIdent(g->base_name()))); + else // (*ci).coef*sign < -1 + op = ocg->CreatePlus(op, ocg->CreateTimes(ocg->CreateInt(abs((*ci).coef)), ocg->CreateIdent(g->base_name()))); + break; + } + default: + throw loop_error("unsupported array index expression"); + } + } + if ((*ei).get_const() != 0) + op = ocg->CreatePlus(op, ocg->CreateInt(-sign*((*ei).get_const()))); + if (coef != 1) + op = ocg->CreateIntegerFloor(op, ocg->CreateInt(coef)); + + index_lb[i] = op; + is_index_eq[i] = true; + break; + } + } + if (is_index_eq[i]) + continue; + + // seperate lower and upper bounds + std::vector<GEQ_Handle> lb_list, ub_list; + std::set<Variable_ID> excluded_floor_vars; + excluded_floor_vars.insert(bound.set_var(level-1+privatized_levels.size()+i+1)); + for 
(GEQ_Iterator gi(c->GEQs()); gi; gi++) { + int coef = (*gi).get_coef(bound.set_var(level-1+privatized_levels.size()+i+1)); + if (coef != 0 && (*gi).has_wildcards()) { + bool clean_bound = true; + GEQ_Handle h; + for (Constr_Vars_Iter cvi(*gi, true); gi; gi++) + if (!find_floor_definition(bound, (*cvi).var, excluded_floor_vars).first) { + clean_bound = false; + break; + } + if (!clean_bound) + continue; + } + + if (coef > 0) + lb_list.push_back(*gi); + else if (coef < 0) + ub_list.push_back(*gi); + } + if (lb_list.size() == 0 || ub_list.size() == 0) + throw loop_error("failed to calcuate array footprint size"); + + // build lower bound representation + std::vector<CG_outputRepr *> lb_repr_list; + for (int j = 0; j < lb_list.size(); j++){ + if(this->known.n_set() == 0) + lb_repr_list.push_back(output_lower_bound_repr(ocg, lb_list[j], bound.set_var(level-1+privatized_levels.size()+i+1), result.first, result.second, bound, Relation::True(bound.n_set()), std::vector<std::pair<CG_outputRepr *, int> >(bound.n_set(), std::make_pair(static_cast<CG_outputRepr *>(NULL), 0)))); + else + lb_repr_list.push_back(output_lower_bound_repr(ocg, lb_list[j], bound.set_var(level-1+privatized_levels.size()+i+1), result.first, result.second, bound, this->known, std::vector<std::pair<CG_outputRepr *, int> >(bound.n_set(), std::make_pair(static_cast<CG_outputRepr *>(NULL), 0)))); + } + if (lb_repr_list.size() > 1) + index_lb[i] = ocg->CreateInvoke("max", lb_repr_list); + else if (lb_repr_list.size() == 1) + index_lb[i] = lb_repr_list[0]; + + // build temporary array size representation + { + Relation cal(copy_is.n_set(), 1); + F_And *f_root = cal.add_and(); + for (int j = 0; j < ub_list.size(); j++) + for (int k = 0; k < lb_list.size(); k++) { + GEQ_Handle h = f_root->add_GEQ(); + + for (Constr_Vars_Iter ci(ub_list[j]); ci; ci++) { + switch ((*ci).var->kind()) { + case Input_Var: + { + int pos = (*ci).var->get_position(); + h.update_coef(cal.input_var(pos), (*ci).coef); + break; + } + case 
Global_Var: + { + Global_Var_ID g = (*ci).var->get_global_var(); + Variable_ID v; + if (g->arity() == 0) + v = cal.get_local(g); + else + v = cal.get_local(g, (*ci).var->function_of()); + h.update_coef(v, (*ci).coef); + break; + } + default: + throw loop_error("cannot calculate temporay array size statically"); + } + } + h.update_const(ub_list[j].get_const()); + + for (Constr_Vars_Iter ci(lb_list[k]); ci; ci++) { + switch ((*ci).var->kind()) { + case Input_Var: + { + int pos = (*ci).var->get_position(); + h.update_coef(cal.input_var(pos), (*ci).coef); + break; + } + case Global_Var: + { + Global_Var_ID g = (*ci).var->get_global_var(); + Variable_ID v; + if (g->arity() == 0) + v = cal.get_local(g); + else + v = cal.get_local(g, (*ci).var->function_of()); + h.update_coef(v, (*ci).coef); + break; + } + default: + throw loop_error("cannot calculate temporay array size statically"); + } + } + h.update_const(lb_list[k].get_const()); + + h.update_const(1); + h.update_coef(cal.output_var(1), -1); + } + + cal = Restrict_Domain(cal, copy(copy_is)); + for (int j = 1; j <= cal.n_inp(); j++) + cal = Project(cal, j, Input_Var); + cal.simplify(); + + // pad temporary array size + // TODO: for variable array size, create padding formula + Conjunct *c = cal.query_DNF()->single_conjunct(); + bool is_index_bound_const = false; + for (GEQ_Iterator gi(c->GEQs()); gi && !is_index_bound_const; gi++) + if ((*gi).is_const(cal.output_var(1))) { + coef_t size = (*gi).get_const() / (-(*gi).get_coef(cal.output_var(1))); + if (padding_stride != 0) { + size = (size + index_stride[i] - 1) / index_stride[i]; + if (i == fastest_changing_dimension) + size = size * padding_stride; + } + if (i == fastest_changing_dimension) { + if (padding_alignment > 1) { // align to boundary for data packing + int residue = size % padding_alignment; + if (residue) + size = size+padding_alignment-residue; + } + else if (padding_alignment < -1) { // un-alignment for memory bank conflicts + while (gcd(size, 
static_cast<coef_t>(-padding_alignment)) != 1) + size++; + } + } + index_sz.push_back(std::make_pair(i, ocg->CreateInt(size))); + is_index_bound_const = true; + } + + if (!is_index_bound_const) { + for (GEQ_Iterator gi(c->GEQs()); gi && !is_index_bound_const; gi++) { + int coef = (*gi).get_coef(cal.output_var(1)); + if (coef < 0) { + CG_outputRepr *op = NULL; + for (Constr_Vars_Iter ci(*gi); ci; ci++) { + if ((*ci).var != cal.output_var(1)) { + switch((*ci).var->kind()) { + case Global_Var: + { + Global_Var_ID g = (*ci).var->get_global_var(); + if ((*ci).coef == 1) + op = ocg->CreatePlus(op, ocg->CreateIdent(g->base_name())); + else if ((*ci).coef == -1) + op = ocg->CreateMinus(op, ocg->CreateIdent(g->base_name())); + else if ((*ci).coef > 1) + op = ocg->CreatePlus(op, ocg->CreateTimes(ocg->CreateInt((*ci).coef), ocg->CreateIdent(g->base_name()))); + else // (*ci).coef < -1 + op = ocg->CreateMinus(op, ocg->CreateTimes(ocg->CreateInt(-(*ci).coef), ocg->CreateIdent(g->base_name()))); + break; + } + default: + throw loop_error("failed to generate array index bound code"); + } + } + } + int c = (*gi).get_const(); + if (c > 0) + op = ocg->CreatePlus(op, ocg->CreateInt(c)); + else if (c < 0) + op = ocg->CreateMinus(op, ocg->CreateInt(-c)); + if (padding_stride != 0) { + if (i == fastest_changing_dimension) { + coef_t g = gcd(index_stride[i], static_cast<coef_t>(padding_stride)); + coef_t t1 = index_stride[i] / g; + if (t1 != 1) + op = ocg->CreateIntegerFloor(ocg->CreatePlus(op, ocg->CreateInt(t1-1)), ocg->CreateInt(t1)); + coef_t t2 = padding_stride / g; + if (t2 != 1) + op = ocg->CreateTimes(op, ocg->CreateInt(t2)); + } + else if (index_stride[i] != 1) { + op = ocg->CreateIntegerFloor(ocg->CreatePlus(op, ocg->CreateInt(index_stride[i]-1)), ocg->CreateInt(index_stride[i])); + } + } + + index_sz.push_back(std::make_pair(i, op)); + break; + } + } + } + } + } + + // change the temporary array index order + for (int i = 0; i < index_sz.size(); i++) + if (index_sz[i].first == 
fastest_changing_dimension) + switch (sym->layout_type()) { + case IR_ARRAY_LAYOUT_ROW_MAJOR: + std::swap(index_sz[index_sz.size()-1], index_sz[i]); + break; + case IR_ARRAY_LAYOUT_COLUMN_MAJOR: + std::swap(index_sz[0], index_sz[i]); + break; + default: + throw loop_error("unsupported array layout"); + } + + // declare temporary array or scalar + IR_Symbol *tmp_sym; + if (index_sz.size() == 0) { + tmp_sym = ir->CreateScalarSymbol(sym, memory_type); + } + else { + std::vector<CG_outputRepr *> tmp_array_size(index_sz.size()); + for (int i = 0; i < index_sz.size(); i++) + tmp_array_size[i] = index_sz[i].second->clone(); + tmp_sym = ir->CreateArraySymbol(sym, tmp_array_size, memory_type); + } + + // create temporary array read initialization code + CG_outputRepr *copy_code_read; + if (has_read_refs) + if (index_sz.size() == 0) { + IR_ScalarRef *tmp_scalar_ref = ir->CreateScalarRef(static_cast<IR_ScalarSymbol *>(tmp_sym)); + + std::vector<CG_outputRepr *> rhs_index(n_dim); + for (int i = 0; i < index_lb.size(); i++) + if (is_index_eq[i]) + rhs_index[i] = index_lb[i]->clone(); + else + rhs_index[i] = ir->builder()->CreateIdent(copy_is.set_var(level-1+privatized_levels.size()+i+1)->name()); + IR_ArrayRef *copied_array_ref = ir->CreateArrayRef(sym, rhs_index); + + copy_code_read = ir->builder()->CreateAssignment(0, tmp_scalar_ref->convert(), copied_array_ref->convert()); + } + else { + std::vector<CG_outputRepr *> lhs_index(index_sz.size()); + for (int i = 0; i < index_sz.size(); i++) { + int cur_index_num = index_sz[i].first; + CG_outputRepr *cur_index_repr = ocg->CreateMinus(ocg->CreateIdent(copy_is.set_var(level-1+privatized_levels.size()+cur_index_num+1)->name()), index_lb[cur_index_num]->clone()); + if (padding_stride != 0) { + if (i == n_dim-1) { + coef_t g = gcd(index_stride[cur_index_num], static_cast<coef_t>(padding_stride)); + coef_t t1 = index_stride[cur_index_num] / g; + if (t1 != 1) + cur_index_repr = ocg->CreateIntegerFloor(cur_index_repr, 
ocg->CreateInt(t1)); + coef_t t2 = padding_stride / g; + if (t2 != 1) + cur_index_repr = ocg->CreateTimes(cur_index_repr, ocg->CreateInt(t2)); + } + else if (index_stride[cur_index_num] != 1) { + cur_index_repr = ocg->CreateIntegerFloor(cur_index_repr, ocg->CreateInt(index_stride[cur_index_num])); + } + } + + if (ir->ArrayIndexStartAt() != 0) + cur_index_repr = ocg->CreatePlus(cur_index_repr, ocg->CreateInt(ir->ArrayIndexStartAt())); + lhs_index[i] = cur_index_repr; + } + + IR_ArrayRef *tmp_array_ref = ir->CreateArrayRef(static_cast<IR_ArraySymbol *>(tmp_sym), lhs_index); + + std::vector<CG_outputRepr *> rhs_index(n_dim); + for (int i = 0; i < index_lb.size(); i++) + if (is_index_eq[i]) + rhs_index[i] = index_lb[i]->clone(); + else + rhs_index[i] = ir->builder()->CreateIdent(copy_is.set_var(level-1+privatized_levels.size()+i+1)->name()); + IR_ArrayRef *copied_array_ref = ir->CreateArrayRef(sym, rhs_index); + + copy_code_read = ir->builder()->CreateAssignment(0, tmp_array_ref->convert(), copied_array_ref->convert()); + } + + // create temporary array write back code + CG_outputRepr *copy_code_write; + if (has_write_refs) + if (index_sz.size() == 0) { + IR_ScalarRef *tmp_scalar_ref = ir->CreateScalarRef(static_cast<IR_ScalarSymbol *>(tmp_sym)); + + std::vector<CG_outputRepr *> rhs_index(n_dim); + for (int i = 0; i < index_lb.size(); i++) + if (is_index_eq[i]) + rhs_index[i] = index_lb[i]->clone(); + else + rhs_index[i] = ir->builder()->CreateIdent(copy_is.set_var(level-1+privatized_levels.size()+i+1)->name()); + IR_ArrayRef *copied_array_ref = ir->CreateArrayRef(sym, rhs_index); + + copy_code_write = ir->builder()->CreateAssignment(0, copied_array_ref->convert(), tmp_scalar_ref->convert()); + } + else { + std::vector<CG_outputRepr *> lhs_index(n_dim); + for (int i = 0; i < index_lb.size(); i++) + if (is_index_eq[i]) + lhs_index[i] = index_lb[i]->clone(); + else + lhs_index[i] = ir->builder()->CreateIdent(copy_is.set_var(level-1+privatized_levels.size()+i+1)->name()); 
+ IR_ArrayRef *copied_array_ref = ir->CreateArrayRef(sym, lhs_index); + + std::vector<CG_outputRepr *> rhs_index(index_sz.size()); + for (int i = 0; i < index_sz.size(); i++) { + int cur_index_num = index_sz[i].first; + CG_outputRepr *cur_index_repr = ocg->CreateMinus(ocg->CreateIdent(copy_is.set_var(level-1+privatized_levels.size()+cur_index_num+1)->name()), index_lb[cur_index_num]->clone()); + if (padding_stride != 0) { + if (i == n_dim-1) { + coef_t g = gcd(index_stride[cur_index_num], static_cast<coef_t>(padding_stride)); + coef_t t1 = index_stride[cur_index_num] / g; + if (t1 != 1) + cur_index_repr = ocg->CreateIntegerFloor(cur_index_repr, ocg->CreateInt(t1)); + coef_t t2 = padding_stride / g; + if (t2 != 1) + cur_index_repr = ocg->CreateTimes(cur_index_repr, ocg->CreateInt(t2)); + } + else if (index_stride[cur_index_num] != 1) { + cur_index_repr = ocg->CreateIntegerFloor(cur_index_repr, ocg->CreateInt(index_stride[cur_index_num])); + } + } + + if (ir->ArrayIndexStartAt() != 0) + cur_index_repr = ocg->CreatePlus(cur_index_repr, ocg->CreateInt(ir->ArrayIndexStartAt())); + rhs_index[i] = cur_index_repr; + } + IR_ArrayRef *tmp_array_ref = ir->CreateArrayRef(static_cast<IR_ArraySymbol *>(tmp_sym), rhs_index); + + copy_code_write = ir->builder()->CreateAssignment(0, copied_array_ref->convert(), tmp_array_ref->convert()); + } + + // now we can remove those loops for array indexes that are + // dependent on others + if (!(index_sz.size() == n_dim && (sym->layout_type() == IR_ARRAY_LAYOUT_ROW_MAJOR || n_dim <= 1))) { + Relation mapping(level-1+privatized_levels.size()+n_dim, level-1+privatized_levels.size()+index_sz.size()); + F_And *f_root = mapping.add_and(); + for (int i = 1; i <= level-1+privatized_levels.size(); i++) { + EQ_Handle h = f_root->add_EQ(); + h.update_coef(mapping.input_var(i), 1); + h.update_coef(mapping.output_var(i), -1); + } + + int cur_index = 0; + std::vector<int> mapped_index(index_sz.size()); + for (int i = 0; i < n_dim; i++) + if 
(!is_index_eq[i]) { + EQ_Handle h = f_root->add_EQ(); + h.update_coef(mapping.input_var(level-1+privatized_levels.size()+i+1), 1); + switch (sym->layout_type()) { + case IR_ARRAY_LAYOUT_COLUMN_MAJOR: { + h.update_coef(mapping.output_var(level-1+privatized_levels.size()+index_sz.size()-cur_index), -1); + mapped_index[index_sz.size()-cur_index-1] = i; + break; + } + case IR_ARRAY_LAYOUT_ROW_MAJOR: { + h.update_coef(mapping.output_var(level-1+privatized_levels.size()+cur_index+1), -1); + mapped_index[cur_index] = i; + break; + } + default: + throw loop_error("unsupported array layout"); + } + cur_index++; + } + + wo_copy_is = Range(Restrict_Domain(copy(mapping), wo_copy_is)); + ro_copy_is = Range(Restrict_Domain(copy(mapping), ro_copy_is)); + for (int i = 1; i <= level-1+privatized_levels.size(); i++) { + wo_copy_is.name_set_var(i, copy_is.set_var(i)->name()); + ro_copy_is.name_set_var(i, copy_is.set_var(i)->name()); + } + for (int i = 0; i < index_sz.size(); i++) { + wo_copy_is.name_set_var(level-1+privatized_levels.size()+i+1, copy_is.set_var(level-1+privatized_levels.size()+mapped_index[i]+1)->name()); + ro_copy_is.name_set_var(level-1+privatized_levels.size()+i+1, copy_is.set_var(level-1+privatized_levels.size()+mapped_index[i]+1)->name()); + } + wo_copy_is.setup_names(); + ro_copy_is.setup_names(); + } + + // insert read copy statement + int old_num_stmt = stmt.size(); + int ro_copy_stmt_num = -1; + if (has_read_refs) { + Relation copy_xform(ro_copy_is.n_set(), 2*ro_copy_is.n_set()+1); + { + F_And *f_root = copy_xform.add_and(); + for (int i = 1; i <= ro_copy_is.n_set(); i++) { + EQ_Handle h = f_root->add_EQ(); + h.update_coef(copy_xform.input_var(i), 1); + h.update_coef(copy_xform.output_var(2*i), -1); + } + for (int i = 1; i <= dim; i+=2) { + EQ_Handle h = f_root->add_EQ(); + h.update_coef(copy_xform.output_var(i), -1); + h.update_const(lex[i-1]); + } + for (int i = dim+2; i <= copy_xform.n_out(); i+=2) { + EQ_Handle h = f_root->add_EQ(); + 
h.update_coef(copy_xform.output_var(i), 1); + } + } + + Statement copy_stmt_read; + copy_stmt_read.IS = ro_copy_is; + copy_stmt_read.xform = copy_xform; + copy_stmt_read.code = copy_code_read; + copy_stmt_read.loop_level = std::vector<LoopLevel>(ro_copy_is.n_set()); + copy_stmt_read.ir_stmt_node = NULL; + for (int i = 0; i < level-1; i++) { + copy_stmt_read.loop_level[i].type = stmt[*(active.begin())].loop_level[i].type; + if (stmt[*(active.begin())].loop_level[i].type == LoopLevelTile && + stmt[*(active.begin())].loop_level[i].payload >= level) { + int j; + for (j = 0; j < privatized_levels.size(); j++) + if (privatized_levels[j] == stmt[*(active.begin())].loop_level[i].payload) + break; + if (j == privatized_levels.size()) + copy_stmt_read.loop_level[i].payload = -1; + else + copy_stmt_read.loop_level[i].payload = level + j; + } + else + copy_stmt_read.loop_level[i].payload = stmt[*(active.begin())].loop_level[i].payload; + copy_stmt_read.loop_level[i].parallel_level = stmt[*(active.begin())].loop_level[i].parallel_level; + } + for (int i = 0; i < privatized_levels.size(); i++) { + copy_stmt_read.loop_level[level-1+i].type = stmt[*(active.begin())].loop_level[privatized_levels[i]].type; + copy_stmt_read.loop_level[level-1+i].payload = stmt[*(active.begin())].loop_level[privatized_levels[i]].payload; + copy_stmt_read.loop_level[level-1+i].parallel_level = stmt[*(active.begin())].loop_level[privatized_levels[i]].parallel_level; + } + int left_num_dim = num_dep_dim - (get_last_dep_dim_before(*(active.begin()), level) + 1); + for (int i = 0; i < min(left_num_dim, static_cast<int>(index_sz.size())); i++) { + copy_stmt_read.loop_level[level-1+privatized_levels.size()+i].type = LoopLevelOriginal; + copy_stmt_read.loop_level[level-1+privatized_levels.size()+i].payload = num_dep_dim-left_num_dim+i; + copy_stmt_read.loop_level[level-1+privatized_levels.size()+i].parallel_level = 0; + } + for (int i = min(left_num_dim, static_cast<int>(index_sz.size())); i < 
index_sz.size(); i++) { + copy_stmt_read.loop_level[level-1+privatized_levels.size()+i].type = LoopLevelUnknown; + copy_stmt_read.loop_level[level-1+privatized_levels.size()+i].payload = -1; + copy_stmt_read.loop_level[level-1+privatized_levels.size()+i].parallel_level = 0; + } + + + shiftLexicalOrder(lex, dim-1, 1); + stmt.push_back(copy_stmt_read); + ro_copy_stmt_num = stmt.size() - 1; + dep.insert(); + } + + // insert write copy statement + int wo_copy_stmt_num = -1; + if (has_write_refs) { + Relation copy_xform(wo_copy_is.n_set(), 2*wo_copy_is.n_set()+1); + { + F_And *f_root = copy_xform.add_and(); + for (int i = 1; i <= wo_copy_is.n_set(); i++) { + EQ_Handle h = f_root->add_EQ(); + h.update_coef(copy_xform.input_var(i), 1); + h.update_coef(copy_xform.output_var(2*i), -1); + } + for (int i = 1; i <= dim; i+=2) { + EQ_Handle h = f_root->add_EQ(); + h.update_coef(copy_xform.output_var(i), -1); + h.update_const(lex[i-1]); + } + for (int i = dim+2; i <= copy_xform.n_out(); i+=2) { + EQ_Handle h = f_root->add_EQ(); + h.update_coef(copy_xform.output_var(i), 1); + } + } + + Statement copy_stmt_write; + copy_stmt_write.IS = wo_copy_is; + copy_stmt_write.xform = copy_xform; + copy_stmt_write.code = copy_code_write; + copy_stmt_write.loop_level = std::vector<LoopLevel>(wo_copy_is.n_set()); + copy_stmt_write.ir_stmt_node = NULL; + + for (int i = 0; i < level-1; i++) { + copy_stmt_write.loop_level[i].type = stmt[*(active.begin())].loop_level[i].type; + if (stmt[*(active.begin())].loop_level[i].type == LoopLevelTile && + stmt[*(active.begin())].loop_level[i].payload >= level) { + int j; + for (j = 0; j < privatized_levels.size(); j++) + if (privatized_levels[j] == stmt[*(active.begin())].loop_level[i].payload) + break; + if (j == privatized_levels.size()) + copy_stmt_write.loop_level[i].payload = -1; + else + copy_stmt_write.loop_level[i].payload = level + j; + } + else + copy_stmt_write.loop_level[i].payload = stmt[*(active.begin())].loop_level[i].payload; + 
copy_stmt_write.loop_level[i].parallel_level = stmt[*(active.begin())].loop_level[i].parallel_level; + } + for (int i = 0; i < privatized_levels.size(); i++) { + copy_stmt_write.loop_level[level-1+i].type = stmt[*(active.begin())].loop_level[privatized_levels[i]].type; + copy_stmt_write.loop_level[level-1+i].payload = stmt[*(active.begin())].loop_level[privatized_levels[i]].payload; + copy_stmt_write.loop_level[level-1+i].parallel_level = stmt[*(active.begin())].loop_level[privatized_levels[i]].parallel_level; + } + int left_num_dim = num_dep_dim - (get_last_dep_dim_before(*(active.begin()), level) + 1); + for (int i = 0; i < min(left_num_dim, static_cast<int>(index_sz.size())); i++) { + copy_stmt_write.loop_level[level-1+privatized_levels.size()+i].type = LoopLevelOriginal; + copy_stmt_write.loop_level[level-1+privatized_levels.size()+i].payload = num_dep_dim-left_num_dim+i; + copy_stmt_write.loop_level[level-1+privatized_levels.size()+i].parallel_level = 0; + } + for (int i = min(left_num_dim, static_cast<int>(index_sz.size())); i < index_sz.size(); i++) { + copy_stmt_write.loop_level[level-1+privatized_levels.size()+i].type = LoopLevelUnknown; + copy_stmt_write.loop_level[level-1+privatized_levels.size()+i].payload = -1; + copy_stmt_write.loop_level[level-1+privatized_levels.size()+i].parallel_level = 0; + } + lex[dim-1]++; + shiftLexicalOrder(lex, dim-1, -2); + stmt.push_back(copy_stmt_write); + wo_copy_stmt_num = stmt.size() - 1; + dep.insert(); + } + + // replace original array accesses with temporary array accesses + for (int i =0; i < stmt_refs.size(); i++) + for (int j = 0; j < stmt_refs[i].second.size(); j++) { + if (index_sz.size() == 0) { + IR_ScalarRef *tmp_scalar_ref = ir->CreateScalarRef(static_cast<IR_ScalarSymbol *>(tmp_sym)); + ir->ReplaceExpression(stmt_refs[i].second[j], tmp_scalar_ref->convert()); + } + else { + std::vector<CG_outputRepr *> index_repr(index_sz.size()); + for (int k = 0; k < index_sz.size(); k++) { + int cur_index_num = 
index_sz[k].first; + + CG_outputRepr *cur_index_repr = ocg->CreateMinus(stmt_refs[i].second[j]->index(cur_index_num), index_lb[cur_index_num]->clone()); + if (padding_stride != 0) { + if (k == n_dim-1) { + coef_t g = gcd(index_stride[cur_index_num], static_cast<coef_t>(padding_stride)); + coef_t t1 = index_stride[cur_index_num] / g; + if (t1 != 1) + cur_index_repr = ocg->CreateIntegerFloor(cur_index_repr, ocg->CreateInt(t1)); + coef_t t2 = padding_stride / g; + if (t2 != 1) + cur_index_repr = ocg->CreateTimes(cur_index_repr, ocg->CreateInt(t2)); + } + else if (index_stride[cur_index_num] != 1) { + cur_index_repr = ocg->CreateIntegerFloor(cur_index_repr, ocg->CreateInt(index_stride[cur_index_num])); + } + } + + if (ir->ArrayIndexStartAt() != 0) + cur_index_repr = ocg->CreatePlus(cur_index_repr, ocg->CreateInt(ir->ArrayIndexStartAt())); + index_repr[k] = cur_index_repr; + } + + IR_ArrayRef *tmp_array_ref = ir->CreateArrayRef(static_cast<IR_ArraySymbol *>(tmp_sym), index_repr); + ir->ReplaceExpression(stmt_refs[i].second[j], tmp_array_ref->convert()); + } + } + + // update dependence graph + int dep_dim = get_last_dep_dim_before(*(active.begin()), level) + 1; + if (ro_copy_stmt_num != -1) { + for (int i = 0; i < old_num_stmt; i++) { + std::vector<std::vector<DependenceVector> > D; + + for (DependenceGraph::EdgeList::iterator j = dep.vertex[i].second.begin(); j != dep.vertex[i].second.end();) { + if (active.find(i) != active.end() && active.find(j->first) == active.end()) { + std::vector<DependenceVector> dvs1, dvs2; + for (int k = 0; k < j->second.size(); k++) { + DependenceVector dv = j->second[k]; + if (dv.sym != NULL && dv.sym->name() == sym->name() && (dv.type == DEP_R2R || dv.type == DEP_R2W)) + dvs1.push_back(dv); + else + dvs2.push_back(dv); + } + j->second = dvs2; + if (dvs1.size() > 0) + dep.connect(ro_copy_stmt_num, j->first, dvs1); + } + else if (active.find(i) == active.end() && active.find(j->first) != active.end()) { + std::vector<DependenceVector> dvs1, 
dvs2; + for (int k = 0; k < j->second.size(); k++) { + DependenceVector dv = j->second[k]; + if (dv.sym != NULL && dv.sym->name() == sym->name() && (dv.type == DEP_R2R || dv.type == DEP_W2R)) + dvs1.push_back(dv); + else + dvs2.push_back(dv); + } + j->second = dvs2; + if (dvs1.size() > 0) + D.push_back(dvs1); + } + + if (j->second.size() == 0) + dep.vertex[i].second.erase(j++); + else + j++; + } + + for (int j = 0; j < D.size(); j++) + dep.connect(i, ro_copy_stmt_num, D[j]); + } + + // insert dependences from copy statement loop to copied statements + DependenceVector dv; + dv.type = DEP_W2R; + dv.sym = tmp_sym->clone(); + dv.lbounds = std::vector<coef_t>(dep.num_dim(), 0); + dv.ubounds = std::vector<coef_t>(dep.num_dim(), 0); + for (int i = dep_dim; i < dep.num_dim(); i++) { + dv.lbounds[i] = -posInfinity; + dv.ubounds[i] = posInfinity; + } + for (std::set<int>::iterator i = active.begin(); i != active.end(); i++) + dep.connect(ro_copy_stmt_num, *i, dv); + } + + if (wo_copy_stmt_num != -1) { + for (int i = 0; i < old_num_stmt; i++) { + std::vector<std::vector<DependenceVector> > D; + + for (DependenceGraph::EdgeList::iterator j = dep.vertex[i].second.begin(); j != dep.vertex[i].second.end();) { + if (active.find(i) != active.end() && active.find(j->first) == active.end()) { + std::vector<DependenceVector> dvs1, dvs2; + for (int k = 0; k < j->second.size(); k++) { + DependenceVector dv = j->second[k]; + if (dv.sym != NULL && dv.sym->name() == sym->name() && (dv.type == DEP_W2R || dv.type == DEP_W2W)) + dvs1.push_back(dv); + else + dvs2.push_back(dv); + } + j->second = dvs2; + if (dvs1.size() > 0) + dep.connect(wo_copy_stmt_num, j->first, dvs1); + } + else if (active.find(i) == active.end() && active.find(j->first) != active.end()) { + std::vector<DependenceVector> dvs1, dvs2; + for (int k = 0; k < j->second.size(); k++) { + DependenceVector dv = j->second[k]; + if (dv.sym != NULL && dv.sym->name() == sym->name() && (dv.type == DEP_R2W || dv.type == DEP_W2W)) + 
dvs1.push_back(dv); + else + dvs2.push_back(dv); + } + j->second = dvs2; + if (dvs1.size() > 0) + D.push_back(dvs1); + } + + if (j->second.size() == 0) + dep.vertex[i].second.erase(j++); + else + j++; + } + + for (int j = 0; j < D.size(); j++) + dep.connect(i, wo_copy_stmt_num, D[j]); + } + + // insert dependences from copied statements to write statements + DependenceVector dv; + dv.type = DEP_W2R; + dv.sym = tmp_sym->clone(); + dv.lbounds = std::vector<coef_t>(dep.num_dim(), 0); + dv.ubounds = std::vector<coef_t>(dep.num_dim(), 0); + for (int i = dep_dim; i < dep.num_dim(); i++) { + dv.lbounds[i] = -posInfinity; + dv.ubounds[i] = posInfinity; + } + for (std::set<int>::iterator i = active.begin(); i != active.end(); i++) + dep.connect(*i, wo_copy_stmt_num, dv); + + } + + // update variable name for dependences among copied statements + for (int i = 0; i < old_num_stmt; i++) { + if (active.find(i) != active.end()) + for (DependenceGraph::EdgeList::iterator j = dep.vertex[i].second.begin(); j != dep.vertex[i].second.end(); j++) + if (active.find(j->first) != active.end()) + for (int k = 0; k < j->second.size(); k++) { + IR_Symbol *s = tmp_sym->clone(); + j->second[k].sym = s; + } + } + + // insert anti-dependence from write statement to read statement + if (ro_copy_stmt_num != -1 && wo_copy_stmt_num != -1) + if (dep_dim >= 0) { + DependenceVector dv; + dv.type = DEP_R2W; + dv.sym = tmp_sym->clone(); + dv.lbounds = std::vector<coef_t>(dep.num_dim(), 0); + dv.ubounds = std::vector<coef_t>(dep.num_dim(), 0); + for (int k = dep_dim; k < dep.num_dim(); k++) { + dv.lbounds[k] = -posInfinity; + dv.ubounds[k] = posInfinity; + } + for (int k = 0; k < dep_dim; k++) { + if (k != 0) { + dv.lbounds[k-1] = 0; + dv.ubounds[k-1] = 0; + } + dv.lbounds[k] = 1; + dv.ubounds[k] = posInfinity; + dep.connect(wo_copy_stmt_num, ro_copy_stmt_num, dv); + } + } + + // cleanup + delete sym; + delete tmp_sym; + for (int i = 0; i < index_lb.size(); i++) { + index_lb[i]->clear(); + delete 
index_lb[i]; + } + for (int i = 0; i < index_sz.size(); i++) { + index_sz[i].second->clear(); + delete index_sz[i].second; + } + + return true; +} diff --git a/loop_extra.cc b/loop_extra.cc new file mode 100644 index 0000000..2412403 --- /dev/null +++ b/loop_extra.cc @@ -0,0 +1,224 @@ +/***************************************************************************** + Copyright (C) 2010 University of Utah + All Rights Reserved. + + Purpose: + Additional loop transformations. + + Notes: + + History: + 07/31/10 Created by Chun Chen +*****************************************************************************/ + +#include <codegen.h> +#include <code_gen/CG_utils.h> +#include "loop.hh" +#include "omegatools.hh" +#include "ir_code.hh" +#include "chill_error.hh" + +using namespace omega; + + +void Loop::shift_to(int stmt_num, int level, int absolute_position) { + // combo + tile(stmt_num, level, 1, level, CountedTile); + std::vector<int> lex = getLexicalOrder(stmt_num); + std::set<int> active = getStatements(lex, 2*level-2); + shift(active, level, absolute_position); + + // remove unnecessary tiled loop since tile size is one + for (std::set<int>::iterator i = active.begin(); i != active.end(); i++) { + int n = stmt[*i].xform.n_out(); + Relation mapping(n, n-2); + F_And *f_root = mapping.add_and(); + for (int j = 1; j <= 2*level; j++) { + EQ_Handle h = f_root->add_EQ(); + h.update_coef(mapping.output_var(j), 1); + h.update_coef(mapping.input_var(j), -1); + } + for (int j = 2*level+3; j <= n; j++) { + EQ_Handle h = f_root->add_EQ(); + h.update_coef(mapping.output_var(j-2), 1); + h.update_coef(mapping.input_var(j), -1); + } + stmt[*i].xform = Composition(mapping, stmt[*i].xform); + stmt[*i].xform.simplify(); + + for (int j = 0; j < stmt[*i].loop_level.size(); j++) + if (j != level-1 && + stmt[*i].loop_level[j].type == LoopLevelTile && + stmt[*i].loop_level[j].payload >= level) + stmt[*i].loop_level[j].payload--; + + 
stmt[*i].loop_level.erase(stmt[*i].loop_level.begin()+level-1); + } +} + + +std::set<int> Loop::unroll_extra(int stmt_num, int level, int unroll_amount, int cleanup_split_level) { + std::set<int> cleanup_stmts = unroll(stmt_num, level, unroll_amount,std::vector< std::vector<std::string> >(), cleanup_split_level); + for (std::set<int>::iterator i = cleanup_stmts.begin(); i != cleanup_stmts.end(); i++) + unroll(*i, level, 0); + + return cleanup_stmts; +} + +void Loop::peel(int stmt_num, int level, int peel_amount) { + // check for sanity of parameters + if (stmt_num < 0 || stmt_num >= stmt.size()) + throw std::invalid_argument("invalid statement number " + to_string(stmt_num)); + if (level <= 0 || level > stmt[stmt_num].loop_level.size()) + throw std::invalid_argument("invalid loop level " + to_string(level)); + + if (peel_amount == 0) + return; + + std::set<int> subloop = getSubLoopNest(stmt_num, level); + std::vector<Relation> Rs; + for (std::set<int>::iterator i = subloop.begin(); i != subloop.end(); i++) { + Relation r = getNewIS(*i); + Relation f(r.n_set(), level); + F_And *f_root = f.add_and(); + for (int j = 1; j <= level; j++) { + EQ_Handle h = f_root->add_EQ(); + h.update_coef(f.input_var(2*j), 1); + h.update_coef(f.output_var(j), -1); + } + r = Composition(f, r); + r.simplify(); + Rs.push_back(r); + } + Relation hull = SimpleHull(Rs); + + if (peel_amount > 0) { + GEQ_Handle bound_eq; + bool found_bound = false; + for (GEQ_Iterator e(hull.single_conjunct()->GEQs()); e; e++) + if (!(*e).has_wildcards() && (*e).get_coef(hull.set_var(level)) > 0) { + bound_eq = *e; + found_bound = true; + break; + } + if (!found_bound) + for (GEQ_Iterator e(hull.single_conjunct()->GEQs()); e; e++) + if ((*e).has_wildcards() && (*e).get_coef(hull.set_var(level)) > 0) { + bool is_bound = true; + for (Constr_Vars_Iter cvi(*e, true); cvi; cvi++) { + std::pair<bool, GEQ_Handle> result = find_floor_definition(hull, cvi.curr_var()); + if (!result.first) { + is_bound = false; + break; 
+ } + } + if (is_bound) { + bound_eq = *e; + found_bound = true; + break; + } + } + if (!found_bound) + throw loop_error("can't find lower bound for peeling at loop level " + to_string(level)); + + for (int i = 1; i <= peel_amount; i++) { + Relation r(level); + F_Exists *f_exists = r.add_and()->add_exists(); + F_And *f_root = f_exists->add_and(); + GEQ_Handle h = f_root->add_GEQ(); + std::map<Variable_ID, Variable_ID> exists_mapping; + for (Constr_Vars_Iter cvi(bound_eq); cvi; cvi++) + switch (cvi.curr_var()->kind()) { + case Input_Var: + h.update_coef(r.set_var(cvi.curr_var()->get_position()), cvi.curr_coef()); + break; + case Wildcard_Var: { + Variable_ID v = replicate_floor_definition(hull, cvi.curr_var(), r, f_exists, f_root, exists_mapping); + h.update_coef(v, cvi.curr_coef()); + break; + } + case Global_Var: { + Global_Var_ID g = cvi.curr_var()->get_global_var(); + Variable_ID v; + if (g->arity() == 0) + v = r.get_local(g); + else + v = r.get_local(g, cvi.curr_var()->function_of()); + h.update_coef(v, cvi.curr_coef()); + break; + } + default: + assert(false); + } + h.update_const(bound_eq.get_const() - i); + r.simplify(); + + split(stmt_num, level, r); + } + } + else { // peel_amount < 0 + GEQ_Handle bound_eq; + bool found_bound = false; + for (GEQ_Iterator e(hull.single_conjunct()->GEQs()); e; e++) + if (!(*e).has_wildcards() && (*e).get_coef(hull.set_var(level)) < 0) { + bound_eq = *e; + found_bound = true; + break; + } + if (!found_bound) + for (GEQ_Iterator e(hull.single_conjunct()->GEQs()); e; e++) + if ((*e).has_wildcards() && (*e).get_coef(hull.set_var(level)) < 0) { + bool is_bound = true; + for (Constr_Vars_Iter cvi(*e, true); cvi; cvi++) { + std::pair<bool, GEQ_Handle> result = find_floor_definition(hull, cvi.curr_var()); + if (!result.first) { + is_bound = false; + break; + } + } + if (is_bound) { + bound_eq = *e; + found_bound = true; + break; + } + } + if (!found_bound) + throw loop_error("can't find upper bound for peeling at loop level " + 
to_string(level)); + + for (int i = 1; i <= -peel_amount; i++) { + Relation r(level); + F_Exists *f_exists = r.add_and()->add_exists(); + F_And *f_root = f_exists->add_and(); + GEQ_Handle h = f_root->add_GEQ(); + std::map<Variable_ID, Variable_ID> exists_mapping; + for (Constr_Vars_Iter cvi(bound_eq); cvi; cvi++) + switch (cvi.curr_var()->kind()) { + case Input_Var: + h.update_coef(r.set_var(cvi.curr_var()->get_position()), cvi.curr_coef()); + break; + case Wildcard_Var: { + Variable_ID v = replicate_floor_definition(hull, cvi.curr_var(), r, f_exists, f_root, exists_mapping); + h.update_coef(v, cvi.curr_coef()); + break; + } + case Global_Var: { + Global_Var_ID g = cvi.curr_var()->get_global_var(); + Variable_ID v; + if (g->arity() == 0) + v = r.get_local(g); + else + v = r.get_local(g, cvi.curr_var()->function_of()); + h.update_coef(v, cvi.curr_coef()); + break; + } + default: + assert(false); + } + h.update_const(bound_eq.get_const() - i); + r.simplify(); + + split(stmt_num, level, r); + } + } +} + diff --git a/loop_modified.cc b/loop_modified.cc new file mode 100644 index 0000000..9686f6d --- /dev/null +++ b/loop_modified.cc @@ -0,0 +1,4234 @@ +/***************************************************************************** + Copyright (C) 2008 University of Southern California + Copyright (C) 2009-2010 University of Utah + All Rights Reserved. + + Purpose: + Core loop transformation functionality. + + Notes: + "level" (starting from 1) means loop level and it corresponds to "dim" + (starting from 0) in transformed iteration space [c_1,l_1,c_2,l_2,...., + c_n,l_n,c_(n+1)], e.g., l_2 is loop level 2 in generated code, dim 3 + in transformed iteration space, and variable 4 in Omega relation. + All c's are constant numbers only and they will not show up as actual loops. + Formula: + dim = 2*level - 1 + var = dim + 1 + + History: + 10/2005 Created by Chun Chen. 
+ 09/2009 Expand tile functionality, -chun + 10/2009 Initialize unfusible loop nest without bailing out, -chun +*****************************************************************************/ + +#include <limits.h> +#include <math.h> +#include <code_gen/code_gen.h> +#include <code_gen/CG_outputBuilder.h> +#include <code_gen/output_repr.h> +#include <iostream> +#include <map> +#include "loop.hh" +#include "omegatools.hh" +#include "irtools.hh" +#include "chill_error.hh" +#include <string.h> +using namespace omega; + +const std::string Loop::tmp_loop_var_name_prefix = std::string("_t"); +const std::string Loop::overflow_var_name_prefix = std::string("over"); + +//----------------------------------------------------------------------------- +// Class Loop +//----------------------------------------------------------------------------- + +bool Loop::init_loop(std::vector<ir_tree_node *> &ir_tree, + std::vector<ir_tree_node *> &ir_stmt) { + ir_stmt = extract_ir_stmts(ir_tree); + stmt_nesting_level_.resize(ir_stmt.size()); + std::vector<int> stmt_nesting_level(ir_stmt.size()); + for (int i = 0; i < ir_stmt.size(); i++) { + ir_stmt[i]->payload = i; + int t = 0; + ir_tree_node *itn = ir_stmt[i]; + while (itn->parent != NULL) { + itn = itn->parent; + if (itn->content->type() == IR_CONTROL_LOOP) + t++; + } + stmt_nesting_level_[i] = t; + stmt_nesting_level[i] = t; + } + + stmt = std::vector<Statement>(ir_stmt.size()); + int n_dim = -1; + int max_loc; + //std::vector<std::string> index; + for (int i = 0; i < ir_stmt.size(); i++) { + int max_nesting_level = -1; + int loc; + for (int j = 0; j < ir_stmt.size(); j++) + if (stmt_nesting_level[j] > max_nesting_level) { + max_nesting_level = stmt_nesting_level[j]; + loc = j; + } + + // most deeply nested statement acting as a reference point + if (n_dim == -1) { + n_dim = max_nesting_level; + max_loc = loc; + + index = std::vector<std::string>(n_dim); + + ir_tree_node *itn = ir_stmt[loc]; + int cur_dim = n_dim - 1; + while 
(itn->parent != NULL) { + itn = itn->parent; + if (itn->content->type() == IR_CONTROL_LOOP) { + index[cur_dim] = + static_cast<IR_Loop *>(itn->content)->index()->name(); + itn->payload = cur_dim--; + } + } + } + + // align loops by names, temporary solution + ir_tree_node *itn = ir_stmt[loc]; + int depth = stmt_nesting_level_[loc] - 1; + /* while (itn->parent != NULL) { + itn = itn->parent; + if (itn->content->type() == IR_CONTROL_LOOP && itn->payload == -1) { + std::string name = static_cast<IR_Loop *>(itn->content)->index()->name(); + for (int j = 0; j < n_dim; j++) + if (index[j] == name) { + itn->payload = j; + break; + } + if (itn->payload == -1) + throw loop_error("no complex alignment yet"); + } + } + */ + for (int t = depth; t >= 0; t--) { + int y = t; + ir_tree_node *itn = ir_stmt[loc]; + + while ((itn->parent != NULL) && (y >= 0)) { + itn = itn->parent; + if (itn->content->type() == IR_CONTROL_LOOP) + y--; + } + + if (itn->content->type() == IR_CONTROL_LOOP && itn->payload == -1) { + CG_outputBuilder *ocg = ir_->builder(); + + itn->payload = depth - t; + + CG_outputRepr *code = + static_cast<IR_Block *>(ir_stmt[loc]->content)->extract(); + + Tuple<CG_outputRepr *> index_expr; + Tuple<std::string> old_index; + CG_outputRepr *repl = ocg->CreateIdent(index[itn->payload]); + index_expr.append(repl); + old_index.append( + static_cast<IR_Loop *>(itn->content)->index()->name()); + + code = ocg->CreatePlaceHolder(0, code, index_expr, old_index); + replace.insert(std::pair<int, CG_outputRepr*>(loc, code)); + //stmt[loc].code = code; + + } + } + + // set relation variable names + Relation r(n_dim); + F_And *f_root = r.add_and(); + itn = ir_stmt[loc]; + int temp_depth = depth; + while (itn->parent != NULL) { + + itn = itn->parent; + if (itn->content->type() == IR_CONTROL_LOOP) { + r.name_set_var(itn->payload + 1, index[temp_depth]); + + temp_depth--; + } + //static_cast<IR_Loop *>(itn->content)->index()->name()); + } + + /*while (itn->parent != NULL) { + itn = 
itn->parent; + if (itn->content->type() == IR_CONTROL_LOOP) + r.name_set_var(itn->payload+1, static_cast<IR_Loop *>(itn->content)->index()->name()); + }*/ + + // extract information from loop/if structures + std::vector<bool> processed(n_dim, false); + Tuple<std::string> vars_to_be_reversed; + itn = ir_stmt[loc]; + while (itn->parent != NULL) { + itn = itn->parent; + + switch (itn->content->type()) { + case IR_CONTROL_LOOP: { + IR_Loop *lp = static_cast<IR_Loop *>(itn->content); + Variable_ID v = r.set_var(itn->payload + 1); + int c; + + try { + c = lp->step_size(); + if (c > 0) { + CG_outputRepr *lb = lp->lower_bound(); + exp2formula(ir, r, f_root, freevar, lb, v, 's', + IR_COND_GE, true); + CG_outputRepr *ub = lp->upper_bound(); + IR_CONDITION_TYPE cond = lp->stop_cond(); + if (cond == IR_COND_LT || cond == IR_COND_LE) + exp2formula(ir, r, f_root, freevar, ub, v, 's', + cond, true); + else + throw ir_error("loop condition not supported"); + + } else if (c < 0) { + CG_outputBuilder *ocg = ir->builder(); + CG_outputRepr *lb = lp->lower_bound(); + lb = ocg->CreateMinus(NULL, lb); + exp2formula(ir, r, f_root, freevar, lb, v, 's', + IR_COND_GE, true); + CG_outputRepr *ub = lp->upper_bound(); + ub = ocg->CreateMinus(NULL, ub); + IR_CONDITION_TYPE cond = lp->stop_cond(); + if (cond == IR_COND_GE) + exp2formula(ir, r, f_root, freevar, ub, v, 's', + IR_COND_LE, true); + else if (cond == IR_COND_GT) + exp2formula(ir, r, f_root, freevar, ub, v, 's', + IR_COND_LT, true); + else + throw ir_error("loop condition not supported"); + + vars_to_be_reversed.append(lp->index()->name()); + } else + throw ir_error("loop step size zero"); + } catch (const ir_error &e) { + for (int i = 0; i < itn->children.size(); i++) + delete itn->children[i]; + itn->children = std::vector<ir_tree_node *>(); + itn->content = itn->content->convert(); + return false; + } + + if (abs(c) != 1) { + F_Exists *f_exists = f_root->add_exists(); + Variable_ID e = f_exists->declare(); + F_And *f_and = 
f_exists->add_and(); + Stride_Handle h = f_and->add_stride(abs(c)); + if (c > 0) + h.update_coef(e, 1); + else + h.update_coef(e, -1); + h.update_coef(v, -1); + CG_outputRepr *lb = lp->lower_bound(); + exp2formula(ir, r, f_and, freevar, lb, e, 's', IR_COND_EQ, + true); + } + + processed[itn->payload] = true; + break; + } + case IR_CONTROL_IF: { + CG_outputRepr *cond = + static_cast<IR_If *>(itn->content)->condition(); + try { + if (itn->payload % 2 == 1) + exp2constraint(ir, r, f_root, freevar, cond, true); + else { + F_Not *f_not = f_root->add_not(); + F_And *f_and = f_not->add_and(); + exp2constraint(ir, r, f_and, freevar, cond, true); + } + } catch (const ir_error &e) { + std::vector<ir_tree_node *> *t; + if (itn->parent == NULL) + t = &ir_tree; + else + t = &(itn->parent->children); + int id = itn->payload; + int i = t->size() - 1; + while (i >= 0) { + if ((*t)[i] == itn) { + for (int j = 0; j < itn->children.size(); j++) + delete itn->children[j]; + itn->children = std::vector<ir_tree_node *>(); + itn->content = itn->content->convert(); + } else if ((*t)[i]->payload >> 1 == id >> 1) { + delete (*t)[i]; + t->erase(t->begin() + i); + } + i--; + } + return false; + } + + break; + } + default: + for (int i = 0; i < itn->children.size(); i++) + delete itn->children[i]; + itn->children = std::vector<ir_tree_node *>(); + itn->content = itn->content->convert(); + return false; + } + } + + // add information for missing loops + for (int j = 0; j < n_dim; j++) + if (!processed[j]) { + ir_tree_node *itn = ir_stmt[max_loc]; + while (itn->parent != NULL) { + itn = itn->parent; + if (itn->content->type() == IR_CONTROL_LOOP + && itn->payload == j) + break; + } + + Variable_ID v = r.set_var(j + 1); + if (loc < max_loc) { + CG_outputRepr *lb = + static_cast<IR_Loop *>(itn->content)->lower_bound(); + exp2formula(ir, r, f_root, freevar, lb, v, 's', IR_COND_EQ, + true); + } else { // loc > max_loc + CG_outputRepr *ub = + static_cast<IR_Loop *>(itn->content)->upper_bound(); + 
exp2formula(ir, r, f_root, freevar, ub, v, 's', IR_COND_EQ, + true); + } + } + + r.setup_names(); + r.simplify(); + + // insert the statement + CG_outputBuilder *ocg = ir->builder(); + Tuple<CG_outputRepr *> reverse_expr; + for (int j = 1; j <= vars_to_be_reversed.size(); j++) { + CG_outputRepr *repl = ocg->CreateIdent(vars_to_be_reversed[j]); + repl = ocg->CreateMinus(NULL, repl); + reverse_expr.append(repl); + } + CG_outputRepr *code = + static_cast<IR_Block *>(ir_stmt[loc]->content)->original(); + code = ocg->CreatePlaceHolder(0, code, reverse_expr, + vars_to_be_reversed); + stmt[loc].code = code; + stmt[loc].IS = r; + stmt[loc].loop_level = std::vector<LoopLevel>(n_dim); + for (int i = 0; i < n_dim; i++) { + stmt[loc].loop_level[i].type = LoopLevelOriginal; + stmt[loc].loop_level[i].payload = i; + stmt[loc].loop_level[i].parallel_level = 0; + } + + stmt_nesting_level[loc] = -1; + } + + return true; +} + +Loop::Loop(const IR_Control *control) { + ir = const_cast<IR_Code *>(control->ir_); + init_code = NULL; + cleanup_code = NULL; + tmp_loop_var_name_counter = 1; + overflow_var_name_counter = 1; + known = Relation::True(0); + + std::vector<ir_tree_node *> ir_tree = build_ir_tree(control->clone(), NULL); + std::vector<ir_tree_node *> ir_stmt; + + while (!init_loop(ir_tree, ir_stmt)) { + } + + // init the dependence graph + for (int i = 0; i < stmt.size(); i++) + dep.insert(); + + for (int i = 0; i < stmt.size(); i++) + for (int j = i; j < stmt.size(); j++) { + std::pair<std::vector<DependenceVector>, + std::vector<DependenceVector> > dv = test_data_dependences( + ir_, stmt[i].code, stmt[i].IS, stmt[j].code, stmt[j].IS, + freevar, index, stmt_nesting_level_[i], + stmt_nesting_level[j]); + + for (int k = 0; k < dv.first.size(); k++) { + if (is_dependence_valid(ir_stmt[i], ir_stmt[j], dv.first[k], + true)) + dep.connect(i, j, dv.first[k]); + else { + dep.connect(j, i, dv.first[k].reverse()); + } + + } + for (int k = 0; k < dv.second.size(); k++) + if 
(is_dependence_valid(ir_stmt[j], ir_stmt[i], dv.second[k], + false)) + dep.connect(j, i, dv.second[k]); + else { + dep.connect(i, j, dv.second[k].reverse()); + } + // std::pair<std::vector<DependenceVector>, + // std::vector<DependenceVector> > dv_ = test_data_dependences( + + } + + for (int i = 0; i < stmt.size(); i++) { + std::map<int, CG_outputRepr*>::iterator it = replace.find(i); + + if (it != replace.end()) + stmt[i].code = (it->second)->clone(); + else + stmt[i].code = stmt[i].code->clone(); + } + + // cleanup the IR tree + for (int i = 0; i < ir_tree.size(); i++) + delete ir_tree[i]; + + // init dumb transformation relations e.g. [i, j] -> [ 0, i, 0, j, 0] + for (int i = 0; i < stmt.size(); i++) { + int n = stmt[i].IS.n_set(); + stmt[i].xform = Relation(n, 2 * n + 1); + F_And *f_root = stmt[i].xform.add_and(); + + for (int j = 1; j <= n; j++) { + EQ_Handle h = f_root->add_EQ(); + h.update_coef(stmt[i].xform.output_var(2 * j), 1); + h.update_coef(stmt[i].xform.input_var(j), -1); + } + + for (int j = 1; j <= 2 * n + 1; j += 2) { + EQ_Handle h = f_root->add_EQ(); + h.update_coef(stmt[i].xform.output_var(j), 1); + } + stmt[i].xform.simplify(); + } + + if (stmt.size() != 0) + num_dep_dim = stmt[0].IS.n_set(); + else + num_dep_dim = 0; +} + +Loop::~Loop() { + for (int i = 0; i < stmt.size(); i++) + if (stmt[i].code != NULL) { + stmt[i].code->clear(); + delete stmt[i].code; + } + if (init_code != NULL) { + init_code->clear(); + delete init_code; + } + if (cleanup_code != NULL) { + cleanup_code->clear(); + delete cleanup_code; + } +} + +int Loop::get_dep_dim_of(int stmt_num, int level) const { + if (stmt_num < 0 || stmt_num >= stmt.size()) + throw std::invalid_argument("invaid statement " + to_string(stmt_num)); + + if (level < 1 || level > stmt[stmt_num].loop_level.size()) + return -1; + + int trip_count = 0; + while (true) { + switch (stmt[stmt_num].loop_level[level - 1].type) { + case LoopLevelOriginal: + return stmt[stmt_num].loop_level[level - 1].payload; + 
case LoopLevelTile: + level = stmt[stmt_num].loop_level[level - 1].payload; + if (level < 1) + return -1; + if (level > stmt[stmt_num].loop_level.size()) + throw loop_error( + "incorrect loop level information for statement " + + to_string(stmt_num)); + break; + default: + throw loop_error( + "unknown loop level information for statement " + + to_string(stmt_num)); + } + trip_count++; + if (trip_count >= stmt[stmt_num].loop_level.size()) + throw loop_error( + "incorrect loop level information for statement " + + to_string(stmt_num)); + } +} + +int Loop::get_last_dep_dim_before(int stmt_num, int level) const { + if (stmt_num < 0 || stmt_num >= stmt.size()) + throw std::invalid_argument("invaid statement " + to_string(stmt_num)); + + if (level < 1) + return -1; + if (level > stmt[stmt_num].loop_level.size()) + level = stmt[stmt_num].loop_level.size() + 1; + + for (int i = level - 1; i >= 1; i--) + if (stmt[stmt_num].loop_level[i - 1].type == LoopLevelOriginal) + return stmt[stmt_num].loop_level[i - 1].payload; + + return -1; +} + +void Loop::print_internal_loop_structure() const { + for (int i = 0; i < stmt.size(); i++) { + std::vector<int> lex = getLexicalOrder(i); + std::cout << "s" << i + 1 << ": "; + for (int j = 0; j < stmt[i].loop_level.size(); j++) { + if (2 * j < lex.size()) + std::cout << lex[2 * j]; + switch (stmt[i].loop_level[j].type) { + case LoopLevelOriginal: + std::cout << "(dim:" << stmt[i].loop_level[j].payload << ")"; + break; + case LoopLevelTile: + std::cout << "(tile:" << stmt[i].loop_level[j].payload << ")"; + break; + default: + std::cout << "(unknown)"; + } + std::cout << ' '; + } + for (int j = 2 * stmt[i].loop_level.size(); j < lex.size(); j += 2) { + std::cout << lex[j]; + if (j != lex.size() - 1) + std::cout << ' '; + } + std::cout << std::endl; + } +} + +CG_outputRepr *Loop::getCode(int effort) const { + const int m = stmt.size(); + if (m == 0) + return NULL; + const int n = stmt[0].xform.n_out(); + + Tuple<CG_outputRepr *> ni(m); + 
Tuple < Relation > IS(m); + Tuple < Relation > xform(m); + for (int i = 0; i < m; i++) { + ni[i + 1] = stmt[i].code; + IS[i + 1] = stmt[i].IS; + xform[i + 1] = stmt[i].xform; + } + + Relation known = Extend_Set(copy(this->known), n - this->known.n_set()); + CG_outputBuilder *ocg = ir->builder(); + CG_outputRepr *repr = MMGenerateCode(ocg, xform, IS, ni, known, effort); + + if (init_code != NULL) + repr = ocg->StmtListAppend(init_code->clone(), repr); + if (cleanup_code != NULL) + repr = ocg->StmtListAppend(repr, cleanup_code->clone()); + + return repr; +} + +void Loop::printCode(int effort) const { + const int m = stmt.size(); + if (m == 0) + return; + const int n = stmt[0].xform.n_out(); + + Tuple < Relation > IS(m); + Tuple < Relation > xform(m); + for (int i = 0; i < m; i++) { + IS[i + 1] = stmt[i].IS; + xform[i + 1] = stmt[i].xform; + } + + Relation known = Extend_Set(copy(this->known), n - this->known.n_set()); + std::cout << MMGenerateCode(xform, IS, known, effort); +} + +Relation Loop::getNewIS(int stmt_num) const { + Relation result; + + if (stmt[stmt_num].xform.is_null()) { + Relation known = Extend_Set(copy(this->known), + stmt[stmt_num].IS.n_set() - this->known.n_set()); + result = Intersection(copy(stmt[stmt_num].IS), known); + } else { + Relation known = Extend_Set(copy(this->known), + stmt[stmt_num].xform.n_out() - this->known.n_set()); + result = Intersection( + Range( + Restrict_Domain(copy(stmt[stmt_num].xform), + copy(stmt[stmt_num].IS))), known); + } + + result.simplify(2, 4); + + return result; +} + +std::vector<Relation> Loop::getNewIS() const { + const int m = stmt.size(); + + std::vector<Relation> new_IS(m); + for (int i = 0; i < m; i++) + new_IS[i] = getNewIS(i); + + return new_IS; +} + +void Loop::permute(const std::vector<int> &pi) { + std::set<int> active; + for (int i = 0; i < stmt.size(); i++) + active.insert(i); + + permute(active, pi); +} + +void Loop::original() { + std::set<int> active; + for (int i = 0; i < stmt.size(); i++) + 
active.insert(i); + setLexicalOrder(0, active); +} + +void Loop::permute(const std::set<int> &active, const std::vector<int> &pi) { + if (active.size() == 0 || pi.size() == 0) + return; + + // check for sanity of parameters + int level = pi[0]; + for (int i = 1; i < pi.size(); i++) + if (pi[i] < level) + level = pi[i]; + if (level < 1) + throw std::invalid_argument("invalid permuation"); + std::vector<int> reverse_pi(pi.size(), 0); + for (int i = 0; i < pi.size(); i++) + if (pi[i] >= level + pi.size()) + throw std::invalid_argument("invalid permutation"); + else + reverse_pi[pi[i] - level] = i + level; + for (int i = 0; i < reverse_pi.size(); i++) + if (reverse_pi[i] == 0) + throw std::invalid_argument("invalid permuation"); + int ref_stmt_num; + std::vector<int> lex; + for (std::set<int>::iterator i = active.begin(); i != active.end(); i++) { + if (*i < 0 || *i >= stmt.size()) + throw std::invalid_argument("invalid statement " + to_string(*i)); + if (i == active.begin()) { + ref_stmt_num = *i; + lex = getLexicalOrder(*i); + } else { + if (level + pi.size() - 1 > stmt[*i].loop_level.size()) + throw std::invalid_argument("invalid permuation"); + std::vector<int> lex2 = getLexicalOrder(*i); + for (int j = 0; j < 2 * level - 3; j += 2) + if (lex[j] != lex2[j]) + throw std::invalid_argument( + "statements to permute must be in the same subloop"); + for (int j = 0; j < pi.size(); j++) + if (!(stmt[*i].loop_level[level + j - 1].type + == stmt[ref_stmt_num].loop_level[level + j - 1].type + && stmt[*i].loop_level[level + j - 1].payload + == stmt[ref_stmt_num].loop_level[level + j - 1].payload)) + throw std::invalid_argument( + "permuted loops must have the same loop level types"); + } + } + + // Update transformation relations + for (std::set<int>::iterator i = active.begin(); i != active.end(); i++) { + int n = stmt[*i].xform.n_out(); + Relation mapping(n, n); + F_And *f_root = mapping.add_and(); + for (int j = 1; j <= n; j += 2) { + EQ_Handle h = f_root->add_EQ(); + 
h.update_coef(mapping.output_var(j), 1); + h.update_coef(mapping.input_var(j), -1); + } + for (int j = 0; j < pi.size(); j++) { + EQ_Handle h = f_root->add_EQ(); + h.update_coef(mapping.output_var(2 * (level + j)), 1); + h.update_coef(mapping.input_var(2 * pi[j]), -1); + } + for (int j = 1; j < level; j++) { + EQ_Handle h = f_root->add_EQ(); + h.update_coef(mapping.output_var(2 * j), 1); + h.update_coef(mapping.input_var(2 * j), -1); + } + for (int j = level + pi.size(); j <= stmt[*i].loop_level.size(); j++) { + EQ_Handle h = f_root->add_EQ(); + h.update_coef(mapping.output_var(2 * j), 1); + h.update_coef(mapping.input_var(2 * j), -1); + } + + stmt[*i].xform = Composition(mapping, stmt[*i].xform); + stmt[*i].xform.simplify(); + } + + // get the permuation for dependence vectors + std::vector<int> t; + for (int i = 0; i < pi.size(); i++) + if (stmt[ref_stmt_num].loop_level[pi[i] - 1].type == LoopLevelOriginal) + t.push_back(stmt[ref_stmt_num].loop_level[pi[i] - 1].payload); + int max_dep_dim = -1; + int min_dep_dim = num_dep_dim; + for (int i = 0; i < t.size(); i++) { + if (t[i] > max_dep_dim) + max_dep_dim = t[i]; + if (t[i] < min_dep_dim) + min_dep_dim = t[i]; + } + if (min_dep_dim > max_dep_dim) + return; + if (max_dep_dim - min_dep_dim + 1 != t.size()) + throw loop_error("cannot update the dependence graph after permuation"); + std::vector<int> dep_pi(num_dep_dim); + for (int i = 0; i < min_dep_dim; i++) + dep_pi[i] = i; + for (int i = min_dep_dim; i <= max_dep_dim; i++) + dep_pi[i] = t[i - min_dep_dim]; + for (int i = max_dep_dim + 1; i < num_dep_dim; i++) + dep_pi[i] = i; + + // update the dependence graph + DependenceGraph g; + for (int i = 0; i < dep.vertex.size(); i++) + g.insert(); + for (int i = 0; i < dep.vertex.size(); i++) + for (DependenceGraph::EdgeList::iterator j = + dep.vertex[i].second.begin(); j != dep.vertex[i].second.end(); + j++) { + if ((active.find(i) != active.end() + && active.find(j->first) != active.end())) { + 
std::vector<DependenceVector> dv = j->second; + for (int k = 0; k < dv.size(); k++) { + switch (dv[k].type) { + case DEP_W2R: + case DEP_R2W: + case DEP_W2W: + case DEP_R2R: { + std::vector<coef_t> lbounds(num_dep_dim); + std::vector<coef_t> ubounds(num_dep_dim); + for (int d = 0; d < num_dep_dim; d++) { + lbounds[d] = dv[k].lbounds[dep_pi[d]]; + ubounds[d] = dv[k].ubounds[dep_pi[d]]; + } + dv[k].lbounds = lbounds; + dv[k].ubounds = ubounds; + break; + } + case DEP_CONTROL: { + break; + } + default: + throw loop_error("unknown dependence type"); + } + } + g.connect(i, j->first, dv); + } else if (active.find(i) == active.end() + && active.find(j->first) == active.end()) { + std::vector<DependenceVector> dv = j->second; + g.connect(i, j->first, dv); + } else { + std::vector<DependenceVector> dv = j->second; + for (int k = 0; k < dv.size(); k++) + switch (dv[k].type) { + case DEP_W2R: + case DEP_R2W: + case DEP_W2W: + case DEP_R2R: { + for (int d = 0; d < num_dep_dim; d++) + if (dep_pi[d] != d) { + dv[k].lbounds[d] = -posInfinity; + dv[k].ubounds[d] = posInfinity; + } + break; + } + case DEP_CONTROL: + break; + default: + throw loop_error("unknown dependence type"); + } + g.connect(i, j->first, dv); + } + } + dep = g; + + // update loop level information + for (std::set<int>::iterator i = active.begin(); i != active.end(); i++) { + int cur_dep_dim = min_dep_dim; + std::vector<LoopLevel> new_loop_level(stmt[*i].loop_level.size()); + for (int j = 1; j <= stmt[*i].loop_level.size(); j++) + if (j >= level && j < level + pi.size()) { + switch (stmt[*i].loop_level[reverse_pi[j - level] - 1].type) { + case LoopLevelOriginal: + new_loop_level[j - 1].type = LoopLevelOriginal; + new_loop_level[j - 1].payload = cur_dep_dim++; + new_loop_level[j - 1].parallel_level = + stmt[*i].loop_level[reverse_pi[j - level] - 1].parallel_level; + break; + case LoopLevelTile: { + new_loop_level[j - 1].type = LoopLevelTile; + int ref_level = stmt[*i].loop_level[reverse_pi[j - level] + - 
1].payload; + if (ref_level >= level && ref_level < level + pi.size()) + new_loop_level[j - 1].payload = reverse_pi[ref_level + - level]; + else + new_loop_level[j - 1].payload = ref_level; + new_loop_level[j - 1].parallel_level = + stmt[*i].loop_level[reverse_pi[j - level] - 1].parallel_level; + break; + } + default: + throw loop_error( + "unknown loop level information for statement " + + to_string(*i)); + } + } else { + switch (stmt[*i].loop_level[j - 1].type) { + case LoopLevelOriginal: + new_loop_level[j - 1].type = LoopLevelOriginal; + new_loop_level[j - 1].payload = + stmt[*i].loop_level[j - 1].payload; + new_loop_level[j - 1].parallel_level = stmt[*i].loop_level[j + - 1].parallel_level; + break; + case LoopLevelTile: { + new_loop_level[j - 1].type = LoopLevelTile; + int ref_level = stmt[*i].loop_level[j - 1].payload; + if (ref_level >= level && ref_level < level + pi.size()) + new_loop_level[j - 1].payload = reverse_pi[ref_level + - level]; + else + new_loop_level[j - 1].payload = ref_level; + new_loop_level[j - 1].parallel_level = stmt[*i].loop_level[j + - 1].parallel_level; + break; + } + default: + throw loop_error( + "unknown loop level information for statement " + + to_string(*i)); + } + } + stmt[*i].loop_level = new_loop_level; + } + + setLexicalOrder(2 * level - 2, active); +} + +std::set<int> Loop::split(int stmt_num, int level, const Relation &cond) { + // check for sanity of parameters + if (stmt_num < 0 || stmt_num >= stmt.size()) + throw std::invalid_argument("invalid statement " + to_string(stmt_num)); + if (level <= 0 || level > stmt[stmt_num].loop_level.size()) + throw std::invalid_argument("invalid loop level " + to_string(level)); + + std::set<int> result; + int dim = 2 * level - 1; + std::vector<int> lex = getLexicalOrder(stmt_num); + std::set<int> same_loop = getStatements(lex, dim - 1); + + Relation cond2 = copy(cond); + cond2.simplify(); + cond2 = EQs_to_GEQs(cond2); + Conjunct *c = cond2.single_conjunct(); + int cur_lex = lex[dim - 
1]; + for (GEQ_Iterator gi(c->GEQs()); gi; gi++) { + int max_level = (*gi).max_tuple_pos(); + Relation single_cond(max_level); + single_cond.and_with_GEQ(*gi); + + // TODO: should decide where to place newly created statements with + // complementary split condition from dependence graph. + bool place_after; + if (max_level == 0) + place_after = true; + else if ((*gi).get_coef(cond2.set_var(max_level)) < 0) + place_after = true; + else + place_after = false; + + // original statements with split condition, + // new statements with complement of split condition + int old_num_stmt = stmt.size(); + std::map<int, int> what_stmt_num; + apply_xform(same_loop); + for (std::set<int>::iterator i = same_loop.begin(); + i != same_loop.end(); i++) { + int n = stmt[*i].IS.n_set(); + Relation part1, part2; + if (max_level > n) { + part1 = copy(stmt[*i].IS); + part2 = Relation::False(0); + } else { + part1 = Intersection(copy(stmt[*i].IS), + Extend_Set(copy(single_cond), n - max_level)); + part2 = Intersection(copy(stmt[*i].IS), + Extend_Set(Complement(copy(single_cond)), + n - max_level)); + } + + //split dependence check + + if (max_level > level) { + + DNF_Iterator di1(stmt[*i].IS.query_DNF()); + DNF_Iterator di2(part1.query_DNF()); + for (; di1 && di2; di1++, di2++) { + //printf("In next conjunct,\n"); + EQ_Iterator ei1 = (*di1)->EQs(); + EQ_Iterator ei2 = (*di2)->EQs(); + for (; ei1 && ei2; ei1++, ei2++) { + //printf(" In next equality constraint,\n"); + Constr_Vars_Iter cvi1(*ei1); + Constr_Vars_Iter cvi2(*ei2); + int dimension = (*cvi1).var->get_position(); + int same = 0; + bool identical = false; + if (identical = !strcmp((*cvi1).var->char_name(), + (*cvi2).var->char_name())) { + + for (; cvi1 && cvi2; cvi1++, cvi2++) { + + if (((*cvi1).coef != (*cvi2).coef + || (*ei1).get_const() + != (*ei2).get_const()) + || (strcmp((*cvi1).var->char_name(), + (*cvi2).var->char_name()))) { + + same++; + } + } + } + if ((same != 0) || !identical) { + + dimension = dimension - 1; + + 
while (stmt[*i].loop_level[dimension].type + == LoopLevelTile) + dimension = xform_index[dimension].first; + + dimension = stmt[*i].loop_level[dimension].payload; + + for (int i = 0; i < stmt.size(); i++) { + std::vector<std::pair<int, DependenceVector> > D; + for (DependenceGraph::EdgeList::iterator j = + dep.vertex[i].second.begin(); + j != dep.vertex[i].second.end(); j++) { + for (int k = 0; k < j->second.size(); k++) { + DependenceVector dv = j->second[k]; + if ((dv.hasNegative(dimension) + && !dv.quasi) + || (dv.hasPositive(dimension) + && dv.quasi)) + + throw loop_error( + "loop error: Split is illegal, dependence violation!"); + + } + } + } + + } + + GEQ_Iterator gi1 = (*di1)->GEQs(); + GEQ_Iterator gi2 = (*di2)->GEQs(); + + for (; gi1 && gi2; gi++, gi2++) { + + Constr_Vars_Iter cvi1(*gi1); + Constr_Vars_Iter cvi2(*gi2); + int dimension = (*cvi1).var->get_position(); + int same = 0; + bool identical = false; + if (identical = !strcmp((*cvi1).var->char_name(), + (*cvi2).var->char_name())) { + + for (; cvi1 && cvi2; cvi1++, cvi2++) { + + if (((*cvi1).coef != (*cvi2).coef + || (*gi1).get_const() + != (*gi2).get_const()) + || (strcmp((*cvi1).var->char_name(), + (*cvi2).var->char_name()))) { + + same++; + } + } + } + if ((same != 0) || !identical) { + dimension = dimension - 1; + + while (stmt[*i].loop_level[dimension].type + == LoopLevelTile) + dimension = xform_index[dimension].first; + + dimension = + stmt[*i].loop_level[dimension].payload; + + for (int i = 0; i < stmt.size(); i++) { + std::vector<std::pair<int, DependenceVector> > D; + for (DependenceGraph::EdgeList::iterator j = + dep.vertex[i].second.begin(); + j != dep.vertex[i].second.end(); + j++) { + for (int k = 0; k < j->second.size(); + k++) { + DependenceVector dv = j->second[k]; + if ((dv.hasNegative(dimension) + && !dv.quasi) + || (dv.hasPositive( + dimension) + && dv.quasi)) + + throw loop_error( + "loop error: Split is illegal, dependence violation!"); + + } + } + } + + } + + } + + } + + } + + 
DNF_Iterator di3(stmt[*i].IS.query_DNF()); + DNF_Iterator di4(part2.query_DNF()); + for (; di3 && di4; di3++, di4++) { + EQ_Iterator ei1 = (*di3)->EQs(); + EQ_Iterator ei2 = (*di4)->EQs(); + for (; ei1 && ei2; ei1++, ei2++) { + Constr_Vars_Iter cvi1(*ei1); + Constr_Vars_Iter cvi2(*ei2); + int dimension = (*cvi1).var->get_position(); + int same = 0; + bool identical = false; + if (identical = !strcmp((*cvi1).var->char_name(), + (*cvi2).var->char_name())) { + + for (; cvi1 && cvi2; cvi1++, cvi2++) { + + if (((*cvi1).coef != (*cvi2).coef + || (*ei1).get_const() + != (*ei2).get_const()) + || (strcmp((*cvi1).var->char_name(), + (*cvi2).var->char_name()))) { + + same++; + } + } + } + if ((same != 0) || !identical) { + dimension = dimension - 1; + + while (stmt[*i].loop_level[dimension].type + == LoopLevelTile) + dimension = xform_index[dimension].first; + + dimension = stmt[*i].loop_level[dimension].payload; + + for (int i = 0; i < stmt.size(); i++) { + std::vector<std::pair<int, DependenceVector> > D; + for (DependenceGraph::EdgeList::iterator j = + dep.vertex[i].second.begin(); + j != dep.vertex[i].second.end(); j++) { + for (int k = 0; k < j->second.size(); k++) { + DependenceVector dv = j->second[k]; + if ((dv.hasNegative(dimension) + && !dv.quasi) + || (dv.hasPositive(dimension) + && dv.quasi)) + + throw loop_error( + "loop error: Split is illegal, dependence violation!"); + + } + } + } + + } + + } + GEQ_Iterator gi1 = (*di3)->GEQs(); + GEQ_Iterator gi2 = (*di4)->GEQs(); + + for (; gi1 && gi2; gi++, gi2++) { + Constr_Vars_Iter cvi1(*gi1); + Constr_Vars_Iter cvi2(*gi2); + int dimension = (*cvi1).var->get_position(); + int same = 0; + bool identical = false; + if (identical = !strcmp((*cvi1).var->char_name(), + (*cvi2).var->char_name())) { + + for (; cvi1 && cvi2; cvi1++, cvi2++) { + + if (((*cvi1).coef != (*cvi2).coef + || (*gi1).get_const() + != (*gi2).get_const()) + || (strcmp((*cvi1).var->char_name(), + (*cvi2).var->char_name()))) { + + same++; + } + } + } + if 
((same != 0) || !identical) { + dimension = dimension - 1; + + while (stmt[*i].loop_level[dimension].type + == LoopLevelTile) + dimension = xform_index[dimension].first; + + dimension = stmt[*i].loop_level[dimension].payload; + + for (int i = 0; i < stmt.size(); i++) { + std::vector<std::pair<int, DependenceVector> > D; + for (DependenceGraph::EdgeList::iterator j = + dep.vertex[i].second.begin(); + j != dep.vertex[i].second.end(); j++) { + for (int k = 0; k < j->second.size(); k++) { + DependenceVector dv = j->second[k]; + if ((dv.hasNegative(dimension) + && !dv.quasi) + || (dv.hasPositive(dimension) + && dv.quasi)) + + throw loop_error( + "loop error: Split is illegal, dependence violation!"); + + } + } + } + + } + + } + + } + + } + + stmt[*i].IS = part1; + + if (Intersection(copy(part2), + Extend_Set(copy(this->known), n - this->known.n_set())).is_upper_bound_satisfiable()) { + Statement new_stmt; + new_stmt.code = stmt[*i].code->clone(); + new_stmt.IS = part2; + new_stmt.xform = copy(stmt[*i].xform); + + new_stmt.loop_level = stmt[*i].loop_level; + stmt.push_back(new_stmt); + dep.insert(); + what_stmt_num[*i] = stmt.size() - 1; + if (*i == stmt_num) + result.insert(stmt.size() - 1); + + stmt_nesting_level_.push_back(stmt_nesting_level[*i]); + std::pair<std::vector<DependenceVector>, + std::vector<DependenceVector> > dv = + test_data_dependences(ir_, stmt[*i].code, part1, + stmt[*i].code, part2, freevar, index, + stmt_nesting_level[*i], + stmt_nesting_level[stmt.size() - 1]); + + int part1_to_part2 = 0; + int part2_to_part1 = 0; + + for (int k = 0; k < dv.first.size(); k++) + if (is_dependence_valid_based_on_lex_order(*i, + what_stmt_num[*i], dv.first[k], true)) + part1_to_part2++; + else + part2_to_part1++; + + if (part1_to_part2 > 0 && part2_to_part1 > 0) + throw loop_error( + "loop error: Aborting, split resulted in impossible dependence cycle!"); + + for (int k = 0; k < dv.second.size(); k++) + if (is_dependence_valid_based_on_lex_order( + what_stmt_num[*i], 
*i, dv.second[k], false)) + part2_to_part1++; + + else + part1_to_part2++; + + if (part1_to_part2 > 0 && part2_to_part1 > 0) + throw loop_error( + "loop error: Aborting, split resulted in impossible dependence cycle!"); + bool temp_place_after; + if (part2_to_part1 > 0) + temp_place_after = false; + else + temp_place_after = true; + + if (i == same_loop.begin()) + place_after = temp_place_after; + else { + if (temp_place_after != place_after) + throw loop_error( + "loop error: Aborting, split resulted in impossible dependence cycle!"); + + } + + if (place_after) + assign_const(new_stmt.xform, dim - 1, cur_lex + 1); + else + assign_const(new_stmt.xform, dim - 1, cur_lex - 1); + + } + + } + // make adjacent lexical number available for new statements + if (place_after) { + lex[dim - 1] = cur_lex + 1; + shiftLexicalOrder(lex, dim - 1, 1); + } else { + lex[dim - 1] = cur_lex - 1; + shiftLexicalOrder(lex, dim - 1, -1); + } + // update dependence graph + int dep_dim = get_dep_dim_of(stmt_num, level); + for (int i = 0; i < old_num_stmt; i++) { + std::vector<std::pair<int, std::vector<DependenceVector> > > D; + + for (DependenceGraph::EdgeList::iterator j = + dep.vertex[i].second.begin(); + j != dep.vertex[i].second.end(); j++) { + if (same_loop.find(i) != same_loop.end()) { + if (same_loop.find(j->first) != same_loop.end()) { + if (what_stmt_num.find(i) != what_stmt_num.end() + && what_stmt_num.find(j->first) + != what_stmt_num.end()) + dep.connect(what_stmt_num[i], + what_stmt_num[j->first], j->second); + if (place_after + && what_stmt_num.find(j->first) + != what_stmt_num.end()) { + std::vector<DependenceVector> dvs; + for (int k = 0; k < j->second.size(); k++) { + DependenceVector dv = j->second[k]; + if (dv.is_data_dependence() && dep_dim != -1) { + dv.lbounds[dep_dim] = -posInfinity; + dv.ubounds[dep_dim] = posInfinity; + } + dvs.push_back(dv); + } + if (dvs.size() > 0) + D.push_back( + std::make_pair(what_stmt_num[j->first], + dvs)); + } else if (!place_after + && 
what_stmt_num.find(i) + != what_stmt_num.end()) { + std::vector<DependenceVector> dvs; + for (int k = 0; k < j->second.size(); k++) { + DependenceVector dv = j->second[k]; + if (dv.is_data_dependence() && dep_dim != -1) { + dv.lbounds[dep_dim] = -posInfinity; + dv.ubounds[dep_dim] = posInfinity; + } + dvs.push_back(dv); + } + if (dvs.size() > 0) + dep.connect(what_stmt_num[i], j->first, dvs); + + } + } else { + if (what_stmt_num.find(i) != what_stmt_num.end()) + dep.connect(what_stmt_num[i], j->first, j->second); + } + } else if (same_loop.find(j->first) != same_loop.end()) { + if (what_stmt_num.find(j->first) != what_stmt_num.end()) + D.push_back( + std::make_pair(what_stmt_num[j->first], + j->second)); + } + } + + for (int j = 0; j < D.size(); j++) + dep.connect(i, D[j].first, D[j].second); + } + + } + + return result; +} + +void Loop::tile(int stmt_num, int level, int tile_size, int outer_level, + TilingMethodType method, int alignment_offset, int alignment_multiple) { + // check for sanity of parameters + if (tile_size < 0) + throw std::invalid_argument("invalid tile size"); + if (alignment_multiple < 1 || alignment_offset < 0) + throw std::invalid_argument("invalid alignment for tile"); + if (stmt_num < 0 || stmt_num >= stmt.size()) + throw std::invalid_argument("invalid statement " + to_string(stmt_num)); + if (level <= 0) + throw std::invalid_argument("invalid loop level " + to_string(level)); + if (level > stmt[stmt_num].loop_level.size()) + throw std::invalid_argument( + "there is no loop level " + to_string(level) + " for statement " + + to_string(stmt_num)); + if (outer_level <= 0 || outer_level > level) + throw std::invalid_argument( + "invalid tile controlling loop level " + + to_string(outer_level)); + + int dim = 2 * level - 1; + int outer_dim = 2 * outer_level - 1; + std::vector<int> lex = getLexicalOrder(stmt_num); + std::set<int> same_tiled_loop = getStatements(lex, dim - 1); + std::set<int> same_tile_controlling_loop = getStatements(lex, + 
outer_dim - 1); + + for (int i = 0; i < stmt.size(); i++) { + std::vector<std::pair<int, DependenceVector> > D; + for (DependenceGraph::EdgeList::iterator j = + dep.vertex[i].second.begin(); j != dep.vertex[i].second.end(); + j++) { + for (int k = 0; k < j->second.size(); k++) { + DependenceVector dv = j->second[k]; + int dim2 = level - 1; + if ((dv.type != DEP_CONTROL) && (dv.type != DEP_UNKNOWN)) { + while (stmt[i].loop_level[dim2].type == LoopLevelTile) { + dim2 = stmt[i].loop_level[dim2].payload; + } + dim2 = stmt[i].loop_level[dim2].payload; + + if ((dv.hasNegative(dim2) && (!dv.quasi)) + || (dv.quasi && dv.hasPositive(dim2))) { + for (int l = outer_level; l < level; l++) + if (stmt[i].loop_level[l - 1].type + != LoopLevelTile) { + if (dv.isCarried( + stmt[i].loop_level[l - 1].payload)) + throw loop_error( + "loop error: Tiling is illegal, dependence violation!"); + } else { + + int dim3 = l - 1; + while (stmt[i].loop_level[l - 1].type + != LoopLevelTile) { + dim3 = stmt[i].loop_level[l - 1].payload; + + } + + dim3 = stmt[i].loop_level[l - 1].payload; + if (dim3 < level - 1) + if (dv.isCarried(dim3)) + throw loop_error( + "loop error: Tiling is illegal, dependence violation!"); + } + } + } + } + } + } + // special case for no tiling + if (tile_size == 0) { + for (std::set<int>::iterator i = same_tile_controlling_loop.begin(); + i != same_tile_controlling_loop.end(); i++) { + Relation r(stmt[*i].xform.n_out(), stmt[*i].xform.n_out() + 2); + F_And *f_root = r.add_and(); + for (int j = 1; j <= 2 * outer_level - 1; j++) { + EQ_Handle h = f_root->add_EQ(); + h.update_coef(r.input_var(j), 1); + h.update_coef(r.output_var(j), -1); + } + EQ_Handle h1 = f_root->add_EQ(); + h1.update_coef(r.output_var(2 * outer_level), 1); + EQ_Handle h2 = f_root->add_EQ(); + h2.update_coef(r.output_var(2 * outer_level + 1), 1); + for (int j = 2 * outer_level; j <= stmt[*i].xform.n_out(); j++) { + EQ_Handle h = f_root->add_EQ(); + h.update_coef(r.input_var(j), 1); + 
h.update_coef(r.output_var(j + 2), -1); + } + + stmt[*i].xform = Composition(copy(r), stmt[*i].xform); + } + } + // normal tiling + else { + std::set<int> private_stmt; + for (std::set<int>::iterator i = same_tile_controlling_loop.begin(); + i != same_tile_controlling_loop.end(); i++) { +// if (same_tiled_loop.find(*i) == same_tiled_loop.end() && !is_single_iteration(getNewIS(*i), dim)) +// same_tiled_loop.insert(*i); + + // should test dim's value directly but it is ok for now +// if (same_tiled_loop.find(*i) == same_tiled_loop.end() && get_const(stmt[*i].xform, dim+1, Output_Var) == posInfinity) + if (same_tiled_loop.find(*i) == same_tiled_loop.end() + && overflow.find(*i) != overflow.end()) + private_stmt.insert(*i); + } + + // extract the union of the iteration space to be considered + Relation hull; + { + Tuple < Relation > r_list; + Tuple<int> r_mask; + + for (std::set<int>::iterator i = same_tile_controlling_loop.begin(); + i != same_tile_controlling_loop.end(); i++) + if (private_stmt.find(*i) == private_stmt.end()) { + Relation r = project_onto_levels(getNewIS(*i), dim + 1, + true); + for (int j = outer_dim; j < dim; j++) + r = Project(r, j + 1, Set_Var); + for (int j = 0; j < outer_dim; j += 2) + r = Project(r, j + 1, Set_Var); + r_list.append(r); + r_mask.append(1); + } + + hull = Hull(r_list, r_mask, 1, true); + } + + // extract the bound of the dimension to be tiled + Relation bound = get_loop_bound(hull, dim); + if (!bound.has_single_conjunct()) { + // further simplify the bound + hull = Approximate(hull); + bound = get_loop_bound(hull, dim); + + int i = outer_dim - 2; + while (!bound.has_single_conjunct() && i >= 0) { + hull = Project(hull, i + 1, Set_Var); + bound = get_loop_bound(hull, dim); + i -= 2; + } + + if (!bound.has_single_conjunct()) + throw loop_error("cannot handle tile bounds"); + } + + // separate lower and upper bounds + std::vector<GEQ_Handle> lb_list, ub_list; + { + Conjunct *c = bound.query_DNF()->single_conjunct(); + for 
(GEQ_Iterator gi(c->GEQs()); gi; gi++) { + int coef = (*gi).get_coef(bound.set_var(dim + 1)); + if (coef < 0) + ub_list.push_back(*gi); + else if (coef > 0) + lb_list.push_back(*gi); + } + } + if (lb_list.size() == 0) + throw loop_error( + "unable to calculate tile controlling loop lower bound"); + if (ub_list.size() == 0) + throw loop_error( + "unable to calculate tile controlling loop upper bound"); + + // find the simplest lower bound for StridedTile or simplest iteration count for CountedTile + int simplest_lb = 0, simplest_ub = 0; + if (method == StridedTile) { + int best_cost = INT_MAX; + for (int i = 0; i < lb_list.size(); i++) { + int cost = 0; + for (Constr_Vars_Iter ci(lb_list[i]); ci; ci++) { + switch ((*ci).var->kind()) { + case Input_Var: { + cost += 5; + break; + } + case Global_Var: { + cost += 2; + break; + } + default: + cost += 15; + break; + } + } + + if (cost < best_cost) { + best_cost = cost; + simplest_lb = i; + } + } + } else if (method == CountedTile) { + std::map<Variable_ID, coef_t> s1, s2, s3; + int best_cost = INT_MAX; + for (int i = 0; i < lb_list.size(); i++) + for (int j = 0; j < ub_list.size(); j++) { + int cost = 0; + + for (Constr_Vars_Iter ci(lb_list[i]); ci; ci++) { + switch ((*ci).var->kind()) { + case Input_Var: { + s1[(*ci).var] += (*ci).coef; + break; + } + case Global_Var: { + s2[(*ci).var] += (*ci).coef; + break; + } + case Exists_Var: + case Wildcard_Var: { + s3[(*ci).var] += (*ci).coef; + break; + } + default: + cost = INT_MAX - 2; + break; + } + } + + for (Constr_Vars_Iter ci(ub_list[j]); ci; ci++) { + switch ((*ci).var->kind()) { + case Input_Var: { + s1[(*ci).var] += (*ci).coef; + break; + } + case Global_Var: { + s2[(*ci).var] += (*ci).coef; + break; + } + case Exists_Var: + case Wildcard_Var: { + s3[(*ci).var] += (*ci).coef; + break; + } + default: + if (cost == INT_MAX - 2) + cost = INT_MAX - 1; + else + cost = INT_MAX - 3; + break; + } + } + + if (cost == 0) { + for (std::map<Variable_ID, coef_t>::iterator k = + 
s1.begin(); k != s1.end(); k++) + if ((*k).second != 0) + cost += 5; + for (std::map<Variable_ID, coef_t>::iterator k = + s2.begin(); k != s2.end(); k++) + if ((*k).second != 0) + cost += 2; + for (std::map<Variable_ID, coef_t>::iterator k = + s3.begin(); k != s3.end(); k++) + if ((*k).second != 0) + cost += 15; + } + + if (cost < best_cost) { + best_cost = cost; + simplest_lb = i; + simplest_ub = j; + } + } + } + + // prepare the new transformation relations + for (std::set<int>::iterator i = same_tile_controlling_loop.begin(); + i != same_tile_controlling_loop.end(); i++) { + Relation r(stmt[*i].xform.n_out(), stmt[*i].xform.n_out() + 2); + F_And *f_root = r.add_and(); + for (int j = 0; j < outer_dim - 1; j++) { + EQ_Handle h = f_root->add_EQ(); + h.update_coef(r.output_var(j + 1), 1); + h.update_coef(r.input_var(j + 1), -1); + } + + for (int j = outer_dim - 1; j < stmt[*i].xform.n_out(); j++) { + EQ_Handle h = f_root->add_EQ(); + h.update_coef(r.output_var(j + 3), 1); + h.update_coef(r.input_var(j + 1), -1); + } + + EQ_Handle h = f_root->add_EQ(); + h.update_coef(r.output_var(outer_dim), 1); + h.update_const(-lex[outer_dim - 1]); + + stmt[*i].xform = Composition(r, stmt[*i].xform); + } + + // add tiling constraints. + for (std::set<int>::iterator i = same_tile_controlling_loop.begin(); + i != same_tile_controlling_loop.end(); i++) { + F_And *f_super_root = stmt[*i].xform.and_with_and(); + F_Exists *f_exists = f_super_root->add_exists(); + F_And *f_root = f_exists->add_and(); + + // create a lower bound variable for easy formula creation later + Variable_ID aligned_lb; + { + Variable_ID lb = f_exists->declare(); + coef_t coef = lb_list[simplest_lb].get_coef( + bound.set_var(dim + 1)); + if (coef == 1) { // e.g. 
if i >= m+5, then LB = m+5 + EQ_Handle h = f_root->add_EQ(); + h.update_coef(lb, 1); + for (Constr_Vars_Iter ci(lb_list[simplest_lb]); ci; ci++) { + switch ((*ci).var->kind()) { + case Input_Var: { + int pos = (*ci).var->get_position(); + if (pos != dim + 1) + h.update_coef(stmt[*i].xform.output_var(pos), + (*ci).coef); + break; + } + case Global_Var: { + Global_Var_ID g = (*ci).var->get_global_var(); + Variable_ID v; + if (g->arity() == 0) + v = stmt[*i].xform.get_local(g); + else + v = stmt[*i].xform.get_local(g, + (*ci).var->function_of()); + h.update_coef(v, (*ci).coef); + break; + } + default: + throw loop_error("cannot handle tile bounds"); + } + } + h.update_const(lb_list[simplest_lb].get_const()); + } else { // e.g. if 2i >= m+5, then m+5 <= 2*LB < m+5+2 + GEQ_Handle h1 = f_root->add_GEQ(); + GEQ_Handle h2 = f_root->add_GEQ(); + for (Constr_Vars_Iter ci(lb_list[simplest_lb]); ci; ci++) { + switch ((*ci).var->kind()) { + case Input_Var: { + int pos = (*ci).var->get_position(); + if (pos == dim + 1) { + h1.update_coef(lb, (*ci).coef); + h2.update_coef(lb, -(*ci).coef); + } else { + h1.update_coef(stmt[*i].xform.output_var(pos), + (*ci).coef); + h2.update_coef(stmt[*i].xform.output_var(pos), + -(*ci).coef); + } + break; + } + case Global_Var: { + Global_Var_ID g = (*ci).var->get_global_var(); + Variable_ID v; + if (g->arity() == 0) + v = stmt[*i].xform.get_local(g); + else + v = stmt[*i].xform.get_local(g, + (*ci).var->function_of()); + h1.update_coef(v, (*ci).coef); + h2.update_coef(v, -(*ci).coef); + break; + } + default: + throw loop_error("cannot handle tile bounds"); + } + } + h1.update_const(lb_list[simplest_lb].get_const()); + h2.update_const(-lb_list[simplest_lb].get_const()); + h2.update_const(coef - 1); + } + + Variable_ID offset_lb; + if (alignment_offset == 0) + offset_lb = lb; + else { + EQ_Handle h = f_root->add_EQ(); + offset_lb = f_exists->declare(); + h.update_coef(offset_lb, 1); + h.update_coef(lb, -1); + h.update_const(alignment_offset); + } 
+ + if (alignment_multiple == 1) { // trivial + aligned_lb = offset_lb; + } else { // e.g. to align at 4, aligned_lb = 4*alpha && LB-4 < 4*alpha <= LB + aligned_lb = f_exists->declare(); + Variable_ID e = f_exists->declare(); + + EQ_Handle h = f_root->add_EQ(); + h.update_coef(aligned_lb, 1); + h.update_coef(e, -alignment_multiple); + + GEQ_Handle h1 = f_root->add_GEQ(); + GEQ_Handle h2 = f_root->add_GEQ(); + h1.update_coef(e, alignment_multiple); + h2.update_coef(e, -alignment_multiple); + h1.update_coef(offset_lb, -1); + h2.update_coef(offset_lb, 1); + h1.update_const(alignment_multiple - 1); + } + } + + // create an upper bound variable for easy formula creation later + Variable_ID ub = f_exists->declare(); + { + coef_t coef = -ub_list[simplest_ub].get_coef( + bound.set_var(dim + 1)); + if (coef == 1) { // e.g. if i <= m+5, then UB = m+5 + EQ_Handle h = f_root->add_EQ(); + h.update_coef(ub, -1); + for (Constr_Vars_Iter ci(ub_list[simplest_ub]); ci; ci++) { + switch ((*ci).var->kind()) { + case Input_Var: { + int pos = (*ci).var->get_position(); + if (pos != dim + 1) + h.update_coef(stmt[*i].xform.output_var(pos), + (*ci).coef); + break; + } + case Global_Var: { + Global_Var_ID g = (*ci).var->get_global_var(); + Variable_ID v; + if (g->arity() == 0) + v = stmt[*i].xform.get_local(g); + else + v = stmt[*i].xform.get_local(g, + (*ci).var->function_of()); + h.update_coef(v, (*ci).coef); + break; + } + default: + throw loop_error("cannot handle tile bounds"); + } + } + h.update_const(ub_list[simplest_ub].get_const()); + } else { // e.g. 
if 2i <= m+5, then m+5-2 < 2*UB <= m+5 + GEQ_Handle h1 = f_root->add_GEQ(); + GEQ_Handle h2 = f_root->add_GEQ(); + for (Constr_Vars_Iter ci(ub_list[simplest_ub]); ci; ci++) { + switch ((*ci).var->kind()) { + case Input_Var: { + int pos = (*ci).var->get_position(); + if (pos == dim + 1) { + h1.update_coef(ub, -(*ci).coef); + h2.update_coef(ub, (*ci).coef); + } else { + h1.update_coef(stmt[*i].xform.output_var(pos), + -(*ci).coef); + h2.update_coef(stmt[*i].xform.output_var(pos), + (*ci).coef); + } + break; + } + case Global_Var: { + Global_Var_ID g = (*ci).var->get_global_var(); + Variable_ID v; + if (g->arity() == 0) + v = stmt[*i].xform.get_local(g); + else + v = stmt[*i].xform.get_local(g, + (*ci).var->function_of()); + h1.update_coef(v, -(*ci).coef); + h2.update_coef(v, (*ci).coef); + break; + } + default: + throw loop_error("cannot handle tile bounds"); + } + } + h1.update_const(-ub_list[simplest_ub].get_const()); + h2.update_const(ub_list[simplest_ub].get_const()); + h1.update_const(coef - 1); + } + } + + // insert tile controlling loop constraints + if (method == StridedTile) { // e.g. ii = LB + 32 * alpha && alpha >= 0 + Variable_ID e = f_exists->declare(); + GEQ_Handle h1 = f_root->add_GEQ(); + h1.update_coef(e, 1); + + EQ_Handle h2 = f_root->add_EQ(); + h2.update_coef(stmt[*i].xform.output_var(outer_dim + 1), 1); + h2.update_coef(e, -tile_size); + h2.update_coef(aligned_lb, -1); + } else if (method == CountedTile) { // e.g. 0 <= ii < ceiling((UB-LB+1)/32) + GEQ_Handle h1 = f_root->add_GEQ(); + h1.update_coef(stmt[*i].xform.output_var(outer_dim + 1), 1); + + GEQ_Handle h2 = f_root->add_GEQ(); + h2.update_coef(stmt[*i].xform.output_var(outer_dim + 1), + -tile_size); + h2.update_coef(aligned_lb, -1); + h2.update_coef(ub, 1); + } + + // special care for private statements like overflow assignment + if (private_stmt.find(*i) != private_stmt.end()) { // e.g. 
ii <= UB + GEQ_Handle h = f_root->add_GEQ(); + h.update_coef(stmt[*i].xform.output_var(outer_dim + 1), -1); + h.update_coef(ub, 1); + } + // if (private_stmt.find(*i) != private_stmt.end()) { + // if (stmt[*i].xform.n_out() > dim+3) { // e.g. ii <= UB && i = ii + // GEQ_Handle h = f_root->add_GEQ(); + // h.update_coef(stmt[*i].xform.output_var(outer_dim+1), -1); + // h.update_coef(ub, 1); + + // stmt[*i].xform = Project(stmt[*i].xform, dim+3, Output_Var); + // f_root = stmt[*i].xform.and_with_and(); + // EQ_Handle h1 = f_root->add_EQ(); + // h1.update_coef(stmt[*i].xform.output_var(dim+3), 1); + // h1.update_coef(stmt[*i].xform.output_var(outer_dim+1), -1); + // } + // else if (method == StridedTile) { // e.g. ii <= UB since i does not exist + // GEQ_Handle h = f_root->add_GEQ(); + // h.update_coef(stmt[*i].xform.output_var(outer_dim+1), -1); + // h.update_coef(ub, 1); + // } + // } + + // restrict original loop index inside the tile + else { + if (method == StridedTile) { // e.g. ii <= i < ii + tile_size + GEQ_Handle h1 = f_root->add_GEQ(); + h1.update_coef(stmt[*i].xform.output_var(dim + 3), 1); + h1.update_coef(stmt[*i].xform.output_var(outer_dim + 1), + -1); + + GEQ_Handle h2 = f_root->add_GEQ(); + h2.update_coef(stmt[*i].xform.output_var(dim + 3), -1); + h2.update_coef(stmt[*i].xform.output_var(outer_dim + 1), 1); + h2.update_const(tile_size - 1); + } else if (method == CountedTile) { // e.g. 
LB+32*ii <= i < LB+32*ii+tile_size + GEQ_Handle h1 = f_root->add_GEQ(); + h1.update_coef(stmt[*i].xform.output_var(outer_dim + 1), + -tile_size); + h1.update_coef(stmt[*i].xform.output_var(dim + 3), 1); + h1.update_coef(aligned_lb, -1); + + GEQ_Handle h2 = f_root->add_GEQ(); + h2.update_coef(stmt[*i].xform.output_var(outer_dim + 1), + tile_size); + h2.update_coef(stmt[*i].xform.output_var(dim + 3), -1); + h2.update_const(tile_size - 1); + h2.update_coef(aligned_lb, 1); + } + } + } + } + + // update loop level information + for (std::set<int>::iterator i = same_tile_controlling_loop.begin(); + i != same_tile_controlling_loop.end(); i++) { + for (int j = 1; j <= stmt[*i].loop_level.size(); j++) + switch (stmt[*i].loop_level[j - 1].type) { + case LoopLevelOriginal: + break; + case LoopLevelTile: + if (stmt[*i].loop_level[j - 1].payload >= outer_level) + stmt[*i].loop_level[j - 1].payload++; + break; + default: + throw loop_error( + "unknown loop level type for statement " + + to_string(*i)); + } + + LoopLevel ll; + ll.type = LoopLevelTile; + ll.payload = level + 1; + ll.parallel_level = 0; + stmt[*i].loop_level.insert( + stmt[*i].loop_level.begin() + (outer_level - 1), ll); + } +} + +std::set<int> Loop::unroll(int stmt_num, int level, int unroll_amount) { + // check for sanity of parameters + if (unroll_amount < 0) + throw std::invalid_argument( + "invalid unroll amount " + to_string(unroll_amount)); + if (stmt_num < 0 || stmt_num >= stmt.size()) + throw std::invalid_argument("invalid statement " + to_string(stmt_num)); + if (level <= 0 || level > stmt[stmt_num].loop_level.size()) + throw std::invalid_argument("invalid loop level " + to_string(level)); + + int dim = 2 * level - 1; + std::vector<int> lex = getLexicalOrder(stmt_num); + std::set<int> same_loop = getStatements(lex, dim - 1); + + // nothing to do + if (unroll_amount == 1) + return std::set<int>(); + + for (int i = 0; i < stmt.size(); i++) { + std::vector<std::pair<int, DependenceVector> > D; + for 
(DependenceGraph::EdgeList::iterator j = + dep.vertex[i].second.begin(); j != dep.vertex[i].second.end(); + j++) { + for (int k = 0; k < j->second.size(); k++) { + DependenceVector dv = j->second[k]; + int dim2 = level - 1; + if ((dv.type != DEP_CONTROL) && (dv.type != DEP_UNKNOWN)) { + + while (stmt[i].loop_level[dim2].type == LoopLevelTile) { + dim2 = xform_index[dim2].first; + } + dim2 = stmt[i].loop_level[dim2].payload; + + if (dv.isCarried(dim2) + && (dv.hasNegative(dim2) && !dv.quasi)) + throw loop_error( + "loop error: Unrolling is illegal, dependence violation!"); + + if (dv.isCarried(dim2) + && (dv.hasPositive(dim2) && dv.quasi)) + throw loop_error( + "loop error: Unrolling is illegal, dependence violation!"); + bool safe = false; + + if (dv.isCarried(dim2)) { + + if (!dv.quasi) { + if (dv.lbounds[dim2] != posInfinity) { + if (dv.lbounds[dim2] != negInfinity) + if (dv.lbounds[dim2] > unroll_amount) + safe = true; + } else + safe = true; + } else { + if (dv.ubounds[dim2] != negInfinity) { + if (dv.ubounds[dim2] != posInfinity) + if ((-(dv.ubounds[dim2])) > unroll_amount) + safe = true; + } else + safe = true; + } + + if (!safe) { + for (int l = level; l <= (n - 1) / 2; l++) { + int dim3 = l - 1; + + if (stmt[i].loop_level[dim3].type + != LoopLevelTile) + dim3 = stmt[i].loop_level[dim3].payload; + else { + while (stmt[i].loop_level[dim2].type + == LoopLevelTile) { + dim3 = stmt[i].loop_level[dim3].payload; + } + dim3 = stmt[i].loop_level[dim3].payload; + } + + if (dim3 > dim2) { + if ((dv.hasPositive(dim3) && !dv.quasi) + || (dv.hasNegative(dim3) && dv.quasi)) + break; + else if ((dv.hasNegative(dim3) && !dv.quasi) + || (dv.hasPositive(dim3) && dv.quasi)) + throw loop_error( + "loop error: Unrolling is illegal, dependence violation!"); + } + } + } + } + } + } + } + } + + // extract the intersection of the iteration space to be considered + Relation hull = Relation::True(level); + apply_xform(same_loop); + for (std::set<int>::iterator i = same_loop.begin(); i 
!= same_loop.end(); + i++) { + if (stmt[*i].IS.is_upper_bound_satisfiable()) { + Relation mapping(stmt[*i].IS.n_set(), level); + F_And *f_root = mapping.add_and(); + for (int j = 1; j <= level; j++) { + EQ_Handle h = f_root->add_EQ(); + h.update_coef(mapping.input_var(j), 1); + h.update_coef(mapping.output_var(j), -1); + } + hull = Intersection(hull, + Range(Restrict_Domain(mapping, copy(stmt[*i].IS)))); + hull.simplify(2, 4); + } + } + for (int i = 1; i <= level; i++) { + std::string name = tmp_loop_var_name_prefix + to_string(i); + hull.name_set_var(i, name); + } + hull.setup_names(); + + // extract the exact loop bound of the dimension to be unrolled + if (is_single_loop_iteration(hull, level, this->known)) + return std::set<int>(); + Relation bound = get_loop_bound(hull, level, this->known); + if (!bound.has_single_conjunct() || !bound.is_satisfiable() + || bound.is_tautology()) + throw loop_error("unable to extract loop bound for unrolling"); + + // extract the loop stride + EQ_Handle stride_eq; + int stride = 1; + { + bool simple_stride = true; + int strides = countStrides(bound.query_DNF()->single_conjunct(), + bound.set_var(level), stride_eq, simple_stride); + if (strides > 1) + throw loop_error("too many strides"); + else if (strides == 1) { + int sign = stride_eq.get_coef(bound.set_var(level)); + Constr_Vars_Iter it(stride_eq, true); + stride = abs((*it).coef / sign); + } + } + + // separate lower and upper bounds + std::vector<GEQ_Handle> lb_list, ub_list; + { + Conjunct *c = bound.query_DNF()->single_conjunct(); + for (GEQ_Iterator gi(c->GEQs()); gi; gi++) { + int coef = (*gi).get_coef(bound.set_var(level)); + if (coef < 0) + ub_list.push_back(*gi); + else if (coef > 0) + lb_list.push_back(*gi); + } + } + + // simplify overflow expression for each pair of upper and lower bounds + std::vector<std::vector<std::map<Variable_ID, int> > > overflow_table( + lb_list.size(), + std::vector<std::map<Variable_ID, int> >(ub_list.size(), + std::map<Variable_ID, 
int>())); + bool is_overflow_simplifiable = true; + for (int i = 0; i < lb_list.size(); i++) { + if (!is_overflow_simplifiable) + break; + + for (int j = 0; j < ub_list.size(); j++) { + // lower bound or upper bound has non-unit coefficient, can't simplify + if (ub_list[j].get_coef(bound.set_var(level)) != -1 + || lb_list[i].get_coef(bound.set_var(level)) != 1) { + is_overflow_simplifiable = false; + break; + } + + for (Constr_Vars_Iter ci(ub_list[j]); ci; ci++) { + switch ((*ci).var->kind()) { + case Input_Var: { + if ((*ci).var != bound.set_var(level)) + overflow_table[i][j][(*ci).var] += (*ci).coef; + + break; + } + case Global_Var: { + Global_Var_ID g = (*ci).var->get_global_var(); + Variable_ID v; + if (g->arity() == 0) + v = bound.get_local(g); + else + v = bound.get_local(g, (*ci).var->function_of()); + overflow_table[i][j][(*ci).var] += (*ci).coef; + break; + } + default: + throw loop_error("failed to calculate overflow amount"); + } + } + overflow_table[i][j][NULL] += ub_list[j].get_const(); + + for (Constr_Vars_Iter ci(lb_list[i]); ci; ci++) { + switch ((*ci).var->kind()) { + case Input_Var: { + if ((*ci).var != bound.set_var(level)) { + overflow_table[i][j][(*ci).var] += (*ci).coef; + if (overflow_table[i][j][(*ci).var] == 0) + overflow_table[i][j].erase( + overflow_table[i][j].find((*ci).var)); + } + break; + } + case Global_Var: { + Global_Var_ID g = (*ci).var->get_global_var(); + Variable_ID v; + if (g->arity() == 0) + v = bound.get_local(g); + else + v = bound.get_local(g, (*ci).var->function_of()); + overflow_table[i][j][(*ci).var] += (*ci).coef; + if (overflow_table[i][j][(*ci).var] == 0) + overflow_table[i][j].erase( + overflow_table[i][j].find((*ci).var)); + break; + } + default: + throw loop_error("failed to calculate overflow amount"); + } + } + overflow_table[i][j][NULL] += lb_list[i].get_const(); + + overflow_table[i][j][NULL] += stride; + if (unroll_amount == 0 + || (overflow_table[i][j].size() == 1 + && overflow_table[i][j][NULL] / stride + 
< unroll_amount)) + unroll_amount = overflow_table[i][j][NULL] / stride; + } + } + + // loop iteration count can't be determined, bail out gracefully + if (unroll_amount == 0) + return std::set<int>(); + + // further simply overflow calculation using coefficients' modular + if (is_overflow_simplifiable) { + for (int i = 0; i < lb_list.size(); i++) + for (int j = 0; j < ub_list.size(); j++) + if (stride == 1) { + for (std::map<Variable_ID, int>::iterator k = + overflow_table[i][j].begin(); + k != overflow_table[i][j].end();) + if ((*k).first != NULL) { + int t = int_mod_hat((*k).second, unroll_amount); + if (t == 0) { + overflow_table[i][j].erase(k++); + } else { + int t2 = hull.query_variable_mod((*k).first, + unroll_amount); + if (t2 != INT_MAX) { + overflow_table[i][j][NULL] += t * t2; + overflow_table[i][j].erase(k++); + } else { + (*k).second = t; + k++; + } + } + } else + k++; + + overflow_table[i][j][NULL] = int_mod_hat( + overflow_table[i][j][NULL], unroll_amount); + + // Since we don't have MODULO instruction in SUIF yet (only MOD), make all coef positive in the final formula + for (std::map<Variable_ID, int>::iterator k = + overflow_table[i][j].begin(); + k != overflow_table[i][j].end(); k++) + if ((*k).second < 0) + (*k).second += unroll_amount; + } + } + + // build overflow statement + CG_outputBuilder *ocg = ir->builder(); + CG_outputRepr *overflow_code = NULL; + Relation cond_upper(level), cond_lower(level); + Relation overflow_constraint(0); + F_And *overflow_constraint_root = overflow_constraint.add_and(); + std::vector<Free_Var_Decl *> over_var_list; + if (is_overflow_simplifiable && lb_list.size() == 1) { + for (int i = 0; i < ub_list.size(); i++) { + if (overflow_table[0][i].size() == 1) { + // upper splitting condition + GEQ_Handle h = cond_upper.and_with_GEQ(ub_list[i]); + h.update_const( + ((overflow_table[0][i][NULL] / stride) % unroll_amount) + * -stride); + } else { + // upper splitting condition + std::string over_name = 
overflow_var_name_prefix + + to_string(overflow_var_name_counter++); + Free_Var_Decl *over_free_var = new Free_Var_Decl(over_name); + over_var_list.push_back(over_free_var); + GEQ_Handle h = cond_upper.and_with_GEQ(ub_list[i]); + h.update_coef(cond_upper.get_local(over_free_var), -stride); + + // insert constraint 0 <= overflow < unroll_amount + Variable_ID v = overflow_constraint.get_local(over_free_var); + GEQ_Handle h1 = overflow_constraint_root->add_GEQ(); + h1.update_coef(v, 1); + GEQ_Handle h2 = overflow_constraint_root->add_GEQ(); + h2.update_coef(v, -1); + h2.update_const(unroll_amount - 1); + + // create overflow assignment + bound.setup_names(); + CG_outputRepr *rhs = NULL; + for (std::map<Variable_ID, int>::iterator j = + overflow_table[0][i].begin(); + j != overflow_table[0][i].end(); j++) + if ((*j).first != NULL) { + CG_outputRepr *t = ocg->CreateIdent((*j).first->name()); + if ((*j).second != 1) + t = ocg->CreateTimes(ocg->CreateInt((*j).second), + t); + rhs = ocg->CreatePlus(rhs, t); + } else if ((*j).second != 0) + rhs = ocg->CreatePlus(rhs, ocg->CreateInt((*j).second)); + + if (stride != 1) + rhs = ocg->CreateIntegerCeil(rhs, ocg->CreateInt(stride)); + rhs = ocg->CreateIntegerMod(rhs, ocg->CreateInt(unroll_amount)); + + CG_outputRepr *lhs = ocg->CreateIdent(over_name); + init_code = ocg->StmtListAppend(init_code, + ocg->CreateAssignment(0, lhs, ocg->CreateInt(0))); + lhs = ocg->CreateIdent(over_name); + overflow_code = ocg->StmtListAppend(overflow_code, + ocg->CreateAssignment(0, lhs, rhs)); + } + } + + // lower splitting condition + GEQ_Handle h = cond_lower.and_with_GEQ(lb_list[0]); + } else if (is_overflow_simplifiable && ub_list.size() == 1) { + for (int i = 0; i < lb_list.size(); i++) { + + if (overflow_table[i][0].size() == 1) { + // lower splitting condition + GEQ_Handle h = cond_lower.and_with_GEQ(lb_list[i]); + h.update_const(overflow_table[i][0][NULL] * -stride); + } else { + // lower splitting condition + std::string over_name = 
overflow_var_name_prefix + + to_string(overflow_var_name_counter++); + Free_Var_Decl *over_free_var = new Free_Var_Decl(over_name); + over_var_list.push_back(over_free_var); + GEQ_Handle h = cond_lower.and_with_GEQ(lb_list[i]); + h.update_coef(cond_lower.get_local(over_free_var), -stride); + + // insert constraint 0 <= overflow < unroll_amount + Variable_ID v = overflow_constraint.get_local(over_free_var); + GEQ_Handle h1 = overflow_constraint_root->add_GEQ(); + h1.update_coef(v, 1); + GEQ_Handle h2 = overflow_constraint_root->add_GEQ(); + h2.update_coef(v, -1); + h2.update_const(unroll_amount - 1); + + // create overflow assignment + bound.setup_names(); + CG_outputRepr *rhs = NULL; + for (std::map<Variable_ID, int>::iterator j = + overflow_table[0][i].begin(); + j != overflow_table[0][i].end(); j++) + if ((*j).first != NULL) { + CG_outputRepr *t = ocg->CreateIdent((*j).first->name()); + if ((*j).second != 1) + t = ocg->CreateTimes(ocg->CreateInt((*j).second), + t); + rhs = ocg->CreatePlus(rhs, t); + } else if ((*j).second != 0) + rhs = ocg->CreatePlus(rhs, ocg->CreateInt((*j).second)); + + if (stride != 1) + rhs = ocg->CreateIntegerCeil(rhs, ocg->CreateInt(stride)); + rhs = ocg->CreateIntegerMod(rhs, ocg->CreateInt(unroll_amount)); + + CG_outputRepr *lhs = ocg->CreateIdent(over_name); + init_code = ocg->StmtListAppend(init_code, + ocg->CreateAssignment(0, lhs, ocg->CreateInt(0))); + lhs = ocg->CreateIdent(over_name); + overflow_code = ocg->StmtListAppend(overflow_code, + ocg->CreateAssignment(0, lhs, rhs)); + } + } + + // upper splitting condition + GEQ_Handle h = cond_upper.and_with_GEQ(ub_list[0]); + } else { + std::string over_name = overflow_var_name_prefix + + to_string(overflow_var_name_counter++); + Free_Var_Decl *over_free_var = new Free_Var_Decl(over_name); + over_var_list.push_back(over_free_var); + + Tuple<CG_outputRepr *> lb_repr_list, ub_repr_list; + for (int i = 0; i < lb_list.size(); i++) { + //lb_repr_list.append(outputLBasRepr(ocg, lb_list[i], 
bound, bound.set_var(dim+1), stride, stride_eq, Relation::True(bound.n_set()), std::vector<CG_outputRepr *>(bound.n_set(), NULL))); + lb_repr_list.append( + outputLBasRepr(ocg, lb_list[i], bound, + bound.set_var(dim + 1), stride, stride_eq, + Relation::True(bound.n_set()), + std::vector<CG_outputRepr *>(bound.n_set()))); + GEQ_Handle h = cond_lower.and_with_GEQ(lb_list[i]); + } + for (int i = 0; i < ub_list.size(); i++) { + //ub_repr_list.append(outputUBasRepr(ocg, ub_list[i], bound, bound.set_var(dim+1), stride, stride_eq, std::vector<CG_outputRepr *>(bound.n_set(), NULL))); + ub_repr_list.append( + outputUBasRepr(ocg, ub_list[i], bound, + bound.set_var(dim + 1), stride, stride_eq, + std::vector<CG_outputRepr *>(bound.n_set()))); + GEQ_Handle h = cond_upper.and_with_GEQ(ub_list[i]); + h.update_coef(cond_upper.get_local(over_free_var), -stride); + } + + CG_outputRepr *lbRepr, *ubRepr; + if (lb_repr_list.size() > 1) + lbRepr = ocg->CreateInvoke("max", lb_repr_list); + else if (lb_repr_list.size() == 1) + lbRepr = lb_repr_list[1]; + + if (ub_repr_list.size() > 1) + ubRepr = ocg->CreateInvoke("min", ub_repr_list); + else if (ub_repr_list.size() == 1) + ubRepr = ub_repr_list[1]; + + // create overflow assignment + bound.setup_names(); + CG_outputRepr *rhs = ocg->CreatePlus(ocg->CreateMinus(ubRepr, lbRepr), + ocg->CreateInt(1)); + if (stride != 1) + rhs = ocg->CreateIntegerDivide(rhs, ocg->CreateInt(stride)); + rhs = ocg->CreateIntegerMod(rhs, ocg->CreateInt(unroll_amount)); + CG_outputRepr *lhs = ocg->CreateIdent(over_name); + init_code = ocg->StmtListAppend(init_code, + ocg->CreateAssignment(0, lhs, ocg->CreateInt(0))); + lhs = ocg->CreateIdent(over_name); + overflow_code = ocg->CreateAssignment(0, lhs, rhs); + + // insert constraint 0 <= overflow < unroll_amount + Variable_ID v = overflow_constraint.get_local(over_free_var); + GEQ_Handle h1 = overflow_constraint_root->add_GEQ(); + h1.update_coef(v, 1); + GEQ_Handle h2 = overflow_constraint_root->add_GEQ(); + 
h2.update_coef(v, -1); + h2.update_const(unroll_amount - 1); + } + + // insert overflow statement + int overflow_stmt_num = -1; + if (overflow_code != NULL) { + // build iteration space for overflow statement + Relation mapping(level, level - 1); + F_And *f_root = mapping.add_and(); + for (int i = 1; i < level; i++) { + EQ_Handle h = f_root->add_EQ(); + h.update_coef(mapping.output_var(i), 1); + h.update_coef(mapping.input_var(i), -1); + } + Relation overflow_IS = Range(Restrict_Domain(mapping, copy(hull))); + for (int i = 1; i < level; i++) + overflow_IS.name_set_var(i, hull.set_var(i)->name()); + overflow_IS.setup_names(); + + // build dumb transformation relation for overflow statement + Relation overflow_xform(level - 1, 2 * (level - 1) + 1); + f_root = overflow_xform.add_and(); + for (int i = 1; i <= level - 1; i++) { + EQ_Handle h = f_root->add_EQ(); + h.update_coef(overflow_xform.output_var(2 * i), 1); + h.update_coef(overflow_xform.input_var(i), -1); + + h = f_root->add_EQ(); + h.update_coef(overflow_xform.output_var(2 * i - 1), 1); + h.update_const(-lex[2 * i - 2]); + } + EQ_Handle h = f_root->add_EQ(); + h.update_coef(overflow_xform.output_var(2 * (level - 1) + 1), 1); + h.update_const(-lex[2 * (level - 1)]); + + shiftLexicalOrder(lex, dim - 1, 1); + Statement overflow_stmt; + overflow_stmt.code = overflow_code; + overflow_stmt.IS = overflow_IS; + overflow_stmt.xform = overflow_xform; + overflow_stmt.loop_level = std::vector<LoopLevel>(level - 1); + for (int i = 0; i < level - 1; i++) { + overflow_stmt.loop_level[i].type = + stmt[stmt_num].loop_level[i].type; + if (stmt[stmt_num].loop_level[i].type == LoopLevelTile + && stmt[stmt_num].loop_level[i].payload >= level) + overflow_stmt.loop_level[i].payload = -1; + else + overflow_stmt.loop_level[i].payload = + stmt[stmt_num].loop_level[i].payload; + overflow_stmt.loop_level[i].parallel_level = + stmt[stmt_num].loop_level[i].parallel_level; + } + stmt.push_back(overflow_stmt); + dep.insert(); + 
overflow_stmt_num = stmt.size() - 1; + overflow[overflow_stmt_num] = over_var_list; + + // update the global known information on overflow variable + this->known = Intersection(this->known, + Extend_Set(copy(overflow_constraint), + this->known.n_set() - overflow_constraint.n_set())); + + // update dependence graph + DependenceVector dv; + dv.type = DEP_CONTROL; + for (std::set<int>::iterator i = same_loop.begin(); + i != same_loop.end(); i++) + dep.connect(overflow_stmt_num, *i, dv); + dv.type = DEP_W2W; + { + IR_ScalarSymbol *overflow_sym = NULL; + std::vector<IR_ScalarRef *> scalars = ir->FindScalarRef( + overflow_code); + for (int i = scalars.size() - 1; i >= 0; i--) + if (scalars[i]->is_write()) { + overflow_sym = scalars[i]->symbol(); + break; + } + for (int i = scalars.size() - 1; i >= 0; i--) + delete scalars[i]; + dv.sym = overflow_sym; + } + dv.lbounds = std::vector<coef_t>(num_dep_dim, 0); + dv.ubounds = std::vector<coef_t>(num_dep_dim, 0); + int dep_dim = get_last_dep_dim_before(stmt_num, level); + for (int i = dep_dim + 1; i < num_dep_dim; i++) { + dv.lbounds[i] = -posInfinity; + dv.ubounds[i] = posInfinity; + } + for (int i = 0; i <= dep_dim; i++) { + if (i != 0) { + dv.lbounds[i - 1] = 0; + dv.ubounds[i - 1] = 0; + } + dv.lbounds[i] = 1; + dv.ubounds[i] = posInfinity; + dep.connect(overflow_stmt_num, overflow_stmt_num, dv); + } + } + + // split the loop so it can be fully unrolled + std::set<int> result = split(stmt_num, level, cond_upper); + std::set<int> result2 = split(stmt_num, level, cond_lower); + for (std::set<int>::iterator i = result2.begin(); i != result2.end(); i++) + result.insert(*i); + + // check if unrolled statements can be trivially lumped together as one statement + bool can_be_lumped = true; + if (can_be_lumped) { + for (std::set<int>::iterator i = same_loop.begin(); + i != same_loop.end(); i++) + if (*i != stmt_num) { + if (stmt[*i].loop_level.size() + != stmt[stmt_num].loop_level.size()) { + can_be_lumped = false; + break; + } + 
for (int j = 0; j < stmt[stmt_num].loop_level.size(); j++) + if (!(stmt[*i].loop_level[j].type + == stmt[stmt_num].loop_level[j].type + && stmt[*i].loop_level[j].payload + == stmt[stmt_num].loop_level[j].payload)) { + can_be_lumped = false; + break; + } + if (!can_be_lumped) + break; + std::vector<int> lex2 = getLexicalOrder(*i); + for (int j = 2 * level; j < lex.size() - 1; j += 2) + if (lex[j] != lex2[j]) { + can_be_lumped = false; + break; + } + if (!can_be_lumped) + break; + } + } + if (can_be_lumped) { + for (std::set<int>::iterator i = same_loop.begin(); + i != same_loop.end(); i++) + if (is_inner_loop_depend_on_level(stmt[*i].IS, level, known)) { + can_be_lumped = false; + break; + } + } + if (can_be_lumped) { + for (std::set<int>::iterator i = same_loop.begin(); + i != same_loop.end(); i++) + if (*i != stmt_num) { + if (!(Must_Be_Subset(copy(stmt[*i].IS), copy(stmt[stmt_num].IS)) + && Must_Be_Subset(copy(stmt[stmt_num].IS), + copy(stmt[*i].IS)))) { + can_be_lumped = false; + break; + } + } + } + if (can_be_lumped) { + for (std::set<int>::iterator i = same_loop.begin(); + i != same_loop.end(); i++) { + for (DependenceGraph::EdgeList::iterator j = + dep.vertex[*i].second.begin(); + j != dep.vertex[*i].second.end(); j++) + if (same_loop.find(j->first) != same_loop.end()) { + for (int k = 0; k < j->second.size(); k++) + if (j->second[k].type == DEP_CONTROL + || j->second[k].type == DEP_UNKNOWN) { + can_be_lumped = false; + break; + } + if (!can_be_lumped) + break; + } + if (!can_be_lumped) + break; + } + } + + // add strides to original statements + // for (std::set<int>::iterator i = same_loop.begin(); i != same_loop.end(); i++) + // add_loop_stride(stmt[*i].IS, bound, level-1, unroll_amount * stride); + + // std::vector<Free_Var_Decl *> depending_overflow_var; + // for (std::set<int>::iterator i = same_loop.begin(); i != same_loop.end(); i++) { + // add_loop_stride(stmt[*i].IS, bound, level-1, unroll_amount * stride); + // if (overflow.find(*i) != 
overflow.end()) { + // // TO DO: It should check whether overflow vaiable depends on + // // this loop index and by how much. This step is important if + // // you want to unroll loops in arbitrary order. + // depending_overflow_var.insert(depending_overflow_var.end(), overflow[*i].begin(), overflow[*i].end()); + + // continue; + // } + // } + +// std::map<int, std::vector<Statement> > pending; +// for (std::set<int>::iterator i = same_loop.begin(); i != same_loop.end(); i++) { +// add_loop_stride(stmt[*i].IS, bound, level-1, unroll_amount * stride); + +// if (overflow.find(*i) != overflow.end()) { +// // TO DO: It should check whether overflow vaiable depends on +// // this loop index and by how much. This step is important if +// // you want to unroll loops in arbitrary order. +// depending_overflow_var.insert(depending_overflow_var.end(), overflow[*i].begin(), overflow[*i].end()); + +// continue; +// } + +// // create copy for each unroll amount +// for (int j = 1; j < unroll_amount; j++) { +// Tuple<CG_outputRepr *> funcList; +// Tuple<std::string> loop_vars; +// loop_vars.append(stmt[*i].IS.set_var((dim+1)/2)->name()); +// funcList.append(ocg->CreatePlus(ocg->CreateIdent(stmt[*i].IS.set_var(level)->name()), ocg->CreateInt(j*stride))); +// CG_outputRepr *code = ocg->CreatePlaceHolder(0, stmt[*i].code->clone(), funcList, loop_vars); + +// // prepare the new statment to insert +// Statement unrolled_stmt; +// unrolled_stmt.IS = copy(stmt[*i].IS); +// // adjust_loop_bound(unrolled_stmt.IS, (dim-1)/2, j); +// unrolled_stmt.xform = copy(stmt[*i].xform); +// unrolled_stmt.code = code; +// unrolled_stmt.loop_level = stmt[*i].loop_level; +// pending[*i].push_back(unrolled_stmt); +// } +// } + +// // adjust iteration space due to loop bounds depending on this loop +// // index and affected overflow variables +// for (std::set<int>::iterator i = same_loop.begin(); i != same_loop.end(); i++) { +// for (int j = 0; j < pending[*i].size(); j++) { +// 
adjust_loop_bound(pending[*i][j].IS, (dim-1)/2, j+1, depending_overflow_var); +// //pending[*i][j].IS = Intersection(pending[*i][j].IS, Extend_Set(copy(this->known), pending[*i][j].IS.n_set() - this->known.n_set())); +// } +// } + + // insert unrolled statements + int old_num_stmt = stmt.size(); + if (!can_be_lumped) { + std::map<int, std::vector<int> > what_stmt_num; + + for (int j = 1; j < unroll_amount; j++) { + for (std::set<int>::iterator i = same_loop.begin(); + i != same_loop.end(); i++) { + Statement new_stmt; + + Tuple<CG_outputRepr *> funcList; + Tuple<std::string> loop_vars; + loop_vars.append(stmt[*i].IS.set_var(level)->name()); + funcList.append( + ocg->CreatePlus( + ocg->CreateIdent( + stmt[*i].IS.set_var(level)->name()), + ocg->CreateInt(j * stride))); + new_stmt.code = ocg->CreatePlaceHolder(0, + stmt[*i].code->clone(), funcList, loop_vars); + + new_stmt.IS = adjust_loop_bound(stmt[*i].IS, level, j * stride); + add_loop_stride(new_stmt.IS, bound, level - 1, + unroll_amount * stride); + + new_stmt.xform = copy(stmt[*i].xform); + new_stmt.loop_level = stmt[*i].loop_level; + stmt.push_back(new_stmt); + dep.insert(); + what_stmt_num[*i].push_back(stmt.size() - 1); + } + } + for (std::set<int>::iterator i = same_loop.begin(); + i != same_loop.end(); i++) + add_loop_stride(stmt[*i].IS, bound, level - 1, + unroll_amount * stride); + + // update dependence graph + if (stmt[stmt_num].loop_level[level - 1].type == LoopLevelOriginal) { + int dep_dim = stmt[stmt_num].loop_level[level - 1].payload; + int new_stride = unroll_amount * stride; + for (int i = 0; i < old_num_stmt; i++) { + std::vector<std::pair<int, DependenceVector> > D; + + for (DependenceGraph::EdgeList::iterator j = + dep.vertex[i].second.begin(); + j != dep.vertex[i].second.end();) { + if (same_loop.find(i) != same_loop.end()) { + if (same_loop.find(j->first) != same_loop.end()) { + for (int k = 0; k < j->second.size(); k++) { + DependenceVector dv = j->second[k]; + if (dv.type == DEP_CONTROL + 
|| dv.type == DEP_UNKNOWN) { + D.push_back(std::make_pair(j->first, dv)); + for (int kk = 0; kk < unroll_amount - 1; + kk++) + if (what_stmt_num[i][kk] != -1 + && what_stmt_num[j->first][kk] + != -1) + dep.connect(what_stmt_num[i][kk], + what_stmt_num[j->first][kk], + dv); + } else { + coef_t lb = dv.lbounds[dep_dim]; + coef_t ub = dv.ubounds[dep_dim]; + if (ub == lb + && int_mod(lb, + static_cast<coef_t>(new_stride)) + == 0) { + D.push_back( + std::make_pair(j->first, dv)); + for (int kk = 0; kk < unroll_amount - 1; + kk++) + if (what_stmt_num[i][kk] != -1 + && what_stmt_num[j->first][kk] + != -1) + dep.connect( + what_stmt_num[i][kk], + what_stmt_num[j->first][kk], + dv); + } else if (lb == -posInfinity + && ub == posInfinity) { + D.push_back( + std::make_pair(j->first, dv)); + for (int kk = 0; kk < unroll_amount; + kk++) + if (kk == 0) + D.push_back( + std::make_pair(j->first, + dv)); + else if (what_stmt_num[j->first][kk + - 1] != -1) + D.push_back( + std::make_pair( + what_stmt_num[j->first][kk + - 1], + dv)); + for (int t = 0; t < unroll_amount - 1; + t++) + if (what_stmt_num[i][t] != -1) + for (int kk = 0; + kk < unroll_amount; + kk++) + if (kk == 0) + dep.connect( + what_stmt_num[i][t], + j->first, dv); + else if (what_stmt_num[j->first][kk + - 1] != -1) + dep.connect( + what_stmt_num[i][t], + what_stmt_num[j->first][kk + - 1], + dv); + } else { + for (int kk = 0; kk < unroll_amount; + kk++) { + if (lb != -posInfinity) { + if (kk * stride + < int_mod(lb, + static_cast<coef_t>(new_stride))) + dv.lbounds[dep_dim] = + floor( + static_cast<double>(lb) + / new_stride) + * new_stride + + new_stride; + else + dv.lbounds[dep_dim] = + floor( + static_cast<double>(lb) + / new_stride) + * new_stride; + } + if (ub != posInfinity) { + if (kk * stride + > int_mod(ub, + static_cast<coef_t>(new_stride))) + dv.ubounds[dep_dim] = + floor( + static_cast<double>(ub) + / new_stride) + * new_stride + - new_stride; + else + dv.ubounds[dep_dim] = + floor( + static_cast<double>(ub) 
+ / new_stride) + * new_stride; + } + if (dv.ubounds[dep_dim] + >= dv.lbounds[dep_dim]) { + if (kk == 0) + D.push_back( + std::make_pair( + j->first, + dv)); + else if (what_stmt_num[j->first][kk + - 1] != -1) + D.push_back( + std::make_pair( + what_stmt_num[j->first][kk + - 1], + dv)); + } + } + for (int t = 0; t < unroll_amount - 1; + t++) + if (what_stmt_num[i][t] != -1) + for (int kk = 0; + kk < unroll_amount; + kk++) { + if (lb != -posInfinity) { + if (kk * stride + < int_mod( + lb + t + + 1, + static_cast<coef_t>(new_stride))) + dv.lbounds[dep_dim] = + floor( + static_cast<double>(lb + + (t + + 1) + * stride) + / new_stride) + * new_stride + + new_stride; + else + dv.lbounds[dep_dim] = + floor( + static_cast<double>(lb + + (t + + 1) + * stride) + / new_stride) + * new_stride; + } + if (ub != posInfinity) { + if (kk * stride + > int_mod( + ub + t + + 1, + static_cast<coef_t>(new_stride))) + dv.ubounds[dep_dim] = + floor( + static_cast<double>(ub + + (t + + 1) + * stride) + / new_stride) + * new_stride + - new_stride; + else + dv.ubounds[dep_dim] = + floor( + static_cast<double>(ub + + (t + + 1) + * stride) + / new_stride) + * new_stride; + } + if (dv.ubounds[dep_dim] + >= dv.lbounds[dep_dim]) { + if (kk == 0) + dep.connect( + what_stmt_num[i][t], + j->first, + dv); + else if (what_stmt_num[j->first][kk + - 1] != -1) + dep.connect( + what_stmt_num[i][t], + what_stmt_num[j->first][kk + - 1], + dv); + } + } + } + } + } + + dep.vertex[i].second.erase(j++); + } else { + for (int kk = 0; kk < unroll_amount - 1; kk++) + if (what_stmt_num[i][kk] != -1) + dep.connect(what_stmt_num[i][kk], j->first, + j->second); + + j++; + } + } else { + if (same_loop.find(j->first) != same_loop.end()) + for (int k = 0; k < j->second.size(); k++) + for (int kk = 0; kk < unroll_amount - 1; kk++) + if (what_stmt_num[j->first][kk] != -1) + D.push_back( + std::make_pair( + what_stmt_num[j->first][kk], + j->second[k])); + j++; + } + } + + for (int j = 0; j < D.size(); j++) + dep.connect(i, 
D[j].first, D[j].second); + } + } + + // reset lexical order for the unrolled loop body + std::set<int> new_same_loop; + for (std::map<int, std::vector<int> >::iterator i = + what_stmt_num.begin(); i != what_stmt_num.end(); i++) { + new_same_loop.insert(i->first); + for (int j = 0; j < i->second.size(); j++) + new_same_loop.insert(i->second[j]); + } + setLexicalOrder(dim + 1, new_same_loop); + } else { + for (std::set<int>::iterator i = same_loop.begin(); + i != same_loop.end(); i++) + add_loop_stride(stmt[*i].IS, bound, level - 1, + unroll_amount * stride); + + int max_level = stmt[stmt_num].loop_level.size(); + std::vector<std::pair<int, int> > stmt_order; + for (std::set<int>::iterator i = same_loop.begin(); + i != same_loop.end(); i++) + stmt_order.push_back( + std::make_pair( + get_const(stmt[*i].xform, 2 * max_level, + Output_Var), *i)); + sort(stmt_order.begin(), stmt_order.end()); + + Statement new_stmt; + new_stmt.code = NULL; + for (int j = 1; j < unroll_amount; j++) + for (int i = 0; i < stmt_order.size(); i++) { + Tuple<CG_outputRepr *> funcList; + Tuple<std::string> loop_vars; + loop_vars.append( + stmt[stmt_order[i].second].IS.set_var(level)->name()); + funcList.append( + ocg->CreatePlus( + ocg->CreateIdent( + stmt[stmt_order[i].second].IS.set_var( + level)->name()), + ocg->CreateInt(j * stride))); + CG_outputRepr *code = ocg->CreatePlaceHolder(0, + stmt[stmt_order[i].second].code->clone(), funcList, + loop_vars); + new_stmt.code = ocg->StmtListAppend(new_stmt.code, code); + } + + new_stmt.IS = copy(stmt[stmt_num].IS); + new_stmt.xform = copy(stmt[stmt_num].xform); + assign_const(new_stmt.xform, 2 * max_level, + stmt_order[stmt_order.size() - 1].first + 1); + new_stmt.loop_level = stmt[stmt_num].loop_level; + stmt.push_back(new_stmt); + dep.insert(); + + // update dependence graph + if (stmt[stmt_num].loop_level[level - 1].type == LoopLevelOriginal) { + int dep_dim = stmt[stmt_num].loop_level[level - 1].payload; + int new_stride = unroll_amount * 
stride; + for (int i = 0; i < old_num_stmt; i++) { + std::vector<std::pair<int, std::vector<DependenceVector> > > D; + + for (DependenceGraph::EdgeList::iterator j = + dep.vertex[i].second.begin(); + j != dep.vertex[i].second.end();) { + if (same_loop.find(i) != same_loop.end()) { + if (same_loop.find(j->first) != same_loop.end()) { + std::vector<DependenceVector> dvs11, dvs12, dvs22, + dvs21; + for (int k = 0; k < j->second.size(); k++) { + DependenceVector dv = j->second[k]; + if (dv.type == DEP_CONTROL + || dv.type == DEP_UNKNOWN) { + if (i == j->first) { + dvs11.push_back(dv); + dvs22.push_back(dv); + } else + throw loop_error( + "unrolled statements lumped together illegally"); + } else { + coef_t lb = dv.lbounds[dep_dim]; + coef_t ub = dv.ubounds[dep_dim]; + if (ub == lb + && int_mod(lb, + static_cast<coef_t>(new_stride)) + == 0) { + dvs11.push_back(dv); + dvs22.push_back(dv); + } else { + if (lb != -posInfinity) + dv.lbounds[dep_dim] = ceil( + static_cast<double>(lb) + / new_stride) + * new_stride; + if (ub != posInfinity) + dv.ubounds[dep_dim] = floor( + static_cast<double>(ub) + / new_stride) + * new_stride; + if (dv.ubounds[dep_dim] + >= dv.lbounds[dep_dim]) + dvs11.push_back(dv); + + if (lb != -posInfinity) + dv.lbounds[dep_dim] = ceil( + static_cast<double>(lb) + / new_stride) + * new_stride; + if (ub != posInfinity) + dv.ubounds[dep_dim] = ceil( + static_cast<double>(ub) + / new_stride) + * new_stride; + if (dv.ubounds[dep_dim] + >= dv.lbounds[dep_dim]) + dvs21.push_back(dv); + + if (lb != -posInfinity) + dv.lbounds[dep_dim] = floor( + static_cast<double>(lb) + / new_stride) + * new_stride; + if (ub != posInfinity) + dv.ubounds[dep_dim] = floor( + static_cast<double>(ub + - stride) + / new_stride) + * new_stride; + if (dv.ubounds[dep_dim] + >= dv.lbounds[dep_dim]) + dvs12.push_back(dv); + + if (lb != -posInfinity) + dv.lbounds[dep_dim] = floor( + static_cast<double>(lb) + / new_stride) + * new_stride; + if (ub != posInfinity) + dv.ubounds[dep_dim] = 
ceil( + static_cast<double>(ub + - stride) + / new_stride) + * new_stride; + if (dv.ubounds[dep_dim] + >= dv.lbounds[dep_dim]) + dvs22.push_back(dv); + } + } + } + if (dvs11.size() > 0) + D.push_back(std::make_pair(i, dvs11)); + if (dvs22.size() > 0) + dep.connect(old_num_stmt, old_num_stmt, dvs22); + if (dvs12.size() > 0) + D.push_back( + std::make_pair(old_num_stmt, dvs12)); + if (dvs21.size() > 0) + dep.connect(old_num_stmt, i, dvs21); + + dep.vertex[i].second.erase(j++); + } else { + dep.connect(old_num_stmt, j->first, j->second); + j++; + } + } else { + if (same_loop.find(j->first) != same_loop.end()) + D.push_back( + std::make_pair(old_num_stmt, j->second)); + j++; + } + } + + for (int j = 0; j < D.size(); j++) + dep.connect(i, D[j].first, D[j].second); + } + } + } + + return result; +} +/* Returns the lexical-order vector of statement stmt_num: one int per output of its xform, where each even index i holds get_const(xform, i, Output_Var) and odd indices are left at 0. Only the upper bound of stmt_num is asserted. */ +std::vector<int> Loop::getLexicalOrder(int stmt_num) const { + assert(stmt_num < stmt.size()); + + const int n = stmt[stmt_num].xform.n_out(); + std::vector<int> lex(n, 0); + + for (int i = 0; i < n; i += 2) /* even positions only; odd ones keep the initial 0 */ + lex[i] = get_const(stmt[stmt_num].xform, i, Output_Var); + + return lex; +} +/* Returns the indices of all statements whose lexical order equals lex at every even position from 0 through dim. A negative dim selects every statement. NOTE(review): lex and each statement's vector are indexed up to dim without a size check; presumably callers guarantee the length, confirm. */ +std::set<int> Loop::getStatements(const std::vector<int> &lex, int dim) const { + const int m = stmt.size(); + + std::set<int> same_loops; + for (int i = 0; i < m; i++) { + if (dim < 0) + same_loops.insert(i); + else { + std::vector<int> a_lex = getLexicalOrder(i); + int j; + for (j = 0; j <= dim; j += 2) + if (lex[j] != a_lex[j]) + break; + if (j > dim) /* the loop ran to completion, so every compared position matched */ + same_loops.insert(i); + } + } + + return same_loops; +} +/* Adds amount to the xform constant at position dim of each statement whose lexical order matches lex at every position below dim. For a positive amount only statements whose value at dim is not smaller than lex[dim] are shifted; for a negative amount only those whose value is not larger than lex[dim]. Does nothing when amount is 0. */ +void Loop::shiftLexicalOrder(const std::vector<int> &lex, int dim, int amount) { + const int m = stmt.size(); + + if (amount == 0) + return; + + for (int i = 0; i < m; i++) { + std::vector<int> lex2 = getLexicalOrder(i); + + bool need_shift = true; + + for (int j = 0; j < dim; j++) /* NOTE(review): steps by 1, not 2; odd slots of lex2 are always 0, so this presumably expects lex to come from getLexicalOrder as well, confirm */ + if (lex2[j] != lex[j]) { + need_shift = false; + break; + } + + if (!need_shift) + continue; + + if (amount > 0) { + if (lex2[dim] < lex[dim]) + continue; + } else if (amount < 0) { + if (lex2[dim] > 
lex[dim]) + continue; + } +/* write the shifted constant back into this statement's xform at position dim */ + assign_const(stmt[i].xform, dim, lex2[dim] + amount); + } +} + +void Loop::setLexicalOrder(int dim, const std::set<int> &active, + int starting_order) { + if (active.size() == 0) + return; + + // check for sanity of parameters + if (dim < 0 || dim % 2 != 0) + throw std::invalid_argument( + "invalid constant loop level to set lexicographical order"); + std::vector<int> lex; + int ref_stmt_num; + for (std::set<int>::iterator i = active.begin(); i != active.end(); i++) { + if ((*i) < 0 || (*i) >= stmt.size()) + throw std::invalid_argument( + "invalid statement number " + to_string(*i)); + if (dim >= stmt[*i].xform.n_out()) + throw std::invalid_argument( + "invalid constant loop level to set lexicographical order"); + if (i == active.begin()) { + lex = getLexicalOrder(*i); + ref_stmt_num = *i; + } else { + std::vector<int> lex2 = getLexicalOrder(*i); + for (int j = 0; j < dim; j += 2) + if (lex[j] != lex2[j]) + throw std::invalid_argument( + "statements are not in the same sub loop nest"); + } + } + + // sepearate statements by current loop level types + int level = (dim + 2) / 2; + std::map<std::pair<LoopLevelType, int>, std::set<int> > active_by_level_type; + std::set<int> active_by_no_level; + for (std::set<int>::iterator i = active.begin(); i != active.end(); i++) { + if (level > stmt[*i].loop_level.size()) + active_by_no_level.insert(*i); + else + active_by_level_type[std::make_pair( + stmt[*i].loop_level[level - 1].type, + stmt[*i].loop_level[level - 1].payload)].insert(*i); + } + + // further separate statements due to control dependences + std::vector<std::set<int> > active_by_level_type_splitted; + for (std::map<std::pair<LoopLevelType, int>, std::set<int> >::iterator i = + active_by_level_type.begin(); i != active_by_level_type.end(); i++) + active_by_level_type_splitted.push_back(i->second); + for (std::set<int>::iterator i = active_by_no_level.begin(); + i != active_by_no_level.end(); i++) + for (int j = 
active_by_level_type_splitted.size() - 1; j >= 0; j--) { + std::set<int> controlled, not_controlled; + for (std::set<int>::iterator k = + active_by_level_type_splitted[j].begin(); + k != active_by_level_type_splitted[j].end(); k++) { + std::vector<DependenceVector> dvs = dep.getEdge(*i, *k); + bool is_controlled = false; + for (int kk = 0; kk < dvs.size(); kk++) + if (dvs[kk].type = DEP_CONTROL) { + is_controlled = true; + break; + } + if (is_controlled) + controlled.insert(*k); + else + not_controlled.insert(*k); + } + if (controlled.size() != 0 && not_controlled.size() != 0) { + active_by_level_type_splitted.erase( + active_by_level_type_splitted.begin() + j); + active_by_level_type_splitted.push_back(controlled); + active_by_level_type_splitted.push_back(not_controlled); + } + } + + // set lexical order separating loops with different loop types first + if (active_by_level_type_splitted.size() + active_by_no_level.size() > 1) { + int dep_dim = get_last_dep_dim_before(ref_stmt_num, level) + 1; + + Graph<std::set<int>, Empty> g; + for (std::vector<std::set<int> >::iterator i = + active_by_level_type_splitted.begin(); + i != active_by_level_type_splitted.end(); i++) + g.insert(*i); + for (std::set<int>::iterator i = active_by_no_level.begin(); + i != active_by_no_level.end(); i++) { + std::set<int> t; + t.insert(*i); + g.insert(t); + } + for (int i = 0; i < g.vertex.size(); i++) + for (int j = i + 1; j < g.vertex.size(); j++) { + bool connected = false; + for (std::set<int>::iterator ii = g.vertex[i].first.begin(); + ii != g.vertex[i].first.end(); ii++) { + for (std::set<int>::iterator jj = g.vertex[j].first.begin(); + jj != g.vertex[j].first.end(); jj++) { + std::vector<DependenceVector> dvs = dep.getEdge(*ii, + *jj); + for (int k = 0; k < dvs.size(); k++) + if (dvs[k].is_control_dependence() + || (dvs[k].is_data_dependence() + && !dvs[k].has_been_carried_before( + dep_dim))) { + g.connect(i, j); + connected = true; + break; + } + if (connected) + break; + } + if 
(connected) + break; + } + connected = false; + for (std::set<int>::iterator ii = g.vertex[i].first.begin(); + ii != g.vertex[i].first.end(); ii++) { + for (std::set<int>::iterator jj = g.vertex[j].first.begin(); + jj != g.vertex[j].first.end(); jj++) { + std::vector<DependenceVector> dvs = dep.getEdge(*jj, + *ii); + for (int k = 0; k < dvs.size(); k++) + if (dvs[k].is_control_dependence() + || (dvs[k].is_data_dependence() + && !dvs[k].has_been_carried_before( + dep_dim))) { + g.connect(j, i); + connected = true; + break; + } + if (connected) + break; + } + if (connected) + break; + } + } + + std::vector<std::set<int> > s = g.topoSort(); + if (s.size() != g.vertex.size()) + throw loop_error( + "cannot separate statements with different loop types at loop level " + + to_string(level)); + + // assign lexical order + int order = starting_order; + for (int i = 0; i < s.size(); i++) { + std::set<int> &cur_scc = g.vertex[*(s[i].begin())].first; + int sz = cur_scc.size(); + if (sz == 1) { + int cur_stmt = *(cur_scc.begin()); + assign_const(stmt[cur_stmt].xform, dim, order); + for (int j = dim + 2; j < stmt[cur_stmt].xform.n_out(); j += 2) + assign_const(stmt[cur_stmt].xform, j, 0); + order++; + } else { + setLexicalOrder(dim, cur_scc, order); + order += sz; + } + } + } + // set lexical order seperating single iteration statements and loops + else { + std::set<int> true_singles; + std::set<int> nonsingles; + std::map<coef_t, std::set<int> > fake_singles; + + // sort out statements that do not require loops + for (std::set<int>::iterator i = active.begin(); i != active.end(); + i++) { + Relation cur_IS = getNewIS(*i); + if (is_single_iteration(cur_IS, dim + 1)) { + bool is_all_single = true; + for (int j = dim + 3; j < stmt[*i].xform.n_out(); j += 2) + if (!is_single_iteration(cur_IS, j)) { + is_all_single = false; + break; + } + if (is_all_single) + true_singles.insert(*i); + else { + try { + fake_singles[get_const(cur_IS, dim + 1, Set_Var)].insert( + *i); + } catch (const 
std::exception &e) { + fake_singles[posInfinity].insert(*i); + } + } + } else + nonsingles.insert(*i); + } + + // split nonsingles forcibly according to negative dependences present (loop unfusible) + int dep_dim = get_dep_dim_of(ref_stmt_num, level); + Graph<int, Empty> g2; + for (std::set<int>::iterator i = nonsingles.begin(); + i != nonsingles.end(); i++) + g2.insert(*i); + for (int i = 0; i < g2.vertex.size(); i++) + for (int j = i + 1; j < g2.vertex.size(); j++) { + std::vector<DependenceVector> dvs = dep.getEdge( + g2.vertex[i].first, g2.vertex[j].first); + for (int k = 0; k < dvs.size(); k++) + if (dvs[k].is_control_dependence() + || (dvs[k].is_data_dependence() + && dvs[k].has_negative_been_carried_at( + dep_dim))) { + g2.connect(i, j); + break; + } + dvs = dep.getEdge(g2.vertex[j].first, g2.vertex[i].first); + for (int k = 0; k < dvs.size(); k++) + if (dvs[k].is_control_dependence() + || (dvs[k].is_data_dependence() + && dvs[k].has_negative_been_carried_at( + dep_dim))) { + g2.connect(j, i); + break; + } + } + + std::vector<std::set<int> > s2 = g2.packed_topoSort(); + + std::vector<std::set<int> > splitted_nonsingles; + for (int i = 0; i < s2.size(); i++) { + std::set<int> cur_scc; + for (std::set<int>::iterator j = s2[i].begin(); j != s2[i].end(); + j++) + cur_scc.insert(g2.vertex[*j].first); + splitted_nonsingles.push_back(cur_scc); + } + + // convert to dependence graph for grouped statements + dep_dim = get_last_dep_dim_before(ref_stmt_num, level) + 1; + Graph<std::set<int>, Empty> g; + for (std::set<int>::iterator i = true_singles.begin(); + i != true_singles.end(); i++) { + std::set<int> t; + t.insert(*i); + g.insert(t); + } + for (int i = 0; i < splitted_nonsingles.size(); i++) { + g.insert(splitted_nonsingles[i]); + } + for (std::map<coef_t, std::set<int> >::iterator i = + fake_singles.begin(); i != fake_singles.end(); i++) + g.insert((*i).second); + + for (int i = 0; i < g.vertex.size(); i++) + for (int j = i + 1; j < g.vertex.size(); j++) { + 
bool connected = false; + for (std::set<int>::iterator ii = g.vertex[i].first.begin(); + ii != g.vertex[i].first.end(); ii++) { + for (std::set<int>::iterator jj = g.vertex[j].first.begin(); + jj != g.vertex[j].first.end(); jj++) { + std::vector<DependenceVector> dvs = dep.getEdge(*ii, + *jj); + for (int k = 0; k < dvs.size(); k++) + if (dvs[k].is_control_dependence() + || (dvs[k].is_data_dependence() + && !dvs[k].has_been_carried_before( + dep_dim))) { + g.connect(i, j); + connected = true; + break; + } + if (connected) + break; + } + if (connected) + break; + } + connected = false; + for (std::set<int>::iterator ii = g.vertex[i].first.begin(); + ii != g.vertex[i].first.end(); ii++) { + for (std::set<int>::iterator jj = g.vertex[j].first.begin(); + jj != g.vertex[j].first.end(); jj++) { + std::vector<DependenceVector> dvs = dep.getEdge(*jj, + *ii); + for (int k = 0; k < dvs.size(); k++) + if (dvs[k].is_control_dependence() + || (dvs[k].is_data_dependence() + && !dvs[k].has_been_carried_before( + dep_dim))) { + g.connect(j, i); + connected = true; + break; + } + if (connected) + break; + } + if (connected) + break; + } + } + + // topological sort according to chun's permute algorithm + std::vector<std::set<int> > s = g.topoSort(); + + // assign lexical order + int order = starting_order; + for (int i = 0; i < s.size(); i++) { + // translate each SCC into original statements + std::set<int> cur_scc; + for (std::set<int>::iterator j = s[i].begin(); j != s[i].end(); j++) + copy(g.vertex[*j].first.begin(), g.vertex[*j].first.end(), + inserter(cur_scc, cur_scc.begin())); + + // now assign the constant + for (std::set<int>::iterator j = cur_scc.begin(); + j != cur_scc.end(); j++) + assign_const(stmt[*j].xform, dim, order); + + if (cur_scc.size() > 1) + setLexicalOrder(dim + 2, cur_scc); + else if (cur_scc.size() == 1) { + int cur_stmt = *(cur_scc.begin()); + for (int j = dim + 2; j < stmt[cur_stmt].xform.n_out(); j += 2) + assign_const(stmt[cur_stmt].xform, j, 0); + } + 
+ if (cur_scc.size() > 0) + order++; + } + } +} + +void Loop::apply_xform() { + std::set<int> active; + for (int i = 0; i < stmt.size(); i++) + active.insert(i); + apply_xform(active); +} + +void Loop::apply_xform(int stmt_num) { + std::set<int> active; + active.insert(stmt_num); + apply_xform(active); +} + +void Loop::apply_xform(std::set<int> &active) { + int max_n = 0; + + CG_outputBuilder *ocg = ir->builder(); + for (std::set<int>::iterator i = active.begin(); i != active.end(); i++) { + int n = stmt[*i].loop_level.size(); + if (n > max_n) + max_n = n; + + std::vector<int> lex = getLexicalOrder(*i); + + Relation mapping(2 * n + 1, n); + F_And *f_root = mapping.add_and(); + for (int j = 1; j <= n; j++) { + EQ_Handle h = f_root->add_EQ(); + h.update_coef(mapping.output_var(j), 1); + h.update_coef(mapping.input_var(2 * j), -1); + } + mapping = Composition(mapping, stmt[*i].xform); + mapping.simplify(); + + // match omega input/output variables to variable names in the code + for (int j = 1; j <= stmt[*i].IS.n_set(); j++) + mapping.name_input_var(j, stmt[*i].IS.set_var(j)->name()); + for (int j = 1; j <= n; j++) + mapping.name_output_var(j, + tmp_loop_var_name_prefix + + to_string(tmp_loop_var_name_counter + j - 1)); + mapping.setup_names(); + + Relation known = Extend_Set(copy(this->known), + mapping.n_out() - this->known.n_set()); + //stmt[*i].code = outputStatement(ocg, stmt[*i].code, 0, mapping, known, std::vector<CG_outputRepr *>(mapping.n_out(), NULL)); + stmt[*i].code = outputStatement(ocg, stmt[*i].code, 0, mapping, known, + std::vector<CG_outputRepr *>(mapping.n_out())); + stmt[*i].IS = Range(Restrict_Domain(mapping, stmt[*i].IS)); + stmt[*i].IS.simplify(); + + // replace original transformation relation with straight 1-1 mapping + mapping = Relation(n, 2 * n + 1); + f_root = mapping.add_and(); + for (int j = 1; j <= n; j++) { + EQ_Handle h = f_root->add_EQ(); + h.update_coef(mapping.output_var(2 * j), 1); + h.update_coef(mapping.input_var(j), -1); + } + 
for (int j = 1; j <= 2 * n + 1; j += 2) { + EQ_Handle h = f_root->add_EQ(); + h.update_coef(mapping.output_var(j), 1); + h.update_const(-lex[j - 1]); + } + stmt[*i].xform = mapping; + } + + tmp_loop_var_name_counter += max_n; +} + +void Loop::addKnown(const Relation &cond) { + int n1 = this->known.n_set(); + + Relation r = copy(cond); + int n2 = r.n_set(); + + if (n1 < n2) + this->known = Extend_Set(this->known, n2 - n1); + else if (n1 > n2) + r = Extend_Set(r, n1 - n2); + + this->known = Intersection(this->known, r); +} + +bool Loop::nonsingular(const std::vector<std::vector<int> > &T) { + if (stmt.size() == 0) + return true; + + // check for sanity of parameters + for (int i = 0; i < stmt.size(); i++) { + if (stmt[i].loop_level.size() != num_dep_dim) + throw std::invalid_argument( + "nonsingular loop transformations must be applied to original perfect loop nest"); + for (int j = 0; j < stmt[i].loop_level.size(); j++) + if (stmt[i].loop_level[j].type != LoopLevelOriginal) + throw std::invalid_argument( + "nonsingular loop transformations must be applied to original perfect loop nest"); + } + if (T.size() != num_dep_dim) + throw std::invalid_argument("invalid transformation matrix"); + for (int i = 0; i < stmt.size(); i++) + if (T[i].size() != num_dep_dim + 1 && T[i].size() != num_dep_dim) + throw std::invalid_argument("invalid transformation matrix"); + + // build relation from matrix + Relation mapping(2 * num_dep_dim + 1, 2 * num_dep_dim + 1); + F_And *f_root = mapping.add_and(); + for (int i = 0; i < num_dep_dim; i++) { + EQ_Handle h = f_root->add_EQ(); + h.update_coef(mapping.output_var(2 * (i + 1)), -1); + for (int j = 0; j < num_dep_dim; j++) + if (T[i][j] != 0) + h.update_coef(mapping.input_var(2 * (j + 1)), T[i][j]); + if (T[i].size() == num_dep_dim + 1) + h.update_const(T[i][num_dep_dim]); + } + for (int i = 1; i <= 2 * num_dep_dim + 1; i += 2) { + EQ_Handle h = f_root->add_EQ(); + h.update_coef(mapping.output_var(i), -1); + 
h.update_coef(mapping.input_var(i), 1); + } + + // update transformation relations + for (int i = 0; i < stmt.size(); i++) + stmt[i].xform = Composition(copy(mapping), stmt[i].xform); + + // update dependence graph + for (int i = 0; i < dep.vertex.size(); i++) + for (DependenceGraph::EdgeList::iterator j = + dep.vertex[i].second.begin(); j != dep.vertex[i].second.end(); + j++) { + std::vector<DependenceVector> dvs = j->second; + for (int k = 0; k < dvs.size(); k++) { + DependenceVector &dv = dvs[k]; + switch (dv.type) { + case DEP_W2R: + case DEP_R2W: + case DEP_W2W: + case DEP_R2R: { + std::vector<coef_t> lbounds(num_dep_dim), ubounds( + num_dep_dim); + for (int p = 0; p < num_dep_dim; p++) { + coef_t lb = 0; + coef_t ub = 0; + for (int q = 0; q < num_dep_dim; q++) { + if (T[p][q] > 0) { + if (lb == -posInfinity + || dv.lbounds[q] == -posInfinity) + lb = -posInfinity; + else + lb += T[p][q] * dv.lbounds[q]; + if (ub == posInfinity + || dv.ubounds[q] == posInfinity) + ub = posInfinity; + else + ub += T[p][q] * dv.ubounds[q]; + } else if (T[p][q] < 0) { + if (lb == -posInfinity + || dv.ubounds[q] == posInfinity) + lb = -posInfinity; + else + lb += T[p][q] * dv.ubounds[q]; + if (ub == posInfinity + || dv.lbounds[q] == -posInfinity) + ub = posInfinity; + else + ub += T[p][q] * dv.lbounds[q]; + } + } + if (T[p].size() == num_dep_dim + 1) { + if (lb != -posInfinity) + lb += T[p][num_dep_dim]; + if (ub != posInfinity) + ub += T[p][num_dep_dim]; + } + lbounds[p] = lb; + ubounds[p] = ub; + } + dv.lbounds = lbounds; + dv.ubounds = ubounds; + + break; + } + default: + ; + } + } + j->second = dvs; + } + + // set constant loop values + std::set<int> active; + for (int i = 0; i < stmt.size(); i++) + active.insert(i); + setLexicalOrder(0, active); + + return true; +} + +void Loop::skew(const std::set<int> &stmt_nums, int level, + const std::vector<int> &skew_amount) { + if (stmt_nums.size() == 0) + return; + + // check for sanity of parameters + int ref_stmt_num = 
*(stmt_nums.begin()); + std::vector<std::set<int> > array_of_deps; + for (std::set<int>::const_iterator i = stmt_nums.begin(); + i != stmt_nums.end(); i++) { + if (*i < 0 || *i >= stmt.size()) + throw std::invalid_argument( + "invalid statement number " + to_string(*i)); + if (level < 1 || level > stmt[*i].loop_level.size()) + throw std::invalid_argument( + "invalid loop level " + to_string(level)); + for (int j = stmt[*i].loop_level.size(); j < skew_amount.size(); j++) + if (skew_amount[j] != 0) + throw std::invalid_argument("invalid skewing formula"); + } + + // set trasformation relations + for (std::set<int>::const_iterator i = stmt_nums.begin(); + i != stmt_nums.end(); i++) { + int n = stmt[*i].xform.n_out(); + Relation r(n, n); + F_And *f_root = r.add_and(); + for (int j = 1; j <= n; j++) + if (j != 2 * level) { + EQ_Handle h = f_root->add_EQ(); + h.update_coef(r.input_var(j), 1); + h.update_coef(r.output_var(j), -1); + } + EQ_Handle h = f_root->add_EQ(); + h.update_coef(r.output_var(2 * level), -1); + for (int j = 0; j < skew_amount.size(); j++) + if (skew_amount[j] != 0) + h.update_coef(r.input_var(2 * (j + 1)), skew_amount[j]); + + stmt[*i].xform = Composition(r, stmt[*i].xform); + stmt[*i].xform.simplify(); + applyXform(*i); + std::set<int> dont_consider; + //} + + // update dependence graph + if (stmt[ref_stmt_num].loop_level[level - 1].type + == LoopLevelOriginal) { + int dep_dim = stmt[ref_stmt_num].loop_level[level - 1].payload; + for (std::set<int>::const_iterator i = stmt_nums.begin(); + i != stmt_nums.end(); i++) + for (DependenceGraph::EdgeList::iterator j = + dep.vertex[*i].second.begin(); + j != dep.vertex[*i].second.end(); j++) + if (stmt_nums.find(j->first) != stmt_nums.end()) { + // dependence between skewed statements + std::vector<DependenceVector> dvs = j->second; + for (int k = 0; k < dvs.size(); k++) { + DependenceVector &dv = dvs[k]; + if (dv.is_data_dependence()) { + coef_t lb = 0; + coef_t ub = 0; + for (int kk = 0; kk < 
skew_amount.size(); + kk++) { + int cur_dep_dim = get_dep_dim_of(*i, + kk + 1); + if (skew_amount[kk] > 0) { + if (lb != -posInfinity + && stmt[*i].loop_level[kk].type + == LoopLevelOriginal + && dv.lbounds[cur_dep_dim] + != -posInfinity) + lb += skew_amount[kk] + * dv.lbounds[cur_dep_dim]; + else { + if (cur_dep_dim != -1 + && !(dv.lbounds[cur_dep_dim] + == 0 + && dv.ubounds[cur_dep_dim] + == 0)) + lb = -posInfinity; + } + if (ub != posInfinity + && stmt[*i].loop_level[kk].type + == LoopLevelOriginal + && dv.ubounds[cur_dep_dim] + != posInfinity) + ub += skew_amount[kk] + * dv.ubounds[cur_dep_dim]; + else { + if (cur_dep_dim != -1 + && !(dv.lbounds[cur_dep_dim] + == 0 + && dv.ubounds[cur_dep_dim] + == 0)) + ub = posInfinity; + } + } else if (skew_amount[kk] < 0) { + if (lb != -posInfinity + && stmt[*i].loop_level[kk].type + == LoopLevelOriginal + && dv.ubounds[cur_dep_dim] + != posInfinity) + lb += skew_amount[kk] + * dv.ubounds[cur_dep_dim]; + else { + if (cur_dep_dim != -1 + && !(dv.lbounds[cur_dep_dim] + == 0 + && dv.ubounds[cur_dep_dim] + == 0)) + lb = -posInfinity; + } + if (ub != posInfinity + && stmt[*i].loop_level[kk].type + == LoopLevelOriginal + && dv.lbounds[cur_dep_dim] + != -posInfinity) + ub += skew_amount[kk] + * dv.lbounds[cur_dep_dim]; + else { + if (cur_dep_dim != -1 + && !(dv.lbounds[cur_dep_dim] + == 0 + && dv.ubounds[cur_dep_dim] + == 0)) + ub = posInfinity; + } + } + } + if ((dv.isCarried(dep_dim) + && dv.hasPositive(dep_dim)) && dv.quasi) + dv.quasi = false; + + if ((dv.isCarried(dep_dim) + && dv.hasNegative(dep_dim)) + && !dv.quasi) + throw loop_error( + "loop error: Skewing is illegal, dependence violation!"); + dv.lbounds[dep_dim] = lb; + dv.ubounds[dep_dim] = ub; + if ((dv.isCarried(dep_dim) + && dv.hasPositive(dep_dim)) && dv.quasi) + dv.quasi = false; + + if ((dv.isCarried(dep_dim) + && dv.hasNegative(dep_dim)) + && !dv.quasi) + throw loop_error( + "loop error: Skewing is illegal, dependence violation!"); + } + } + + j->second = dvs; + 
} + } else { + // dependence from skewed statement to unskewed statement becomes jumbled, + // put distance value at skewed dimension to unknown + /*std::vector<DependenceVector> dvs = j->second; + for (int k = 0; k < dvs.size(); k++) { + DependenceVector &dv = dvs[k]; + if (dv.is_data_dependence()) { + dv.lbounds[dep_dim] = -posInfinity; + dv.ubounds[dep_dim] = posInfinity; + } + } + j->second = dvs; + */ + dont_consider.insert(j->first); + } + for (int l = 0; l < dep.vertex.size(); l++) + if (stmt_nums.find(l) == stmt_nums.end()) + if (dont_consider.find(l) == stmt_nums.end() + && (dep.vertex[l].second.find(*i) + != dep.vertex[l].second.end())) + dont_consider.insert(l); + array_of_deps.push_back(dont_consider); + } + /*for (int i = 0; i < dep.vertex.size(); i++) + if (stmt_nums.find(i) == stmt_nums.end()) + for (DependenceGraph::EdgeList::iterator j = + dep.vertex[i].second.begin(); + j != dep.vertex[i].second.end(); j++) + if (stmt_nums.find(j->first) != stmt_nums.end()) { + // dependence from unskewed statement to skewed statement becomes jumbled, + // put distance value at skewed dimension to unknown + std::vector<DependenceVector> dvs = j->second; + for (int k = 0; k < dvs.size(); k++) { + DependenceVector &dv = dvs[k]; + if (dv.is_data_dependence()) { + dv.lbounds[dep_dim] = -posInfinity; + dv.ubounds[dep_dim] = posInfinity; + } + } + j->second = dvs; + } + }*/ + std::set<int>::const_iterator w = stmt_nums.begin(); + for (int i = 0; i < array_of_deps.size() && w != stmt_nums.end(); i++) + for (std::set<int>::const_iterator j = array_of_deps[i].begin(); + j != array_of_deps[i].end(); j++) { + if (dep.vertex[*w].second.find(*j) != dep.vertex[*w].second.end()) + dep.disconnect(*w, *j); + if (dep.vertex[*j].second.find(*w) != dep.vertex[*j].second.end()) + dep.disconnect(*j, *w); + int x, y; + std::pair<std::vector<DependenceVector>, + std::vector<DependenceVector> > dv_s; + if ((*w) <= (*j)) { + x = *w; + y = *j; + + dv_s = test_data_dependences(ir_, 
stmt[x].code, stmt[x].IS, + stmt[y].code, stmt[y].IS, freevar, index, x, y); + } else { + x = *j; + y = *w; + dv_s = test_data_dependences(ir_, stmt[y].code, stmt[y].IS, + stmt[x].code, stmt[x].IS, freevar, index, x, y); + } + for (int k = 0; k < dv_s.first.size(); k++) { + if (is_dependence_valid_based_on_lex_order(x, y, dv_s.first[k], + true)) + dep.connect(x, y, dv_s.first[k]); + else + dep.connect(y, x, dv_s.first[k].reverse()); + } + for (int k = 0; k < dv_s.second.size(); k++) { + if (is_dependence_valid_based_on_lex_order(x, y, dv_s.second[k], + false)) + dep.connect(y, x, dv_s.second[k]); + else + dep.connect(x, y, dv_s.second[k].reverse()); + } + w++; + } +} + +void Loop::shift(const std::set<int> &stmt_nums, int level, int shift_amount) { + if (stmt_nums.size() == 0) + return; + + // check for sanity of parameters + int ref_stmt_num = *(stmt_nums.begin()); + for (std::set<int>::const_iterator i = stmt_nums.begin(); + i != stmt_nums.end(); i++) { + if (*i < 0 || *i >= stmt.size()) + throw std::invalid_argument( + "invalid statement number " + to_string(*i)); + if (level < 1 || level > stmt[*i].loop_level.size()) + throw std::invalid_argument( + "invalid loop level " + to_string(level)); + } + + // do nothing + if (shift_amount == 0) + return; + + // set trasformation relations + for (std::set<int>::const_iterator i = stmt_nums.begin(); + i != stmt_nums.end(); i++) { + int n = stmt[*i].xform.n_out(); + + Relation r(n, n); + F_And *f_root = r.add_and(); + for (int j = 1; j <= n; j++) { + EQ_Handle h = f_root->add_EQ(); + h.update_coef(r.input_var(j), 1); + h.update_coef(r.output_var(j), -1); + if (j == 2 * level) + h.update_const(shift_amount); + } + + stmt[*i].xform = Composition(r, stmt[*i].xform); + stmt[*i].xform.simplify(); + } + + // update dependence graph + if (stmt[ref_stmt_num].loop_level[level - 1].type == LoopLevelOriginal) { + int dep_dim = stmt[ref_stmt_num].loop_level[level - 1].payload; + for (std::set<int>::const_iterator i = 
stmt_nums.begin(); + i != stmt_nums.end(); i++) + for (DependenceGraph::EdgeList::iterator j = + dep.vertex[*i].second.begin(); + j != dep.vertex[*i].second.end(); j++) + if (stmt_nums.find(j->first) == stmt_nums.end()) { + // dependence from shifted statement to unshifted statement + std::vector<DependenceVector> dvs = j->second; + for (int k = 0; k < dvs.size(); k++) { + DependenceVector &dv = dvs[k]; + if (dv.is_data_dependence()) { + if (dv.lbounds[dep_dim] != -posInfinity) + dv.lbounds[dep_dim] -= shift_amount; + if (dv.ubounds[dep_dim] != posInfinity) + dv.ubounds[dep_dim] -= shift_amount; + } + } + j->second = dvs; + } + for (int i = 0; i < dep.vertex.size(); i++) + if (stmt_nums.find(i) == stmt_nums.end()) + for (DependenceGraph::EdgeList::iterator j = + dep.vertex[i].second.begin(); + j != dep.vertex[i].second.end(); j++) + if (stmt_nums.find(j->first) != stmt_nums.end()) { + // dependence from unshifted statement to shifted statement + std::vector<DependenceVector> dvs = j->second; + for (int k = 0; k < dvs.size(); k++) { + DependenceVector &dv = dvs[k]; + if (dv.is_data_dependence()) { + if (dv.lbounds[dep_dim] != -posInfinity) + dv.lbounds[dep_dim] += shift_amount; + if (dv.ubounds[dep_dim] != posInfinity) + dv.ubounds[dep_dim] += shift_amount; + } + } + j->second = dvs; + } + } +} + +// bool Loop::fuse(const std::set<int> &stmt_nums, int level) { +// if (stmt_nums.size() == 0 || stmt_nums.size() == 1) +// return true; +// int dim = 2*level-1; + +// // check for sanity of parameters +// std::vector<int> ref_lex; +// for (std::set<int>::const_iterator i = stmt_nums.begin(); i != stmt_nums.end(); i++) { +// if (*i < 0 || *i >= stmt.size()) +// throw std::invalid_argument("invalid statement number " + to_string(*i)); +// if (level < 1 || level > (stmt[*i].xform.n_out()-1)/2) +// throw std::invalid_argument("invalid loop level " + to_string(level)); +// if (ref_lex.size() == 0) +// ref_lex = getLexicalOrder(*i); +// else { +// std::vector<int> lex = 
getLexicalOrder(*i); +// for (int j = 0; j < dim-1; j+=2) +// if (lex[j] != ref_lex[j]) +// throw std::invalid_argument("statements for fusion must be in the same level-" + to_string(level-1) + " subloop"); +// } +// } + +// // collect lexicographical order values from to-be-fused statements +// std::set<int> lex_values; +// for (std::set<int>::const_iterator i = stmt_nums.begin(); i != stmt_nums.end(); i++) { +// std::vector<int> lex = getLexicalOrder(*i); +// lex_values.insert(lex[dim-1]); +// } +// if (lex_values.size() == 1) +// return true; + +// // negative dependence would prevent fusion +// int dep_dim = xform_index[dim].first; +// for (std::set<int>::iterator i = lex_values.begin(); i != lex_values.end(); i++) { +// ref_lex[dim-1] = *i; +// std::set<int> a = getStatements(ref_lex, dim-1); +// std::set<int>::iterator j = i; +// j++; +// for (; j != lex_values.end(); j++) { +// ref_lex[dim-1] = *j; +// std::set<int> b = getStatements(ref_lex, dim-1); +// for (std::set<int>::iterator ii = a.begin(); ii != a.end(); ii++) +// for (std::set<int>::iterator jj = b.begin(); jj != b.end(); jj++) { +// std::vector<DependenceVector> dvs; +// dvs = dep.getEdge(*ii, *jj); +// for (int k = 0; k < dvs.size(); k++) +// if (dvs[k].isCarried(dep_dim) && dvs[k].hasNegative(dep_dim)) +// throw loop_error("loop error: statements " + to_string(*ii) + " and " + to_string(*jj) + " cannot be fused together due to negative dependence"); +// dvs = dep.getEdge(*jj, *ii); +// for (int k = 0; k < dvs.size(); k++) +// if (dvs[k].isCarried(dep_dim) && dvs[k].hasNegative(dep_dim)) +// throw loop_error("loop error: statements " + to_string(*jj) + " and " + to_string(*ii) + " cannot be fused together due to negative dependence"); +// } +// } +// } + +// // collect all other lexicographical order values from the subloop +// // enclosing these to-be-fused loops +// std::set<int> same_loop = getStatements(ref_lex, dim-3); +// std::set<int> other_lex_values; +// for (std::set<int>::iterator i = 
same_loop.begin(); i != same_loop.end(); i++) { +// std::vector<int> lex = getLexicalOrder(*i); +// if (lex_values.find(lex[dim-1]) == lex_values.end()) +// other_lex_values.insert(lex[dim-1]); +// } + +// // update to-be-fused loops due to dependence cycle +// Graph<std::set<int>, Empty> g; +// { +// std::set<int> t; +// for (std::set<int>::iterator i = lex_values.begin(); i != lex_values.end(); i++) { +// ref_lex[dim-1] = *i; +// std::set<int> t2 = getStatements(ref_lex, dim-1); +// std::set_union(t.begin(), t.end(), t2.begin(), t2.end(), inserter(t, t.begin())); +// } +// g.insert(t); +// } +// for (std::set<int>::iterator i = other_lex_values.begin(); i != other_lex_values.end(); i++) { +// ref_lex[dim-1] = *i; +// std::set<int> t = getStatements(ref_lex, dim-1); +// g.insert(t); +// } +// for (int i = 0; i < g.vertex.size(); i++) +// for (int j = i+1; j < g.vertex.size(); j++) +// for (std::set<int>::iterator ii = g.vertex[i].first.begin(); ii != g.vertex[i].first.end(); ii++) +// for (std::set<int>::iterator jj = g.vertex[j].first.begin(); jj != g.vertex[j].first.end(); jj++) { +// std::vector<DependenceVector> dvs; +// dvs = dep.getEdge(*ii, *jj); +// for (int k = 0; k < dvs.size(); k++) +// if (dvs[k].isCarried(dep_dim)) { +// g.connect(i, j); +// break; +// } +// dvs = dep.getEdge(*jj, *ii); +// for (int k = 0; k < dvs.size(); k++) +// if (dvs[k].isCarried(dep_dim)) { +// g.connect(j, i); +// break; +// } +// } +// std::vector<std::set<int> > s = g.topoSort(); +// int fused_lex_value = 0; +// for (int i = 0; i < s.size(); i++) +// if (s[i].find(0) != s[i].end()) { +// // now add additional lexicographical order values +// for (std::set<int>::iterator j = s[i].begin(); j != s[i].end(); j++) +// if (*j != 0) { +// int stmt = *(g.vertex[*j].first.begin()); +// std::vector<int> lex = getLexicalOrder(stmt); +// lex_values.insert(lex[dim-1]); +// } + +// if (s.size() > 1) { +// if (i == 0) { +// int min_lex_value; +// for (std::set<int>::iterator j = 
s[i+1].begin(); j != s[i+1].end(); j++) { +// int stmt = *(g.vertex[*j].first.begin()); +// std::vector<int> lex = getLexicalOrder(stmt); +// if (j == s[i+1].begin()) +// min_lex_value = lex[dim-1]; +// else if (lex[dim-1] < min_lex_value) +// min_lex_value = lex[dim-1]; +// } +// fused_lex_value = min_lex_value - 1; +// } +// else { +// int max_lex_value; +// for (std::set<int>::iterator j = s[i-1].begin(); j != s[i-1].end(); j++) { +// int stmt = *(g.vertex[*j].first.begin()); +// std::vector<int> lex = getLexicalOrder(stmt); +// if (j == s[i-1].begin()) +// max_lex_value = lex[dim-1]; +// else if (lex[dim-1] > max_lex_value) +// max_lex_value = lex[dim-1]; +// } +// fused_lex_value = max_lex_value + 1; +// } +// } + +// break; +// } + +// // sort the newly updated to-be-fused lexicographical order values +// std::vector<int> ordered_lex_values; +// for (std::set<int>::iterator i = lex_values.begin(); i != lex_values.end(); i++) +// ordered_lex_values.push_back(*i); +// std::sort(ordered_lex_values.begin(), ordered_lex_values.end()); + +// // make sure internal loops inside to-be-fused loops have the same +// // lexicographical order before and after fusion +// std::vector<std::pair<int, int> > inside_lex_range(ordered_lex_values.size()); +// for (int i = 0; i < ordered_lex_values.size(); i++) { +// ref_lex[dim-1] = ordered_lex_values[i]; +// std::set<int> the_stmts = getStatements(ref_lex, dim-1); +// std::set<int>::iterator j = the_stmts.begin(); +// std::vector<int> lex = getLexicalOrder(*j); +// int min_inside_lex_value = lex[dim+1]; +// int max_inside_lex_value = lex[dim+1]; +// j++; +// for (; j != the_stmts.end(); j++) { +// std::vector<int> lex = getLexicalOrder(*j); +// if (lex[dim+1] < min_inside_lex_value) +// min_inside_lex_value = lex[dim+1]; +// if (lex[dim+1] > max_inside_lex_value) +// max_inside_lex_value = lex[dim+1]; +// } +// inside_lex_range[i].first = min_inside_lex_value; +// inside_lex_range[i].second = max_inside_lex_value; +// } +// for 
(int i = 1; i < ordered_lex_values.size(); i++) +// if (inside_lex_range[i].first <= inside_lex_range[i-1].second) { +// int shift_lex_value = inside_lex_range[i-1].second - inside_lex_range[i].first + 1; +// ref_lex[dim-1] = ordered_lex_values[i]; +// ref_lex[dim+1] = inside_lex_range[i].first; +// shiftLexicalOrder(ref_lex, dim+1, shift_lex_value); +// inside_lex_range[i].first += shift_lex_value; +// inside_lex_range[i].second += shift_lex_value; +// } + +// // set lexicographical order for fused loops +// for (std::set<int>::iterator i = same_loop.begin(); i != same_loop.end(); i++) { +// std::vector<int> lex = getLexicalOrder(*i); +// if (lex_values.find(lex[dim-1]) != lex_values.end()) +// assign_const(stmt[*i].xform, dim-1, fused_lex_value); +// } + +// // no need to update dependence graph +// ; + +// return true; +// } + +// bool Loop::distribute(const std::set<int> &stmt_nums, int level) { +// if (stmt_nums.size() == 0 || stmt_nums.size() == 1) +// return true; +// int dim = 2*level-1; + +// // check for sanity of parameters +// std::vector<int> ref_lex; +// for (std::set<int>::const_iterator i = stmt_nums.begin(); i != stmt_nums.end(); i++) { +// if (*i < 0 || *i >= stmt.size()) +// throw std::invalid_argument("invalid statement number " + to_string(*i)); +// if (level < 1 || level > (stmt[*i].xform.n_out()-1)/2) +// throw std::invalid_argument("invalid loop level " + to_string(level)); +// if (ref_lex.size() == 0) +// ref_lex = getLexicalOrder(*i); +// else { +// std::vector<int> lex = getLexicalOrder(*i); +// for (int j = 0; j <= dim-1; j+=2) +// if (lex[j] != ref_lex[j]) +// throw std::invalid_argument("statements for distribution must be in the same level-" + to_string(level) + " subloop"); +// } +// } + +// // find SCC in the to-be-distributed loop +// int dep_dim = xform_index[dim].first; +// std::set<int> same_loop = getStatements(ref_lex, dim-1); +// Graph<int, Empty> g; +// for (std::set<int>::iterator i = same_loop.begin(); i != 
same_loop.end(); i++) +// g.insert(*i); +// for (int i = 0; i < g.vertex.size(); i++) +// for (int j = i+1; j < g.vertex.size(); j++) { +// std::vector<DependenceVector> dvs; +// dvs = dep.getEdge(g.vertex[i].first, g.vertex[j].first); +// for (int k = 0; k < dvs.size(); k++) +// if (dvs[k].isCarried(dep_dim)) { +// g.connect(i, j); +// break; +// } +// dvs = dep.getEdge(g.vertex[j].first, g.vertex[i].first); +// for (int k = 0; k < dvs.size(); k++) +// if (dvs[k].isCarried(dep_dim)) { +// g.connect(j, i); +// break; +// } +// } +// std::vector<std::set<int> > s = g.topoSort(); + +// // find statements that cannot be distributed due to dependence cycle +// Graph<std::set<int>, Empty> g2; +// for (int i = 0; i < s.size(); i++) { +// std::set<int> t; +// for (std::set<int>::iterator j = s[i].begin(); j != s[i].end(); j++) +// if (stmt_nums.find(g.vertex[*j].first) != stmt_nums.end()) +// t.insert(g.vertex[*j].first); +// if (!t.empty()) +// g2.insert(t); +// } +// for (int i = 0; i < g2.vertex.size(); i++) +// for (int j = i+1; j < g2.vertex.size(); j++) +// for (std::set<int>::iterator ii = g2.vertex[i].first.begin(); ii != g2.vertex[i].first.end(); ii++) +// for (std::set<int>::iterator jj = g2.vertex[j].first.begin(); jj != g2.vertex[j].first.end(); jj++) { +// std::vector<DependenceVector> dvs; +// dvs = dep.getEdge(*ii, *jj); +// for (int k = 0; k < dvs.size(); k++) +// if (dvs[k].isCarried(dep_dim)) { +// g2.connect(i, j); +// break; +// } +// dvs = dep.getEdge(*jj, *ii); +// for (int k = 0; k < dvs.size(); k++) +// if (dvs[k].isCarried(dep_dim)) { +// g2.connect(j, i); +// break; +// } +// } +// std::vector<std::set<int> > s2 = g2.topoSort(); + +// // nothing to distribute +// if (s2.size() == 1) +// throw loop_error("loop error: no statement can be distributed due to dependence cycle"); + +// std::vector<std::set<int> > s3; +// for (int i = 0; i < s2.size(); i++) { +// std::set<int> t; +// for (std::set<int>::iterator j = s2[i].begin(); j != s2[i].end(); j++) 
+// std::set_union(t.begin(), t.end(), g2.vertex[*j].first.begin(), g2.vertex[*j].first.end(), inserter(t, t.begin())); +// s3.push_back(t); +// } + +// // associate other affected statements with the right distributed statements +// for (std::set<int>::iterator i = same_loop.begin(); i != same_loop.end(); i++) +// if (stmt_nums.find(*i) == stmt_nums.end()) { +// bool is_inserted = false; +// int potential_insertion_point = 0; +// for (int j = 0; j < s3.size(); j++) { +// for (std::set<int>::iterator k = s3[j].begin(); k != s3[j].end(); k++) { +// std::vector<DependenceVector> dvs; +// dvs = dep.getEdge(*i, *k); +// for (int kk = 0; kk < dvs.size(); kk++) +// if (dvs[kk].isCarried(dep_dim)) { +// s3[j].insert(*i); +// is_inserted = true; +// break; +// } +// dvs = dep.getEdge(*k, *i); +// for (int kk = 0; kk < dvs.size(); kk++) +// if (dvs[kk].isCarried(dep_dim)) +// potential_insertion_point = j; +// } +// if (is_inserted) +// break; +// } + +// if (!is_inserted) +// s3[potential_insertion_point].insert(*i); +// } + +// // set lexicographical order after distribution +// int order = ref_lex[dim-1]; +// shiftLexicalOrder(ref_lex, dim-1, s3.size()-1); +// for (std::vector<std::set<int> >::iterator i = s3.begin(); i != s3.end(); i++) { +// for (std::set<int>::iterator j = (*i).begin(); j != (*i).end(); j++) +// assign_const(stmt[*j].xform, dim-1, order); +// order++; +// } + +// // no need to update dependence graph +// ; + +// return true; +// } + diff --git a/loop_tile.cc b/loop_tile.cc new file mode 100644 index 0000000..ad1d3b7 --- /dev/null +++ b/loop_tile.cc @@ -0,0 +1,630 @@ +/* + * loop_tile.cc + * + * Created on: Nov 12, 2012 + * Author: anand + */ + +#include <codegen.h> +#include "loop.hh" +#include "omegatools.hh" +#include "ir_code.hh" +#include "chill_error.hh" + +using namespace omega; + + + + +void Loop::tile(int stmt_num, int level, int tile_size, int outer_level, + TilingMethodType method, int alignment_offset, int alignment_multiple) { + // check 
for sanity of parameters + if (tile_size < 0) + throw std::invalid_argument("invalid tile size"); + if (alignment_multiple < 1 || alignment_offset < 0) + throw std::invalid_argument("invalid alignment for tile"); + if (stmt_num < 0 || stmt_num >= stmt.size()) + throw std::invalid_argument("invalid statement " + to_string(stmt_num)); + if (level <= 0) + throw std::invalid_argument("invalid loop level " + to_string(level)); + if (level > stmt[stmt_num].loop_level.size()) + throw std::invalid_argument( + "there is no loop level " + to_string(level) + " for statement " + + to_string(stmt_num)); + if (outer_level <= 0 || outer_level > level) + throw std::invalid_argument( + "invalid tile controlling loop level " + + to_string(outer_level)); + + // invalidate saved codegen computation + delete last_compute_cgr_; + last_compute_cgr_ = NULL; + delete last_compute_cg_; + last_compute_cg_ = NULL; + + int dim = 2 * level - 1; + int outer_dim = 2 * outer_level - 1; + std::vector<int> lex = getLexicalOrder(stmt_num); + std::set<int> same_tiled_loop = getStatements(lex, dim - 1); + std::set<int> same_tile_controlling_loop = getStatements(lex, + outer_dim - 1); + + for (std::set<int>::iterator i = same_tiled_loop.begin(); + i != same_tiled_loop.end(); i++) { + for (DependenceGraph::EdgeList::iterator j = + dep.vertex[*i].second.begin(); j != dep.vertex[*i].second.end(); + j++) { + if (same_tiled_loop.find(j->first) != same_tiled_loop.end()) + for (int k = 0; k < j->second.size(); k++) { + DependenceVector dv = j->second[k]; + int dim2 = level - 1; + if ((dv.type != DEP_CONTROL) && (dv.type != DEP_UNKNOWN)) { + while (stmt[*i].loop_level[dim2].type == LoopLevelTile) { + dim2 = stmt[*i].loop_level[dim2].payload - 1; + } + dim2 = stmt[*i].loop_level[dim2].payload; + + if (dv.hasNegative(dim2) && (!dv.quasi)) { + for (int l = outer_level; l < level; l++) + if (stmt[*i].loop_level[l - 1].type + != LoopLevelTile) { + if (dv.isCarried( + stmt[*i].loop_level[l - 1].payload) + && 
dv.hasPositive( + stmt[*i].loop_level[l - 1].payload)) + throw loop_error( + "loop error: Tiling is illegal, dependence violation!"); + } else { + + int dim3 = l - 1; + while (stmt[*i].loop_level[l - 1].type + != LoopLevelTile) { + dim3 = + stmt[*i].loop_level[l - 1].payload + - 1; + + } + + dim3 = stmt[*i].loop_level[l - 1].payload; + if (dim3 < level - 1) + if (dv.isCarried(dim3) + && dv.hasPositive(dim3)) + throw loop_error( + "loop error: Tiling is illegal, dependence violation!"); + } + } + } + } + } + } + // special case for no tiling + if (tile_size == 0) { + for (std::set<int>::iterator i = same_tile_controlling_loop.begin(); + i != same_tile_controlling_loop.end(); i++) { + Relation r(stmt[*i].xform.n_out(), stmt[*i].xform.n_out() + 2); + F_And *f_root = r.add_and(); + for (int j = 1; j <= 2 * outer_level - 1; j++) { + EQ_Handle h = f_root->add_EQ(); + h.update_coef(r.input_var(j), 1); + h.update_coef(r.output_var(j), -1); + } + EQ_Handle h1 = f_root->add_EQ(); + h1.update_coef(r.output_var(2 * outer_level), 1); + EQ_Handle h2 = f_root->add_EQ(); + h2.update_coef(r.output_var(2 * outer_level + 1), 1); + for (int j = 2 * outer_level; j <= stmt[*i].xform.n_out(); j++) { + EQ_Handle h = f_root->add_EQ(); + h.update_coef(r.input_var(j), 1); + h.update_coef(r.output_var(j + 2), -1); + } + + stmt[*i].xform = Composition(copy(r), stmt[*i].xform); + } + } + // normal tiling + else { + std::set<int> private_stmt; + for (std::set<int>::iterator i = same_tile_controlling_loop.begin(); + i != same_tile_controlling_loop.end(); i++) { +// if (same_tiled_loop.find(*i) == same_tiled_loop.end() && !is_single_iteration(getNewIS(*i), dim)) +// same_tiled_loop.insert(*i); + + // should test dim's value directly but it is ok for now +// if (same_tiled_loop.find(*i) == same_tiled_loop.end() && get_const(stmt[*i].xform, dim+1, Output_Var) == posInfinity) + if (same_tiled_loop.find(*i) == same_tiled_loop.end() + && overflow.find(*i) != overflow.end()) + private_stmt.insert(*i); + 
} + + // extract the union of the iteration space to be considered + Relation hull; + /*{ + Tuple < Relation > r_list; + Tuple<int> r_mask; + + for (std::set<int>::iterator i = same_tile_controlling_loop.begin(); + i != same_tile_controlling_loop.end(); i++) + if (private_stmt.find(*i) == private_stmt.end()) { + Relation r = project_onto_levels(getNewIS(*i), dim + 1, + true); + for (int j = outer_dim; j < dim; j++) + r = Project(r, j + 1, Set_Var); + for (int j = 0; j < outer_dim; j += 2) + r = Project(r, j + 1, Set_Var); + r_list.append(r); + r_mask.append(1); + } + + hull = Hull(r_list, r_mask, 1, true); + }*/ + + { + std::vector<Relation> r_list; + + for (std::set<int>::iterator i = same_tile_controlling_loop.begin(); + i != same_tile_controlling_loop.end(); i++) + if (private_stmt.find(*i) == private_stmt.end()) { + Relation r = getNewIS(*i); + for (int j = dim + 2; j <= r.n_set(); j++) + r = Project(r, r.set_var(j)); + for (int j = outer_dim; j < dim; j++) + r = Project(r, j + 1, Set_Var); + for (int j = 0; j < outer_dim; j += 2) + r = Project(r, j + 1, Set_Var); + r.simplify(2, 4); + r_list.push_back(r); + } + + hull = SimpleHull(r_list); + // hull = Hull(r_list, std::vector<bool>(r_list.size(), true), 1, true); + } + + // extract the bound of the dimension to be tiled + Relation bound = get_loop_bound(hull, dim); + if (!bound.has_single_conjunct()) { + // further simplify the bound + hull = Approximate(hull); + bound = get_loop_bound(hull, dim); + + int i = outer_dim - 2; + while (!bound.has_single_conjunct() && i >= 0) { + hull = Project(hull, i + 1, Set_Var); + bound = get_loop_bound(hull, dim); + i -= 2; + } + + if (!bound.has_single_conjunct()) + throw loop_error("cannot handle tile bounds"); + } + + // separate lower and upper bounds + std::vector<GEQ_Handle> lb_list, ub_list; + { + Conjunct *c = bound.query_DNF()->single_conjunct(); + for (GEQ_Iterator gi(c->GEQs()); gi; gi++) { + int coef = (*gi).get_coef(bound.set_var(dim + 1)); + if (coef < 0) + 
ub_list.push_back(*gi); + else if (coef > 0) + lb_list.push_back(*gi); + } + } + if (lb_list.size() == 0) + throw loop_error( + "unable to calculate tile controlling loop lower bound"); + if (ub_list.size() == 0) + throw loop_error( + "unable to calculate tile controlling loop upper bound"); + + // find the simplest lower bound for StridedTile or simplest iteration count for CountedTile + int simplest_lb = 0, simplest_ub = 0; + if (method == StridedTile) { + int best_cost = INT_MAX; + for (int i = 0; i < lb_list.size(); i++) { + int cost = 0; + for (Constr_Vars_Iter ci(lb_list[i]); ci; ci++) { + switch ((*ci).var->kind()) { + case Input_Var: { + cost += 5; + break; + } + case Global_Var: { + cost += 2; + break; + } + default: + cost += 15; + break; + } + } + + if (cost < best_cost) { + best_cost = cost; + simplest_lb = i; + } + } + } else if (method == CountedTile) { + std::map<Variable_ID, coef_t> s1, s2, s3; + int best_cost = INT_MAX; + for (int i = 0; i < lb_list.size(); i++) + for (int j = 0; j < ub_list.size(); j++) { + int cost = 0; + + for (Constr_Vars_Iter ci(lb_list[i]); ci; ci++) { + switch ((*ci).var->kind()) { + case Input_Var: { + s1[(*ci).var] += (*ci).coef; + break; + } + case Global_Var: { + s2[(*ci).var] += (*ci).coef; + break; + } + case Exists_Var: + case Wildcard_Var: { + s3[(*ci).var] += (*ci).coef; + break; + } + default: + cost = INT_MAX - 2; + break; + } + } + + for (Constr_Vars_Iter ci(ub_list[j]); ci; ci++) { + switch ((*ci).var->kind()) { + case Input_Var: { + s1[(*ci).var] += (*ci).coef; + break; + } + case Global_Var: { + s2[(*ci).var] += (*ci).coef; + break; + } + case Exists_Var: + case Wildcard_Var: { + s3[(*ci).var] += (*ci).coef; + break; + } + default: + if (cost == INT_MAX - 2) + cost = INT_MAX - 1; + else + cost = INT_MAX - 3; + break; + } + } + + if (cost == 0) { + for (std::map<Variable_ID, coef_t>::iterator k = + s1.begin(); k != s1.end(); k++) + if ((*k).second != 0) + cost += 5; + for (std::map<Variable_ID, 
coef_t>::iterator k = + s2.begin(); k != s2.end(); k++) + if ((*k).second != 0) + cost += 2; + for (std::map<Variable_ID, coef_t>::iterator k = + s3.begin(); k != s3.end(); k++) + if ((*k).second != 0) + cost += 15; + } + + if (cost < best_cost) { + best_cost = cost; + simplest_lb = i; + simplest_ub = j; + } + } + } + + // prepare the new transformation relations + for (std::set<int>::iterator i = same_tile_controlling_loop.begin(); + i != same_tile_controlling_loop.end(); i++) { + Relation r(stmt[*i].xform.n_out(), stmt[*i].xform.n_out() + 2); + F_And *f_root = r.add_and(); + for (int j = 0; j < outer_dim - 1; j++) { + EQ_Handle h = f_root->add_EQ(); + h.update_coef(r.output_var(j + 1), 1); + h.update_coef(r.input_var(j + 1), -1); + } + + for (int j = outer_dim - 1; j < stmt[*i].xform.n_out(); j++) { + EQ_Handle h = f_root->add_EQ(); + h.update_coef(r.output_var(j + 3), 1); + h.update_coef(r.input_var(j + 1), -1); + } + + EQ_Handle h = f_root->add_EQ(); + h.update_coef(r.output_var(outer_dim), 1); + h.update_const(-lex[outer_dim - 1]); + + stmt[*i].xform = Composition(r, stmt[*i].xform); + } + + // add tiling constraints. + for (std::set<int>::iterator i = same_tile_controlling_loop.begin(); + i != same_tile_controlling_loop.end(); i++) { + F_And *f_super_root = stmt[*i].xform.and_with_and(); + F_Exists *f_exists = f_super_root->add_exists(); + F_And *f_root = f_exists->add_and(); + + // create a lower bound variable for easy formula creation later + Variable_ID aligned_lb; + { + Variable_ID lb = f_exists->declare(); + coef_t coef = lb_list[simplest_lb].get_coef( + bound.set_var(dim + 1)); + if (coef == 1) { // e.g. 
if i >= m+5, then LB = m+5 + EQ_Handle h = f_root->add_EQ(); + h.update_coef(lb, 1); + for (Constr_Vars_Iter ci(lb_list[simplest_lb]); ci; ci++) { + switch ((*ci).var->kind()) { + case Input_Var: { + int pos = (*ci).var->get_position(); + if (pos != dim + 1) + h.update_coef(stmt[*i].xform.output_var(pos), + (*ci).coef); + break; + } + case Global_Var: { + Global_Var_ID g = (*ci).var->get_global_var(); + Variable_ID v; + if (g->arity() == 0) + v = stmt[*i].xform.get_local(g); + else + v = stmt[*i].xform.get_local(g, + (*ci).var->function_of()); + h.update_coef(v, (*ci).coef); + break; + } + default: + throw loop_error("cannot handle tile bounds"); + } + } + h.update_const(lb_list[simplest_lb].get_const()); + } else { // e.g. if 2i >= m+5, then m+5 <= 2*LB < m+5+2 + GEQ_Handle h1 = f_root->add_GEQ(); + GEQ_Handle h2 = f_root->add_GEQ(); + for (Constr_Vars_Iter ci(lb_list[simplest_lb]); ci; ci++) { + switch ((*ci).var->kind()) { + case Input_Var: { + int pos = (*ci).var->get_position(); + if (pos == dim + 1) { + h1.update_coef(lb, (*ci).coef); + h2.update_coef(lb, -(*ci).coef); + } else { + h1.update_coef(stmt[*i].xform.output_var(pos), + (*ci).coef); + h2.update_coef(stmt[*i].xform.output_var(pos), + -(*ci).coef); + } + break; + } + case Global_Var: { + Global_Var_ID g = (*ci).var->get_global_var(); + Variable_ID v; + if (g->arity() == 0) + v = stmt[*i].xform.get_local(g); + else + v = stmt[*i].xform.get_local(g, + (*ci).var->function_of()); + h1.update_coef(v, (*ci).coef); + h2.update_coef(v, -(*ci).coef); + break; + } + default: + throw loop_error("cannot handle tile bounds"); + } + } + h1.update_const(lb_list[simplest_lb].get_const()); + h2.update_const(-lb_list[simplest_lb].get_const()); + h2.update_const(coef - 1); + } + + Variable_ID offset_lb; + if (alignment_offset == 0) + offset_lb = lb; + else { + EQ_Handle h = f_root->add_EQ(); + offset_lb = f_exists->declare(); + h.update_coef(offset_lb, 1); + h.update_coef(lb, -1); + h.update_const(alignment_offset); + } 
+ + if (alignment_multiple == 1) { // trivial + aligned_lb = offset_lb; + } else { // e.g. to align at 4, aligned_lb = 4*alpha && LB-4 < 4*alpha <= LB + aligned_lb = f_exists->declare(); + Variable_ID e = f_exists->declare(); + + EQ_Handle h = f_root->add_EQ(); + h.update_coef(aligned_lb, 1); + h.update_coef(e, -alignment_multiple); + + GEQ_Handle h1 = f_root->add_GEQ(); + GEQ_Handle h2 = f_root->add_GEQ(); + h1.update_coef(e, alignment_multiple); + h2.update_coef(e, -alignment_multiple); + h1.update_coef(offset_lb, -1); + h2.update_coef(offset_lb, 1); + h1.update_const(alignment_multiple - 1); + } + } + + // create an upper bound variable for easy formula creation later + Variable_ID ub = f_exists->declare(); + { + coef_t coef = -ub_list[simplest_ub].get_coef( + bound.set_var(dim + 1)); + if (coef == 1) { // e.g. if i <= m+5, then UB = m+5 + EQ_Handle h = f_root->add_EQ(); + h.update_coef(ub, -1); + for (Constr_Vars_Iter ci(ub_list[simplest_ub]); ci; ci++) { + switch ((*ci).var->kind()) { + case Input_Var: { + int pos = (*ci).var->get_position(); + if (pos != dim + 1) + h.update_coef(stmt[*i].xform.output_var(pos), + (*ci).coef); + break; + } + case Global_Var: { + Global_Var_ID g = (*ci).var->get_global_var(); + Variable_ID v; + if (g->arity() == 0) + v = stmt[*i].xform.get_local(g); + else + v = stmt[*i].xform.get_local(g, + (*ci).var->function_of()); + h.update_coef(v, (*ci).coef); + break; + } + default: + throw loop_error("cannot handle tile bounds"); + } + } + h.update_const(ub_list[simplest_ub].get_const()); + } else { // e.g. 
if 2i <= m+5, then m+5-2 < 2*UB <= m+5 + GEQ_Handle h1 = f_root->add_GEQ(); + GEQ_Handle h2 = f_root->add_GEQ(); + for (Constr_Vars_Iter ci(ub_list[simplest_ub]); ci; ci++) { + switch ((*ci).var->kind()) { + case Input_Var: { + int pos = (*ci).var->get_position(); + if (pos == dim + 1) { + h1.update_coef(ub, -(*ci).coef); + h2.update_coef(ub, (*ci).coef); + } else { + h1.update_coef(stmt[*i].xform.output_var(pos), + -(*ci).coef); + h2.update_coef(stmt[*i].xform.output_var(pos), + (*ci).coef); + } + break; + } + case Global_Var: { + Global_Var_ID g = (*ci).var->get_global_var(); + Variable_ID v; + if (g->arity() == 0) + v = stmt[*i].xform.get_local(g); + else + v = stmt[*i].xform.get_local(g, + (*ci).var->function_of()); + h1.update_coef(v, -(*ci).coef); + h2.update_coef(v, (*ci).coef); + break; + } + default: + throw loop_error("cannot handle tile bounds"); + } + } + h1.update_const(-ub_list[simplest_ub].get_const()); + h2.update_const(ub_list[simplest_ub].get_const()); + h1.update_const(coef - 1); + } + } + + // insert tile controlling loop constraints + if (method == StridedTile) { // e.g. ii = LB + 32 * alpha && alpha >= 0 + Variable_ID e = f_exists->declare(); + GEQ_Handle h1 = f_root->add_GEQ(); + h1.update_coef(e, 1); + + EQ_Handle h2 = f_root->add_EQ(); + h2.update_coef(stmt[*i].xform.output_var(outer_dim + 1), 1); + h2.update_coef(e, -tile_size); + h2.update_coef(aligned_lb, -1); + } else if (method == CountedTile) { // e.g. 0 <= ii < ceiling((UB-LB+1)/32) + GEQ_Handle h1 = f_root->add_GEQ(); + h1.update_coef(stmt[*i].xform.output_var(outer_dim + 1), 1); + + GEQ_Handle h2 = f_root->add_GEQ(); + h2.update_coef(stmt[*i].xform.output_var(outer_dim + 1), + -tile_size); + h2.update_coef(aligned_lb, -1); + h2.update_coef(ub, 1); + } + + // special care for private statements like overflow assignment + if (private_stmt.find(*i) != private_stmt.end()) { // e.g. 
ii <= UB + GEQ_Handle h = f_root->add_GEQ(); + h.update_coef(stmt[*i].xform.output_var(outer_dim + 1), -1); + h.update_coef(ub, 1); + } + // if (private_stmt.find(*i) != private_stmt.end()) { + // if (stmt[*i].xform.n_out() > dim+3) { // e.g. ii <= UB && i = ii + // GEQ_Handle h = f_root->add_GEQ(); + // h.update_coef(stmt[*i].xform.output_var(outer_dim+1), -1); + // h.update_coef(ub, 1); + + // stmt[*i].xform = Project(stmt[*i].xform, dim+3, Output_Var); + // f_root = stmt[*i].xform.and_with_and(); + // EQ_Handle h1 = f_root->add_EQ(); + // h1.update_coef(stmt[*i].xform.output_var(dim+3), 1); + // h1.update_coef(stmt[*i].xform.output_var(outer_dim+1), -1); + // } + // else if (method == StridedTile) { // e.g. ii <= UB since i does not exist + // GEQ_Handle h = f_root->add_GEQ(); + // h.update_coef(stmt[*i].xform.output_var(outer_dim+1), -1); + // h.update_coef(ub, 1); + // } + // } + + // restrict original loop index inside the tile + else { + if (method == StridedTile) { // e.g. ii <= i < ii + tile_size + GEQ_Handle h1 = f_root->add_GEQ(); + h1.update_coef(stmt[*i].xform.output_var(dim + 3), 1); + h1.update_coef(stmt[*i].xform.output_var(outer_dim + 1), + -1); + + GEQ_Handle h2 = f_root->add_GEQ(); + h2.update_coef(stmt[*i].xform.output_var(dim + 3), -1); + h2.update_coef(stmt[*i].xform.output_var(outer_dim + 1), 1); + h2.update_const(tile_size - 1); + } else if (method == CountedTile) { // e.g. 
LB+32*ii <= i < LB+32*ii+tile_size + GEQ_Handle h1 = f_root->add_GEQ(); + h1.update_coef(stmt[*i].xform.output_var(outer_dim + 1), + -tile_size); + h1.update_coef(stmt[*i].xform.output_var(dim + 3), 1); + h1.update_coef(aligned_lb, -1); + + GEQ_Handle h2 = f_root->add_GEQ(); + h2.update_coef(stmt[*i].xform.output_var(outer_dim + 1), + tile_size); + h2.update_coef(stmt[*i].xform.output_var(dim + 3), -1); + h2.update_const(tile_size - 1); + h2.update_coef(aligned_lb, 1); + } + } + } + } + + // update loop level information + for (std::set<int>::iterator i = same_tile_controlling_loop.begin(); + i != same_tile_controlling_loop.end(); i++) { + for (int j = 1; j <= stmt[*i].loop_level.size(); j++) + switch (stmt[*i].loop_level[j - 1].type) { + case LoopLevelOriginal: + break; + case LoopLevelTile: + if (stmt[*i].loop_level[j - 1].payload >= outer_level) + stmt[*i].loop_level[j - 1].payload++; + break; + default: + throw loop_error( + "unknown loop level type for statement " + + to_string(*i)); + } + + LoopLevel ll; + ll.type = LoopLevelTile; + ll.payload = level + 1; + ll.parallel_level = 0; + stmt[*i].loop_level.insert( + stmt[*i].loop_level.begin() + (outer_level - 1), ll); + } +} + diff --git a/loop_unroll.cc b/loop_unroll.cc new file mode 100644 index 0000000..b75b738 --- /dev/null +++ b/loop_unroll.cc @@ -0,0 +1,1166 @@ +/* + * loop_unroll.cc + * + * Created on: Nov 12, 2012 + * Author: anand + */ + +#include <codegen.h> +#include <code_gen/CG_utils.h> +#include "loop.hh" +#include "omegatools.hh" +#include "ir_code.hh" +#include "chill_error.hh" +#include <math.h> + +using namespace omega; + + +std::set<int> Loop::unroll(int stmt_num, int level, int unroll_amount, + std::vector<std::vector<std::string> > idxNames, + int cleanup_split_level) { + // check for sanity of parameters + // check for sanity of parameters + if (unroll_amount < 0) + throw std::invalid_argument( + "invalid unroll amount " + to_string(unroll_amount)); + if (stmt_num < 0 || stmt_num >= 
stmt.size()) + throw std::invalid_argument("invalid statement " + to_string(stmt_num)); + if (level <= 0 || level > stmt[stmt_num].loop_level.size()) + throw std::invalid_argument("invalid loop level " + to_string(level)); + + if (cleanup_split_level == 0) + cleanup_split_level = level; + if (cleanup_split_level > level) + throw std::invalid_argument( + "cleanup code must be split at or outside the unrolled loop level " + + to_string(level)); + if (cleanup_split_level <= 0) + throw std::invalid_argument( + "invalid split loop level " + to_string(cleanup_split_level)); + + // invalidate saved codegen computation + delete last_compute_cgr_; + last_compute_cgr_ = NULL; + delete last_compute_cg_; + last_compute_cg_ = NULL; + + int dim = 2 * level - 1; + std::vector<int> lex = getLexicalOrder(stmt_num); + std::set<int> same_loop = getStatements(lex, dim - 1); + + // nothing to do + if (unroll_amount == 1) + return std::set<int>(); + + for (std::set<int>::iterator i = same_loop.begin(); i != same_loop.end(); + i++) { + std::vector<std::pair<int, DependenceVector> > D; + int n = stmt[*i].xform.n_out(); + for (DependenceGraph::EdgeList::iterator j = + dep.vertex[*i].second.begin(); j != dep.vertex[*i].second.end(); + j++) { + if (same_loop.find(j->first) != same_loop.end()) + for (int k = 0; k < j->second.size(); k++) { + DependenceVector dv = j->second[k]; + int dim2 = level - 1; + if (dv.type != DEP_CONTROL) { + + while (stmt[*i].loop_level[dim2].type == LoopLevelTile) { + dim2 = stmt[*i].loop_level[dim2].payload - 1; + } + dim2 = stmt[*i].loop_level[dim2].payload; + + /*if (dv.isCarried(dim2) + && (dv.hasNegative(dim2) && !dv.quasi)) + throw loop_error( + "loop error: Unrolling is illegal, dependence violation!"); + + if (dv.isCarried(dim2) + && (dv.hasPositive(dim2) && dv.quasi)) + throw loop_error( + "loop error: Unrolling is illegal, dependence violation!"); + */ + bool safe = false; + + if (dv.isCarried(dim2) && dv.hasPositive(dim2)) { + if (dv.quasi) + throw 
loop_error( + "loop error: a quasi dependence with a positive carried distance"); + if (!dv.quasi) { + if (dv.lbounds[dim2] != posInfinity) { + //if (dv.lbounds[dim2] != negInfinity) + if (dv.lbounds[dim2] > unroll_amount) + safe = true; + } else + safe = true; + }/* else { + if (dv.ubounds[dim2] != negInfinity) { + if (dv.ubounds[dim2] != posInfinity) + if ((-(dv.ubounds[dim2])) > unroll_amount) + safe = true; + } else + safe = true; + }*/ + + if (!safe) { + for (int l = level + 1; l <= (n - 1) / 2; l++) { + int dim3 = l - 1; + + if (stmt[*i].loop_level[dim3].type + != LoopLevelTile) + dim3 = + stmt[*i].loop_level[dim3].payload; + else { + while (stmt[*i].loop_level[dim3].type + == LoopLevelTile) { + dim3 = + stmt[*i].loop_level[dim3].payload + - 1; + } + dim3 = + stmt[*i].loop_level[dim3].payload; + } + + if (dim3 > dim2) { + + if (dv.hasPositive(dim3)) + break; + else if (dv.hasNegative(dim3)) + throw loop_error( + "loop error: Unrolling is illegal, dependence violation!"); + } + } + } + } + } + } + } + } + // extract the intersection of the iteration space to be considered + Relation hull = Relation::True(level); + apply_xform(same_loop); + for (std::set<int>::iterator i = same_loop.begin(); i != same_loop.end(); + i++) { + if (stmt[*i].IS.is_upper_bound_satisfiable()) { + Relation mapping(stmt[*i].IS.n_set(), level); + F_And *f_root = mapping.add_and(); + for (int j = 1; j <= level; j++) { + EQ_Handle h = f_root->add_EQ(); + h.update_coef(mapping.input_var(j), 1); + h.update_coef(mapping.output_var(j), -1); + } + hull = Intersection(hull, + Range(Restrict_Domain(mapping, copy(stmt[*i].IS)))); + hull.simplify(2, 4); + + } + } + for (int i = 1; i <= level; i++) { + std::string name = tmp_loop_var_name_prefix + to_string(i); + hull.name_set_var(i, name); + } + hull.setup_names(); + + // extract the exact loop bound of the dimension to be unrolled + if (is_single_loop_iteration(hull, level, this->known)) + return std::set<int>(); + Relation bound = 
get_loop_bound(hull, level, this->known); + if (!bound.has_single_conjunct() || !bound.is_satisfiable() + || bound.is_tautology()) + throw loop_error("unable to extract loop bound for unrolling"); + + // extract the loop stride + coef_t stride; + std::pair<EQ_Handle, Variable_ID> result = find_simplest_stride(bound, + bound.set_var(level)); + if (result.second == NULL) + stride = 1; + else + stride = abs(result.first.get_coef(result.second)) + / gcd(abs(result.first.get_coef(result.second)), + abs(result.first.get_coef(bound.set_var(level)))); + + // separate lower and upper bounds + std::vector<GEQ_Handle> lb_list, ub_list; + { + Conjunct *c = bound.query_DNF()->single_conjunct(); + for (GEQ_Iterator gi(c->GEQs()); gi; gi++) { + int coef = (*gi).get_coef(bound.set_var(level)); + if (coef < 0) + ub_list.push_back(*gi); + else if (coef > 0) + lb_list.push_back(*gi); + } + } + + // simplify overflow expression for each pair of upper and lower bounds + std::vector<std::vector<std::map<Variable_ID, int> > > overflow_table( + lb_list.size(), + std::vector<std::map<Variable_ID, int> >(ub_list.size(), + std::map<Variable_ID, int>())); + bool is_overflow_simplifiable = true; + for (int i = 0; i < lb_list.size(); i++) { + if (!is_overflow_simplifiable) + break; + + for (int j = 0; j < ub_list.size(); j++) { + // lower bound or upper bound has non-unit coefficient, can't simplify + if (ub_list[j].get_coef(bound.set_var(level)) != -1 + || lb_list[i].get_coef(bound.set_var(level)) != 1) { + is_overflow_simplifiable = false; + break; + } + + for (Constr_Vars_Iter ci(ub_list[j]); ci; ci++) { + switch ((*ci).var->kind()) { + case Input_Var: { + if ((*ci).var != bound.set_var(level)) + overflow_table[i][j][(*ci).var] += (*ci).coef; + + break; + } + case Global_Var: { + Global_Var_ID g = (*ci).var->get_global_var(); + Variable_ID v; + if (g->arity() == 0) + v = bound.get_local(g); + else + v = bound.get_local(g, (*ci).var->function_of()); + overflow_table[i][j][(*ci).var] += 
(*ci).coef; + break; + } + default: + throw loop_error("failed to calculate overflow amount"); + } + } + overflow_table[i][j][NULL] += ub_list[j].get_const(); + + for (Constr_Vars_Iter ci(lb_list[i]); ci; ci++) { + switch ((*ci).var->kind()) { + case Input_Var: { + if ((*ci).var != bound.set_var(level)) { + overflow_table[i][j][(*ci).var] += (*ci).coef; + if (overflow_table[i][j][(*ci).var] == 0) + overflow_table[i][j].erase( + overflow_table[i][j].find((*ci).var)); + } + break; + } + case Global_Var: { + Global_Var_ID g = (*ci).var->get_global_var(); + Variable_ID v; + if (g->arity() == 0) + v = bound.get_local(g); + else + v = bound.get_local(g, (*ci).var->function_of()); + overflow_table[i][j][(*ci).var] += (*ci).coef; + if (overflow_table[i][j][(*ci).var] == 0) + overflow_table[i][j].erase( + overflow_table[i][j].find((*ci).var)); + break; + } + default: + throw loop_error("failed to calculate overflow amount"); + } + } + overflow_table[i][j][NULL] += lb_list[i].get_const(); + + overflow_table[i][j][NULL] += stride; + if (unroll_amount == 0 + || (overflow_table[i][j].size() == 1 + && overflow_table[i][j][NULL] / stride + < unroll_amount)) + unroll_amount = overflow_table[i][j][NULL] / stride; + } + } + + // loop iteration count can't be determined, bail out gracefully + if (unroll_amount == 0) + return std::set<int>(); + + // further simply overflow calculation using coefficients' modular + if (is_overflow_simplifiable) { + for (int i = 0; i < lb_list.size(); i++) + for (int j = 0; j < ub_list.size(); j++) + if (stride == 1) { + for (std::map<Variable_ID, int>::iterator k = + overflow_table[i][j].begin(); + k != overflow_table[i][j].end();) + if ((*k).first != NULL) { + int t = int_mod_hat((*k).second, unroll_amount); + if (t == 0) { + overflow_table[i][j].erase(k++); + } else { + int t2 = hull.query_variable_mod((*k).first, + unroll_amount); + if (t2 != INT_MAX) { + overflow_table[i][j][NULL] += t * t2; + overflow_table[i][j].erase(k++); + } else { + 
(*k).second = t; + k++; + } + } + } else + k++; + + overflow_table[i][j][NULL] = int_mod_hat( + overflow_table[i][j][NULL], unroll_amount); + + // Since we don't have MODULO instruction in SUIF yet (only MOD), make all coef positive in the final formula + for (std::map<Variable_ID, int>::iterator k = + overflow_table[i][j].begin(); + k != overflow_table[i][j].end(); k++) + if ((*k).second < 0) + (*k).second += unroll_amount; + } + } + + // build overflow statement + CG_outputBuilder *ocg = ir->builder(); + CG_outputRepr *overflow_code = NULL; + Relation cond_upper(level), cond_lower(level); + Relation overflow_constraint(0); + F_And *overflow_constraint_root = overflow_constraint.add_and(); + std::vector<Free_Var_Decl *> over_var_list; + if (is_overflow_simplifiable && lb_list.size() == 1) { + for (int i = 0; i < ub_list.size(); i++) { + if (overflow_table[0][i].size() == 1) { + // upper splitting condition + GEQ_Handle h = cond_upper.and_with_GEQ(ub_list[i]); + h.update_const( + ((overflow_table[0][i][NULL] / stride) % unroll_amount) + * -stride); + } else { + // upper splitting condition + std::string over_name = overflow_var_name_prefix + + to_string(overflow_var_name_counter++); + Free_Var_Decl *over_free_var = new Free_Var_Decl(over_name); + over_var_list.push_back(over_free_var); + GEQ_Handle h = cond_upper.and_with_GEQ(ub_list[i]); + h.update_coef(cond_upper.get_local(over_free_var), -stride); + + // insert constraint 0 <= overflow < unroll_amount + Variable_ID v = overflow_constraint.get_local(over_free_var); + GEQ_Handle h1 = overflow_constraint_root->add_GEQ(); + h1.update_coef(v, 1); + GEQ_Handle h2 = overflow_constraint_root->add_GEQ(); + h2.update_coef(v, -1); + h2.update_const(unroll_amount - 1); + + // create overflow assignment + bound.setup_names(); // hack to fix omega relation variable names issue + CG_outputRepr *rhs = NULL; + bool is_split_illegal = false; + for (std::map<Variable_ID, int>::iterator j = + overflow_table[0][i].begin(); + j != 
overflow_table[0][i].end(); j++) + if ((*j).first != NULL) { + if ((*j).first->kind() == Input_Var + && (*j).first->get_position() + >= cleanup_split_level) + is_split_illegal = true; + + CG_outputRepr *t = ocg->CreateIdent((*j).first->name()); + if ((*j).second != 1) + t = ocg->CreateTimes(ocg->CreateInt((*j).second), + t); + rhs = ocg->CreatePlus(rhs, t); + } else if ((*j).second != 0) + rhs = ocg->CreatePlus(rhs, ocg->CreateInt((*j).second)); + + if (is_split_illegal) { + rhs->clear(); + delete rhs; + throw loop_error( + "cannot split cleanup code at loop level " + + to_string(cleanup_split_level) + + " due to overflow variable data dependence"); + } + + if (stride != 1) + rhs = ocg->CreateIntegerCeil(rhs, ocg->CreateInt(stride)); + rhs = ocg->CreateIntegerMod(rhs, ocg->CreateInt(unroll_amount)); + + CG_outputRepr *lhs = ocg->CreateIdent(over_name); + init_code = ocg->StmtListAppend(init_code, + ocg->CreateAssignment(0, lhs, ocg->CreateInt(0))); + lhs = ocg->CreateIdent(over_name); + overflow_code = ocg->StmtListAppend(overflow_code, + ocg->CreateAssignment(0, lhs, rhs)); + } + } + + // lower splitting condition + GEQ_Handle h = cond_lower.and_with_GEQ(lb_list[0]); + } else if (is_overflow_simplifiable && ub_list.size() == 1) { + for (int i = 0; i < lb_list.size(); i++) { + + if (overflow_table[i][0].size() == 1) { + // lower splitting condition + GEQ_Handle h = cond_lower.and_with_GEQ(lb_list[i]); + h.update_const(overflow_table[i][0][NULL] * -stride); + } else { + // lower splitting condition + std::string over_name = overflow_var_name_prefix + + to_string(overflow_var_name_counter++); + Free_Var_Decl *over_free_var = new Free_Var_Decl(over_name); + over_var_list.push_back(over_free_var); + GEQ_Handle h = cond_lower.and_with_GEQ(lb_list[i]); + h.update_coef(cond_lower.get_local(over_free_var), -stride); + + // insert constraint 0 <= overflow < unroll_amount + Variable_ID v = overflow_constraint.get_local(over_free_var); + GEQ_Handle h1 = 
overflow_constraint_root->add_GEQ(); + h1.update_coef(v, 1); + GEQ_Handle h2 = overflow_constraint_root->add_GEQ(); + h2.update_coef(v, -1); + h2.update_const(unroll_amount - 1); + + // create overflow assignment + bound.setup_names(); // hack to fix omega relation variable names issue + CG_outputRepr *rhs = NULL; + for (std::map<Variable_ID, int>::iterator j = + overflow_table[0][i].begin(); + j != overflow_table[0][i].end(); j++) + if ((*j).first != NULL) { + CG_outputRepr *t = ocg->CreateIdent((*j).first->name()); + if ((*j).second != 1) + t = ocg->CreateTimes(ocg->CreateInt((*j).second), + t); + rhs = ocg->CreatePlus(rhs, t); + } else if ((*j).second != 0) + rhs = ocg->CreatePlus(rhs, ocg->CreateInt((*j).second)); + + if (stride != 1) + rhs = ocg->CreateIntegerCeil(rhs, ocg->CreateInt(stride)); + rhs = ocg->CreateIntegerMod(rhs, ocg->CreateInt(unroll_amount)); + + CG_outputRepr *lhs = ocg->CreateIdent(over_name); + init_code = ocg->StmtListAppend(init_code, + ocg->CreateAssignment(0, lhs, ocg->CreateInt(0))); + lhs = ocg->CreateIdent(over_name); + overflow_code = ocg->StmtListAppend(overflow_code, + ocg->CreateAssignment(0, lhs, rhs)); + } + } + + // upper splitting condition + GEQ_Handle h = cond_upper.and_with_GEQ(ub_list[0]); + } else { + std::string over_name = overflow_var_name_prefix + + to_string(overflow_var_name_counter++); + Free_Var_Decl *over_free_var = new Free_Var_Decl(over_name); + over_var_list.push_back(over_free_var); + + std::vector<CG_outputRepr *> lb_repr_list, ub_repr_list; + for (int i = 0; i < lb_list.size(); i++) { + lb_repr_list.push_back( + output_lower_bound_repr(ocg, lb_list[i], + bound.set_var(dim + 1), result.first, result.second, + bound, Relation::True(bound.n_set()), + std::vector<std::pair<CG_outputRepr *, int> >( + bound.n_set(), + std::make_pair( + static_cast<CG_outputRepr *>(NULL), + 0)))); + GEQ_Handle h = cond_lower.and_with_GEQ(lb_list[i]); + } + for (int i = 0; i < ub_list.size(); i++) { + ub_repr_list.push_back( + 
output_upper_bound_repr(ocg, ub_list[i], + bound.set_var(dim + 1), bound, + std::vector<std::pair<CG_outputRepr *, int> >( + bound.n_set(), + std::make_pair( + static_cast<CG_outputRepr *>(NULL), + 0)))); + GEQ_Handle h = cond_upper.and_with_GEQ(ub_list[i]); + h.update_coef(cond_upper.get_local(over_free_var), -stride); + } + + CG_outputRepr *lbRepr, *ubRepr; + if (lb_repr_list.size() > 1) + lbRepr = ocg->CreateInvoke("max", lb_repr_list); + else if (lb_repr_list.size() == 1) + lbRepr = lb_repr_list[0]; + + if (ub_repr_list.size() > 1) + ubRepr = ocg->CreateInvoke("min", ub_repr_list); + else if (ub_repr_list.size() == 1) + ubRepr = ub_repr_list[0]; + + // create overflow assignment + CG_outputRepr *rhs = ocg->CreatePlus(ocg->CreateMinus(ubRepr, lbRepr), + ocg->CreateInt(1)); + if (stride != 1) + rhs = ocg->CreateIntegerFloor(rhs, ocg->CreateInt(stride)); + rhs = ocg->CreateIntegerMod(rhs, ocg->CreateInt(unroll_amount)); + CG_outputRepr *lhs = ocg->CreateIdent(over_name); + init_code = ocg->StmtListAppend(init_code, + ocg->CreateAssignment(0, lhs, ocg->CreateInt(0))); + lhs = ocg->CreateIdent(over_name); + overflow_code = ocg->CreateAssignment(0, lhs, rhs); + + // insert constraint 0 <= overflow < unroll_amount + Variable_ID v = overflow_constraint.get_local(over_free_var); + GEQ_Handle h1 = overflow_constraint_root->add_GEQ(); + h1.update_coef(v, 1); + GEQ_Handle h2 = overflow_constraint_root->add_GEQ(); + h2.update_coef(v, -1); + h2.update_const(unroll_amount - 1); + } + + // insert overflow statement + int overflow_stmt_num = -1; + if (overflow_code != NULL) { + // build iteration space for overflow statement + Relation mapping(level, cleanup_split_level - 1); + F_And *f_root = mapping.add_and(); + for (int i = 1; i < cleanup_split_level; i++) { + EQ_Handle h = f_root->add_EQ(); + h.update_coef(mapping.output_var(i), 1); + h.update_coef(mapping.input_var(i), -1); + } + Relation overflow_IS = Range(Restrict_Domain(mapping, copy(hull))); + for (int i = 1; i < 
cleanup_split_level; i++) + overflow_IS.name_set_var(i, hull.set_var(i)->name()); + overflow_IS.setup_names(); + + // build dumb transformation relation for overflow statement + Relation overflow_xform(cleanup_split_level - 1, + 2 * (cleanup_split_level - 1) + 1); + f_root = overflow_xform.add_and(); + for (int i = 1; i <= cleanup_split_level - 1; i++) { + EQ_Handle h = f_root->add_EQ(); + h.update_coef(overflow_xform.output_var(2 * i), 1); + h.update_coef(overflow_xform.input_var(i), -1); + + h = f_root->add_EQ(); + h.update_coef(overflow_xform.output_var(2 * i - 1), 1); + h.update_const(-lex[2 * i - 2]); + } + EQ_Handle h = f_root->add_EQ(); + h.update_coef( + overflow_xform.output_var(2 * (cleanup_split_level - 1) + 1), + 1); + h.update_const(-lex[2 * (cleanup_split_level - 1)]); + + shiftLexicalOrder(lex, 2 * cleanup_split_level - 2, 1); + Statement overflow_stmt; + + overflow_stmt.code = overflow_code; + overflow_stmt.IS = overflow_IS; + overflow_stmt.xform = overflow_xform; + overflow_stmt.loop_level = std::vector<LoopLevel>(level - 1); + overflow_stmt.ir_stmt_node = NULL; + for (int i = 0; i < level - 1; i++) { + overflow_stmt.loop_level[i].type = + stmt[stmt_num].loop_level[i].type; + if (stmt[stmt_num].loop_level[i].type == LoopLevelTile + && stmt[stmt_num].loop_level[i].payload >= level) + overflow_stmt.loop_level[i].payload = -1; + else + overflow_stmt.loop_level[i].payload = + stmt[stmt_num].loop_level[i].payload; + overflow_stmt.loop_level[i].parallel_level = + stmt[stmt_num].loop_level[i].parallel_level; + } + + stmt.push_back(overflow_stmt); + dep.insert(); + overflow_stmt_num = stmt.size() - 1; + overflow[overflow_stmt_num] = over_var_list; + + // update the global known information on overflow variable + this->known = Intersection(this->known, + Extend_Set(copy(overflow_constraint), + this->known.n_set() - overflow_constraint.n_set())); + + // update dependence graph + DependenceVector dv; + dv.type = DEP_CONTROL; + for (std::set<int>::iterator i = 
same_loop.begin(); + i != same_loop.end(); i++) + dep.connect(overflow_stmt_num, *i, dv); + dv.type = DEP_W2W; + { + IR_ScalarSymbol *overflow_sym = NULL; + std::vector<IR_ScalarRef *> scalars = ir->FindScalarRef( + overflow_code); + for (int i = scalars.size() - 1; i >= 0; i--) + if (scalars[i]->is_write()) { + overflow_sym = scalars[i]->symbol(); + break; + } + for (int i = scalars.size() - 1; i >= 0; i--) + delete scalars[i]; + dv.sym = overflow_sym; + } + dv.lbounds = std::vector<coef_t>(dep.num_dim(), 0); + dv.ubounds = std::vector<coef_t>(dep.num_dim(), 0); + int dep_dim = get_last_dep_dim_before(stmt_num, level); + for (int i = dep_dim + 1; i < dep.num_dim(); i++) { + dv.lbounds[i] = -posInfinity; + dv.ubounds[i] = posInfinity; + } + for (int i = 0; i <= dep_dim; i++) { + if (i != 0) { + dv.lbounds[i - 1] = 0; + dv.ubounds[i - 1] = 0; + } + dv.lbounds[i] = 1; + dv.ubounds[i] = posInfinity; + dep.connect(overflow_stmt_num, overflow_stmt_num, dv); + } + } + + // split the loop so it can be fully unrolled + std::set<int> new_stmts = split(stmt_num, cleanup_split_level, cond_upper); + std::set<int> new_stmts2 = split(stmt_num, cleanup_split_level, cond_lower); + new_stmts.insert(new_stmts2.begin(), new_stmts2.end()); + + // check if unrolled statements can be trivially lumped together as one statement + bool can_be_lumped = true; + if (can_be_lumped) { + for (std::set<int>::iterator i = same_loop.begin(); + i != same_loop.end(); i++) + if (*i != stmt_num) { + if (stmt[*i].loop_level.size() + != stmt[stmt_num].loop_level.size()) { + can_be_lumped = false; + break; + } + for (int j = 0; j < stmt[stmt_num].loop_level.size(); j++) + if (!(stmt[*i].loop_level[j].type + == stmt[stmt_num].loop_level[j].type + && stmt[*i].loop_level[j].payload + == stmt[stmt_num].loop_level[j].payload)) { + can_be_lumped = false; + break; + } + if (!can_be_lumped) + break; + std::vector<int> lex2 = getLexicalOrder(*i); + for (int j = 2 * level; j < lex.size() - 1; j += 2) + if (lex[j] 
!= lex2[j]) { + can_be_lumped = false; + break; + } + if (!can_be_lumped) + break; + } + } + if (can_be_lumped) { + for (std::set<int>::iterator i = same_loop.begin(); + i != same_loop.end(); i++) + if (is_inner_loop_depend_on_level(stmt[*i].IS, level, + this->known)) { + can_be_lumped = false; + break; + } + } + if (can_be_lumped) { + for (std::set<int>::iterator i = same_loop.begin(); + i != same_loop.end(); i++) + if (*i != stmt_num) { + if (!(Must_Be_Subset(copy(stmt[*i].IS), copy(stmt[stmt_num].IS)) + && Must_Be_Subset(copy(stmt[stmt_num].IS), + copy(stmt[*i].IS)))) { + can_be_lumped = false; + break; + } + } + } + if (can_be_lumped) { + for (std::set<int>::iterator i = same_loop.begin(); + i != same_loop.end(); i++) { + for (DependenceGraph::EdgeList::iterator j = + dep.vertex[*i].second.begin(); + j != dep.vertex[*i].second.end(); j++) + if (same_loop.find(j->first) != same_loop.end()) { + for (int k = 0; k < j->second.size(); k++) + if (j->second[k].type == DEP_CONTROL + || j->second[k].type == DEP_UNKNOWN) { + can_be_lumped = false; + break; + } + if (!can_be_lumped) + break; + } + if (!can_be_lumped) + break; + } + } + + // insert unrolled statements + int old_num_stmt = stmt.size(); + if (!can_be_lumped) { + std::map<int, std::vector<int> > what_stmt_num; + + for (int j = 1; j < unroll_amount; j++) { + for (std::set<int>::iterator i = same_loop.begin(); + i != same_loop.end(); i++) { + Statement new_stmt; + + std::vector<std::string> loop_vars; + std::vector<CG_outputRepr *> subs; + loop_vars.push_back(stmt[*i].IS.set_var(level)->name()); + subs.push_back( + ocg->CreatePlus( + ocg->CreateIdent( + stmt[*i].IS.set_var(level)->name()), + ocg->CreateInt(j * stride))); + new_stmt.code = ocg->CreateSubstitutedStmt(0, + stmt[*i].code->clone(), loop_vars, subs); + + new_stmt.IS = adjust_loop_bound(stmt[*i].IS, level, j * stride); + add_loop_stride(new_stmt.IS, bound, level - 1, + unroll_amount * stride); + + new_stmt.xform = copy(stmt[*i].xform); + + 
new_stmt.loop_level = stmt[*i].loop_level; + new_stmt.ir_stmt_node = NULL; + stmt.push_back(new_stmt); + dep.insert(); + what_stmt_num[*i].push_back(stmt.size() - 1); + } + } + for (std::set<int>::iterator i = same_loop.begin(); + i != same_loop.end(); i++) + add_loop_stride(stmt[*i].IS, bound, level - 1, + unroll_amount * stride); + + // update dependence graph + if (stmt[stmt_num].loop_level[level - 1].type == LoopLevelOriginal) { + int dep_dim = stmt[stmt_num].loop_level[level - 1].payload; + int new_stride = unroll_amount * stride; + for (int i = 0; i < old_num_stmt; i++) { + std::vector<std::pair<int, DependenceVector> > D; + + for (DependenceGraph::EdgeList::iterator j = + dep.vertex[i].second.begin(); + j != dep.vertex[i].second.end();) { + if (same_loop.find(i) != same_loop.end()) { + if (same_loop.find(j->first) != same_loop.end()) { + for (int k = 0; k < j->second.size(); k++) { + DependenceVector dv = j->second[k]; + if (dv.type == DEP_CONTROL + || dv.type == DEP_UNKNOWN) { + D.push_back(std::make_pair(j->first, dv)); + for (int kk = 0; kk < unroll_amount - 1; + kk++) + if (what_stmt_num[i][kk] != -1 + && what_stmt_num[j->first][kk] + != -1) + dep.connect(what_stmt_num[i][kk], + what_stmt_num[j->first][kk], + dv); + } else { + coef_t lb = dv.lbounds[dep_dim]; + coef_t ub = dv.ubounds[dep_dim]; + if (ub == lb + && int_mod(lb, + static_cast<coef_t>(new_stride)) + == 0) { + D.push_back( + std::make_pair(j->first, dv)); + for (int kk = 0; kk < unroll_amount - 1; + kk++) + if (what_stmt_num[i][kk] != -1 + && what_stmt_num[j->first][kk] + != -1) + dep.connect( + what_stmt_num[i][kk], + what_stmt_num[j->first][kk], + dv); + } else if (lb == -posInfinity + && ub == posInfinity) { + D.push_back( + std::make_pair(j->first, dv)); + for (int kk = 0; kk < unroll_amount; + kk++) + if (kk == 0) + D.push_back( + std::make_pair(j->first, + dv)); + else if (what_stmt_num[j->first][kk + - 1] != -1) + D.push_back( + std::make_pair( + what_stmt_num[j->first][kk + - 1], + 
dv)); + for (int t = 0; t < unroll_amount - 1; + t++) + if (what_stmt_num[i][t] != -1) + for (int kk = 0; + kk < unroll_amount; + kk++) + if (kk == 0) + dep.connect( + what_stmt_num[i][t], + j->first, dv); + else if (what_stmt_num[j->first][kk + - 1] != -1) + dep.connect( + what_stmt_num[i][t], + what_stmt_num[j->first][kk + - 1], + dv); + } else { + for (int kk = 0; kk < unroll_amount; + kk++) { + if (lb != -posInfinity) { + if (kk * stride + < int_mod(lb, + static_cast<coef_t>(new_stride))) + dv.lbounds[dep_dim] = + floor( + static_cast<double>(lb) + / new_stride) + * new_stride + + new_stride; + else + dv.lbounds[dep_dim] = + floor( + static_cast<double>(lb) + / new_stride) + * new_stride; + } + if (ub != posInfinity) { + if (kk * stride + > int_mod(ub, + static_cast<coef_t>(new_stride))) + dv.ubounds[dep_dim] = + floor( + static_cast<double>(ub) + / new_stride) + * new_stride + - new_stride; + else + dv.ubounds[dep_dim] = + floor( + static_cast<double>(ub) + / new_stride) + * new_stride; + } + if (dv.ubounds[dep_dim] + >= dv.lbounds[dep_dim]) { + if (kk == 0) + D.push_back( + std::make_pair( + j->first, + dv)); + else if (what_stmt_num[j->first][kk + - 1] != -1) + D.push_back( + std::make_pair( + what_stmt_num[j->first][kk + - 1], + dv)); + } + } + for (int t = 0; t < unroll_amount - 1; + t++) + if (what_stmt_num[i][t] != -1) + for (int kk = 0; + kk < unroll_amount; + kk++) { + if (lb != -posInfinity) { + if (kk * stride + < int_mod( + lb + t + + 1, + static_cast<coef_t>(new_stride))) + dv.lbounds[dep_dim] = + floor( + static_cast<double>(lb + + (t + + 1) + * stride) + / new_stride) + * new_stride + + new_stride; + else + dv.lbounds[dep_dim] = + floor( + static_cast<double>(lb + + (t + + 1) + * stride) + / new_stride) + * new_stride; + } + if (ub != posInfinity) { + if (kk * stride + > int_mod( + ub + t + + 1, + static_cast<coef_t>(new_stride))) + dv.ubounds[dep_dim] = + floor( + static_cast<double>(ub + + (t + + 1) + * stride) + / new_stride) + * new_stride + 
- new_stride; + else + dv.ubounds[dep_dim] = + floor( + static_cast<double>(ub + + (t + + 1) + * stride) + / new_stride) + * new_stride; + } + if (dv.ubounds[dep_dim] + >= dv.lbounds[dep_dim]) { + if (kk == 0) + dep.connect( + what_stmt_num[i][t], + j->first, + dv); + else if (what_stmt_num[j->first][kk + - 1] != -1) + dep.connect( + what_stmt_num[i][t], + what_stmt_num[j->first][kk + - 1], + dv); + } + } + } + } + } + + dep.vertex[i].second.erase(j++); + } else { + for (int kk = 0; kk < unroll_amount - 1; kk++) + if (what_stmt_num[i][kk] != -1) + dep.connect(what_stmt_num[i][kk], j->first, + j->second); + + j++; + } + } else { + if (same_loop.find(j->first) != same_loop.end()) + for (int k = 0; k < j->second.size(); k++) + for (int kk = 0; kk < unroll_amount - 1; kk++) + if (what_stmt_num[j->first][kk] != -1) + D.push_back( + std::make_pair( + what_stmt_num[j->first][kk], + j->second[k])); + j++; + } + } + + for (int j = 0; j < D.size(); j++) + dep.connect(i, D[j].first, D[j].second); + } + } + + // reset lexical order for the unrolled loop body + std::set<int> new_same_loop; + + int count = 0; + + for (std::map<int, std::vector<int> >::iterator i = + what_stmt_num.begin(); i != what_stmt_num.end(); i++) { + + new_same_loop.insert(i->first); + for (int k = dim + 1; k < stmt[i->first].xform.n_out(); k += 2) + assign_const(stmt[i->first].xform, k, + get_const(stmt[(what_stmt_num.begin())->first].xform, k, + Output_Var) + count); + count++; + for (int j = 0; j < i->second.size(); j++) { + new_same_loop.insert(i->second[j]); + for (int k = dim + 1; k < stmt[i->second[j]].xform.n_out(); k += + 2) + assign_const(stmt[i->second[j]].xform, k, + get_const( + stmt[(what_stmt_num.begin())->first].xform, + k, Output_Var) + count); + count++; + } + } + setLexicalOrder(dim + 1, new_same_loop, 0, idxNames); + } else { + for (std::set<int>::iterator i = same_loop.begin(); + i != same_loop.end(); i++) + add_loop_stride(stmt[*i].IS, bound, level - 1, + unroll_amount * stride); + + 
int max_level = stmt[stmt_num].loop_level.size(); + std::vector<std::pair<int, int> > stmt_order; + for (std::set<int>::iterator i = same_loop.begin(); + i != same_loop.end(); i++) + stmt_order.push_back( + std::make_pair( + get_const(stmt[*i].xform, 2 * max_level, + Output_Var), *i)); + sort(stmt_order.begin(), stmt_order.end()); + + Statement new_stmt; + new_stmt.code = NULL; + for (int j = 1; j < unroll_amount; j++) + for (int i = 0; i < stmt_order.size(); i++) { + std::vector<std::string> loop_vars; + std::vector<CG_outputRepr *> subs; + loop_vars.push_back( + stmt[stmt_order[i].second].IS.set_var(level)->name()); + subs.push_back( + ocg->CreatePlus( + ocg->CreateIdent( + stmt[stmt_order[i].second].IS.set_var( + level)->name()), + ocg->CreateInt(j * stride))); + CG_outputRepr *code = ocg->CreateSubstitutedStmt(0, + stmt[stmt_order[i].second].code->clone(), loop_vars, + subs); + new_stmt.code = ocg->StmtListAppend(new_stmt.code, code); + } + + new_stmt.IS = copy(stmt[stmt_num].IS); + new_stmt.xform = copy(stmt[stmt_num].xform); + assign_const(new_stmt.xform, 2 * max_level, + stmt_order[stmt_order.size() - 1].first + 1); + new_stmt.loop_level = stmt[stmt_num].loop_level; + new_stmt.ir_stmt_node = NULL; + stmt.push_back(new_stmt); + dep.insert(); + + // update dependence graph + if (stmt[stmt_num].loop_level[level - 1].type == LoopLevelOriginal) { + int dep_dim = stmt[stmt_num].loop_level[level - 1].payload; + int new_stride = unroll_amount * stride; + for (int i = 0; i < old_num_stmt; i++) { + std::vector<std::pair<int, std::vector<DependenceVector> > > D; + + for (DependenceGraph::EdgeList::iterator j = + dep.vertex[i].second.begin(); + j != dep.vertex[i].second.end();) { + if (same_loop.find(i) != same_loop.end()) { + if (same_loop.find(j->first) != same_loop.end()) { + std::vector<DependenceVector> dvs11, dvs12, dvs22, + dvs21; + for (int k = 0; k < j->second.size(); k++) { + DependenceVector dv = j->second[k]; + if (dv.type == DEP_CONTROL + || dv.type == 
DEP_UNKNOWN) { + if (i == j->first) { + dvs11.push_back(dv); + dvs22.push_back(dv); + } else + throw loop_error( + "unrolled statements lumped together illegally"); + } else { + coef_t lb = dv.lbounds[dep_dim]; + coef_t ub = dv.ubounds[dep_dim]; + if (ub == lb + && int_mod(lb, + static_cast<coef_t>(new_stride)) + == 0) { + dvs11.push_back(dv); + dvs22.push_back(dv); + } else { + if (lb != -posInfinity) + dv.lbounds[dep_dim] = ceil( + static_cast<double>(lb) + / new_stride) + * new_stride; + if (ub != posInfinity) + dv.ubounds[dep_dim] = floor( + static_cast<double>(ub) + / new_stride) + * new_stride; + if (dv.ubounds[dep_dim] + >= dv.lbounds[dep_dim]) + dvs11.push_back(dv); + + if (lb != -posInfinity) + dv.lbounds[dep_dim] = ceil( + static_cast<double>(lb) + / new_stride) + * new_stride; + if (ub != posInfinity) + dv.ubounds[dep_dim] = ceil( + static_cast<double>(ub) + / new_stride) + * new_stride; + if (dv.ubounds[dep_dim] + >= dv.lbounds[dep_dim]) + dvs21.push_back(dv); + + if (lb != -posInfinity) + dv.lbounds[dep_dim] = floor( + static_cast<double>(lb) + / new_stride) + * new_stride; + if (ub != posInfinity) + dv.ubounds[dep_dim] = floor( + static_cast<double>(ub + - stride) + / new_stride) + * new_stride; + if (dv.ubounds[dep_dim] + >= dv.lbounds[dep_dim]) + dvs12.push_back(dv); + + if (lb != -posInfinity) + dv.lbounds[dep_dim] = floor( + static_cast<double>(lb) + / new_stride) + * new_stride; + if (ub != posInfinity) + dv.ubounds[dep_dim] = ceil( + static_cast<double>(ub + - stride) + / new_stride) + * new_stride; + if (dv.ubounds[dep_dim] + >= dv.lbounds[dep_dim]) + dvs22.push_back(dv); + } + } + } + if (dvs11.size() > 0) + D.push_back(std::make_pair(i, dvs11)); + if (dvs22.size() > 0) + dep.connect(old_num_stmt, old_num_stmt, dvs22); + if (dvs12.size() > 0) + D.push_back( + std::make_pair(old_num_stmt, dvs12)); + if (dvs21.size() > 0) + dep.connect(old_num_stmt, i, dvs21); + + dep.vertex[i].second.erase(j++); + } else { + dep.connect(old_num_stmt, j->first, 
j->second); + j++; + } + } else { + if (same_loop.find(j->first) != same_loop.end()) + D.push_back( + std::make_pair(old_num_stmt, j->second)); + j++; + } + } + + for (int j = 0; j < D.size(); j++) + dep.connect(i, D[j].first, D[j].second); + } + } + } + + return new_stmts; +} + + diff --git a/mem_mapping_utils.cc b/mem_mapping_utils.cc new file mode 100644 index 0000000..645fe59 --- /dev/null +++ b/mem_mapping_utils.cc @@ -0,0 +1,76 @@ +#include <vector> +#include <string.h> +#include <map> +#include "rose.h" +#include "mem_mapping_utils.hh" + +using namespace SageBuilder; +using namespace SageInterface; + +memory_mapping::memory_mapping (bool used, const char * array_name){ + this->mem_used = used; + this->add(array_name); +} + +texture_memory_mapping::texture_memory_mapping(bool used, const char* array_name) : memory_mapping(used, array_name) { } +constant_memory_mapping::constant_memory_mapping(bool used, const char* array_name) : memory_mapping(used, array_name) { } +//texture_memory_mapping::texture_memory_mapping (bool used, const char* array_name, int width, int height) { +// tex_mem_used = used; +// this->add(array_name, width, height); +//} + +void memory_mapping::add(const char * array_name) { + this->mapped_array_name.push_back(std::string(array_name)); + //std::vector<int> ivec = std::vector<int>(); + //dims[std::string(array_name)] = ivec; +} +//void texture_memory_mapping::add(const char* array_name, int width, int height) { +// tex_mapped_array_name.push_back(std::string(array_name)); +// std::vector<int> ivec = std::vector<int>(); +// ivec.push_back(width); +// ivec.push_back(height); +// dims[std::string(array_name)] = ivec; +//} + +bool memory_mapping::is_mem_used(){ + return this->mem_used; +} +bool memory_mapping::is_array_mapped(const char * array_name){ + + for( int i=0; i<mapped_array_name.size(); i++){ + if(!(strcmp(array_name, mapped_array_name[i].c_str()))) + return true; + } + return false; +} +void 
memory_mapping::set_mapped_symbol(const char * array_name, SgVariableSymbol* sym) { + this->mapped_symbol[std::string(array_name)] = sym; +} +void texture_memory_mapping::set_devptr_symbol(const char * array_name, SgVariableSymbol* sym) { + devptr_symbol[std::string(array_name)] = sym; +} +void memory_mapping::set_vardef(const char* array_name, VarDefs* vardef) { + this->vardefs[std::string(array_name)] = vardef; +} +SgVarRefExp* memory_mapping::get_mapped_symbol_exp(const char * array_name) { + return buildVarRefExp(this->mapped_symbol[std::string(array_name)]); +} +SgVarRefExp* texture_memory_mapping::get_devptr_symbol_exp(const char * array_name) { + return buildVarRefExp(devptr_symbol[std::string(array_name)]); +} +VarDefs* memory_mapping::get_vardef(const char* vardef_name) { + return this->vardefs[std::string(vardef_name)]; +} +//int texture_memory_mapping::get_dims(const char* array_name) { +// return (int)(dims[std::string(array_name)].size()); +//} +//int texture_memory_mapping::get_dim_length(const char* array_name, int dim) { +// return dims[std::string(array_name)][dim]; +//} +memory_mapping::memory_mapping() { + mem_used = false; +} +texture_memory_mapping::texture_memory_mapping() : memory_mapping() { } +constant_memory_mapping::constant_memory_mapping() : memory_mapping() { } + + diff --git a/mem_mapping_utils.hh b/mem_mapping_utils.hh new file mode 100644 index 0000000..8ff0545 --- /dev/null +++ b/mem_mapping_utils.hh @@ -0,0 +1,59 @@ +#ifndef MEM_MAPPING_UTILS_HH +#define MEM_MAPPING_UTILS_HH + +#include <vector> +#include <string.h> +#include <map> +#include "rose.h" + +using namespace SageInterface; +using namespace SageBuilder; + +struct VarDefs; + +class memory_mapping { +private: + bool mem_used; + std::vector< std::string > mapped_array_name; + std::map<std::string, SgVariableSymbol*> mapped_symbol; + std::map<std::string, VarDefs*> vardefs; +public: + memory_mapping(); + memory_mapping(bool used, const char* array_name); + void add(const 
char* array_name); + bool is_mem_used(); + bool is_array_mapped(const char* array_name); + void set_mapped_symbol(const char* array_name, SgVariableSymbol* sym); + void set_vardef(const char* array_name, VarDefs* vardef); + SgVarRefExp* get_mapped_symbol_exp(const char* array_name); + VarDefs* get_vardef(const char* vardef_name); +}; + +//protonu --class introduced to hold texture memory information in one single place +//this might help me get over the weird memory issues I am having with the Loop class +//where someone/something corrupts my memory + +class texture_memory_mapping : public memory_mapping { +private: + std::map<std::string, SgVariableSymbol*> devptr_symbol; + // a hack for multi-dimensional texture mapping + //std::map<std::string, std::vector<int> > dims; +public: + texture_memory_mapping ( bool used, const char * array_name); + //texture_memory_mapping (bool used, const char* array_name, int width, int height); + // this function is a hack to get arround a bug + // void add(const char* array_name, int width, int height); + void set_devptr_symbol(const char * array_name, SgVariableSymbol* sym); + SgVarRefExp* get_devptr_symbol_exp(const char * array_name); + //int get_dim_length(const char* array_name, int dim); + //int get_dims(const char* array_name); + texture_memory_mapping(); +}; + +class constant_memory_mapping : public memory_mapping { +public: + constant_memory_mapping(); + constant_memory_mapping(bool used, const char* array_name); +}; + +#endif diff --git a/omegatools.cc b/omegatools.cc new file mode 100644 index 0000000..d88fd2a --- /dev/null +++ b/omegatools.cc @@ -0,0 +1,2312 @@ +/***************************************************************************** + Copyright (C) 2008 University of Southern California + Copyright (C) 2009-2010 University of Utah + All Rights Reserved. + + Purpose: + Useful tools involving Omega manipulation. + + Notes: + + History: + 01/2006 Created by Chun Chen. 
+ 03/2009 Upgrade Omega's interaction with compiler to IR_Code, by Chun Chen. +*****************************************************************************/ + +#include <codegen.h> +// #include <code_gen/output_repr.h> +#include "omegatools.hh" +#include "ir_code.hh" +#include "chill_error.hh" + +using namespace omega; + +namespace { + struct DependenceLevel { + Relation r; + int level; + int dir; // direction upto current level: + // -1:negative, 0: undetermined, 1: postive + std::vector<coef_t> lbounds; + std::vector<coef_t> ubounds; + DependenceLevel(const Relation &_r, int _dims): + r(_r), level(0), dir(0), lbounds(_dims), ubounds(_dims) {} + }; +} + + + + +std::string tmp_e() { + static int counter = 1; + return std::string("e")+to_string(counter++); +} + + + +//----------------------------------------------------------------------------- +// Convert expression tree to omega relation. "destroy" means shallow +// deallocation of "repr", not freeing the actual code inside. +// ----------------------------------------------------------------------------- +void exp2formula(IR_Code *ir, Relation &r, F_And *f_root, std::vector<Free_Var_Decl*> &freevars, + CG_outputRepr *repr, Variable_ID lhs, char side, IR_CONDITION_TYPE rel, bool destroy) { + +// void exp2formula(IR_Code *ir, Relation &r, F_And *f_root, std::vector<Free_Var_Decl*> &freevars, +// CG_outputRepr *repr, Variable_ID lhs, char side, char rel, bool destroy) { + + switch (ir->QueryExpOperation(repr)) { + case IR_OP_CONSTANT: + { + std::vector<CG_outputRepr *> v = ir->QueryExpOperand(repr); + IR_ConstantRef *ref = static_cast<IR_ConstantRef *>(ir->Repr2Ref(v[0])); + if (!ref->is_integer()) + throw ir_exp_error("non-integer constant coefficient"); + + coef_t c = ref->integer(); + if (rel == IR_COND_GE || rel == IR_COND_GT) { + GEQ_Handle h = f_root->add_GEQ(); + h.update_coef(lhs, 1); + if (rel == IR_COND_GE) + h.update_const(-c); + else + h.update_const(-c-1); + } + else if (rel == IR_COND_LE || rel == 
IR_COND_LT) { + GEQ_Handle h = f_root->add_GEQ(); + h.update_coef(lhs, -1); + if (rel == IR_COND_LE) + h.update_const(c); + else + h.update_const(c-1); + } + else if (rel == IR_COND_EQ) { + EQ_Handle h = f_root->add_EQ(); + h.update_coef(lhs, 1); + h.update_const(-c); + } + else + throw std::invalid_argument("unsupported condition type"); + + delete v[0]; + delete ref; + if (destroy) + delete repr; + break; + } + case IR_OP_VARIABLE: + { + std::vector<CG_outputRepr *> v = ir->QueryExpOperand(repr); + IR_ScalarRef *ref = static_cast<IR_ScalarRef *>(ir->Repr2Ref(v[0])); + + std::string s = ref->name(); + Variable_ID e = find_index(r, s, side); + + if (e == NULL) { // must be free variable + Free_Var_Decl *t = NULL; + for (unsigned i = 0; i < freevars.size(); i++) { + std::string ss = freevars[i]->base_name(); + if (s == ss) { + t = freevars[i]; + break; + } + } + + if (t == NULL) { + t = new Free_Var_Decl(s); + freevars.insert(freevars.end(), t); + } + + e = r.get_local(t); + } + + if (rel == IR_COND_GE || rel == IR_COND_GT) { + GEQ_Handle h = f_root->add_GEQ(); + h.update_coef(lhs, 1); + h.update_coef(e, -1); + if (rel == IR_COND_GT) + h.update_const(-1); + } + else if (rel == IR_COND_LE || rel == IR_COND_LT) { + GEQ_Handle h = f_root->add_GEQ(); + h.update_coef(lhs, -1); + h.update_coef(e, 1); + if (rel == IR_COND_LT) + h.update_const(-1); + } + else if (rel == IR_COND_EQ) { + EQ_Handle h = f_root->add_EQ(); + h.update_coef(lhs, 1); + h.update_coef(e, -1); + } + else + throw std::invalid_argument("unsupported condition type"); + + // delete v[0]; + delete ref; + if (destroy) + delete repr; + break; + } + case IR_OP_ASSIGNMENT: + { + std::vector<CG_outputRepr *> v = ir->QueryExpOperand(repr); + exp2formula(ir, r, f_root, freevars, v[0], lhs, side, rel, true); + if (destroy) + delete repr; + break; + } + case IR_OP_PLUS: + { + F_Exists *f_exists = f_root->add_exists(); + Variable_ID e1 = f_exists->declare(tmp_e()); + Variable_ID e2 = f_exists->declare(tmp_e()); + 
F_And *f_and = f_exists->add_and(); + + if (rel == IR_COND_GE || rel == IR_COND_GT) { + GEQ_Handle h = f_and->add_GEQ(); + h.update_coef(lhs, 1); + h.update_coef(e1, -1); + h.update_coef(e2, -1); + if (rel == IR_COND_GT) + h.update_const(-1); + } + else if (rel == IR_COND_LE || rel == IR_COND_LT) { + GEQ_Handle h = f_and->add_GEQ(); + h.update_coef(lhs, -1); + h.update_coef(e1, 1); + h.update_coef(e2, 1); + if (rel == IR_COND_LT) + h.update_const(-1); + } + else if (rel == IR_COND_EQ) { + EQ_Handle h = f_and->add_EQ(); + h.update_coef(lhs, 1); + h.update_coef(e1, -1); + h.update_coef(e2, -1); + } + else + throw std::invalid_argument("unsupported condition type"); + + std::vector<CG_outputRepr *> v = ir->QueryExpOperand(repr); + exp2formula(ir, r, f_and, freevars, v[0], e1, side, IR_COND_EQ, true); + exp2formula(ir, r, f_and, freevars, v[1], e2, side, IR_COND_EQ, true); + if (destroy) + delete repr; + break; + } + case IR_OP_MINUS: + { + F_Exists *f_exists = f_root->add_exists(); + Variable_ID e1 = f_exists->declare(tmp_e()); + Variable_ID e2 = f_exists->declare(tmp_e()); + F_And *f_and = f_exists->add_and(); + + if (rel == IR_COND_GE || rel == IR_COND_GT) { + GEQ_Handle h = f_and->add_GEQ(); + h.update_coef(lhs, 1); + h.update_coef(e1, -1); + h.update_coef(e2, 1); + if (rel == IR_COND_GT) + h.update_const(-1); + } + else if (rel == IR_COND_LE || rel == IR_COND_LT) { + GEQ_Handle h = f_and->add_GEQ(); + h.update_coef(lhs, -1); + h.update_coef(e1, 1); + h.update_coef(e2, -1); + if (rel == IR_COND_LT) + h.update_const(-1); + } + else if (rel == IR_COND_EQ) { + EQ_Handle h = f_and->add_EQ(); + h.update_coef(lhs, 1); + h.update_coef(e1, -1); + h.update_coef(e2, 1); + } + else + throw std::invalid_argument("unsupported condition type"); + + std::vector<CG_outputRepr *> v = ir->QueryExpOperand(repr); + exp2formula(ir, r, f_and, freevars, v[0], e1, side, IR_COND_EQ, true); + exp2formula(ir, r, f_and, freevars, v[1], e2, side, IR_COND_EQ, true); + if (destroy) + delete 
repr; + break; + } + case IR_OP_MULTIPLY: + { + std::vector<CG_outputRepr *> v = ir->QueryExpOperand(repr); + + coef_t coef; + CG_outputRepr *term; + if (ir->QueryExpOperation(v[0]) == IR_OP_CONSTANT) { + IR_ConstantRef *ref = static_cast<IR_ConstantRef *>(ir->Repr2Ref(v[0])); + coef = ref->integer(); + delete v[0]; + delete ref; + term = v[1]; + } + else if (ir->QueryExpOperation(v[1]) == IR_OP_CONSTANT) { + IR_ConstantRef *ref = static_cast<IR_ConstantRef *>(ir->Repr2Ref(v[1])); + coef = ref->integer(); + delete v[1]; + delete ref; + term = v[0]; + } + else + throw ir_exp_error("not presburger expression"); + + F_Exists *f_exists = f_root->add_exists(); + Variable_ID e = f_exists->declare(tmp_e()); + F_And *f_and = f_exists->add_and(); + + if (rel == IR_COND_GE || rel == IR_COND_GT) { + GEQ_Handle h = f_and->add_GEQ(); + h.update_coef(lhs, 1); + h.update_coef(e, -coef); + if (rel == IR_COND_GT) + h.update_const(-1); + } + else if (rel == IR_COND_LE || rel == IR_COND_LT) { + GEQ_Handle h = f_and->add_GEQ(); + h.update_coef(lhs, -1); + h.update_coef(e, coef); + if (rel == IR_COND_LT) + h.update_const(-1); + } + else if (rel == IR_COND_EQ) { + EQ_Handle h = f_and->add_EQ(); + h.update_coef(lhs, 1); + h.update_coef(e, -coef); + } + else + throw std::invalid_argument("unsupported condition type"); + + exp2formula(ir, r, f_and, freevars, term, e, side, IR_COND_EQ, true); + if (destroy) + delete repr; + break; + } + case IR_OP_DIVIDE: + { + std::vector<CG_outputRepr *> v = ir->QueryExpOperand(repr); + + assert(ir->QueryExpOperation(v[1]) == IR_OP_CONSTANT); + IR_ConstantRef *ref = static_cast<IR_ConstantRef *>(ir->Repr2Ref(v[1])); + coef_t coef = ref->integer(); + delete v[1]; + delete ref; + + F_Exists *f_exists = f_root->add_exists(); + Variable_ID e = f_exists->declare(tmp_e()); + F_And *f_and = f_exists->add_and(); + + if (rel == IR_COND_GE || rel == IR_COND_GT) { + GEQ_Handle h = f_and->add_GEQ(); + h.update_coef(lhs, coef); + h.update_coef(e, -1); + if (rel == 
IR_COND_GT) + h.update_const(-1); + } + else if (rel == IR_COND_LE || rel == IR_COND_LT) { + GEQ_Handle h = f_and->add_GEQ(); + h.update_coef(lhs, -coef); + h.update_coef(e, 1); + if (rel == IR_COND_LT) + h.update_const(-1); + } + else if (rel == IR_COND_EQ) { + EQ_Handle h = f_and->add_EQ(); + h.update_coef(lhs, coef); + h.update_coef(e, -1); + } + else + throw std::invalid_argument("unsupported condition type"); + + exp2formula(ir, r, f_and, freevars, v[0], e, side, IR_COND_EQ, true); + if (destroy) + delete repr; + break; + } + case IR_OP_POSITIVE: + { + std::vector<CG_outputRepr *> v = ir->QueryExpOperand(repr); + + exp2formula(ir, r, f_root, freevars, v[0], lhs, side, rel, true); + if (destroy) + delete repr; + break; + } + case IR_OP_NEGATIVE: + { + std::vector<CG_outputRepr *> v = ir->QueryExpOperand(repr); + + F_Exists *f_exists = f_root->add_exists(); + Variable_ID e = f_exists->declare(tmp_e()); + F_And *f_and = f_exists->add_and(); + + if (rel == IR_COND_GE || rel == IR_COND_GT) { + GEQ_Handle h = f_and->add_GEQ(); + h.update_coef(lhs, 1); + h.update_coef(e, 1); + if (rel == IR_COND_GT) + h.update_const(-1); + } + else if (rel == IR_COND_LE || rel == IR_COND_LT) { + GEQ_Handle h = f_and->add_GEQ(); + h.update_coef(lhs, -1); + h.update_coef(e, -1); + if (rel == IR_COND_LT) + h.update_const(-1); + } + else if (rel == IR_COND_EQ) { + EQ_Handle h = f_and->add_EQ(); + h.update_coef(lhs, 1); + h.update_coef(e, 1); + } + else + throw std::invalid_argument("unsupported condition type"); + + exp2formula(ir, r, f_and, freevars, v[0], e, side, IR_COND_EQ, true); + if (destroy) + delete repr; + break; + } + case IR_OP_MIN: + { + std::vector<CG_outputRepr *> v = ir->QueryExpOperand(repr); + + F_Exists *f_exists = f_root->add_exists(); + + if (rel == IR_COND_GE || rel == IR_COND_GT) { + F_Or *f_or = f_exists->add_and()->add_or(); + for (int i = 0; i < v.size(); i++) { + Variable_ID e = f_exists->declare(tmp_e()); + F_And *f_and = f_or->add_and(); + GEQ_Handle h = 
f_and->add_GEQ(); + h.update_coef(lhs, 1); + h.update_coef(e, -1); + if (rel == IR_COND_GT) + h.update_const(-1); + + exp2formula(ir, r, f_and, freevars, v[i], e, side, IR_COND_EQ, true); + } + } + else if (rel == IR_COND_LE || rel == IR_COND_LT) { + F_And *f_and = f_exists->add_and(); + for (int i = 0; i < v.size(); i++) { + Variable_ID e = f_exists->declare(tmp_e()); + GEQ_Handle h = f_and->add_GEQ(); + h.update_coef(lhs, -1); + h.update_coef(e, 1); + if (rel == IR_COND_LT) + h.update_const(-1); + + exp2formula(ir, r, f_and, freevars, v[i], e, side, IR_COND_EQ, true); + } + } + else if (rel == IR_COND_EQ) { + F_Or *f_or = f_exists->add_and()->add_or(); + for (int i = 0; i < v.size(); i++) { + Variable_ID e = f_exists->declare(tmp_e()); + F_And *f_and = f_or->add_and(); + + EQ_Handle h = f_and->add_EQ(); + h.update_coef(lhs, 1); + h.update_coef(e, -1); + + exp2formula(ir, r, f_and, freevars, v[i], e, side, IR_COND_EQ, false); + + for (int j = 0; j < v.size(); j++) + if (j != i) { + Variable_ID e2 = f_exists->declare(tmp_e()); + GEQ_Handle h2 = f_and->add_GEQ(); + h2.update_coef(e, -1); + h2.update_coef(e2, 1); + + exp2formula(ir, r, f_and, freevars, v[j], e2, side, IR_COND_EQ, false); + } + } + + for (int i = 0; i < v.size(); i++) + delete v[i]; + } + else + throw std::invalid_argument("unsupported condition type"); + + if (destroy) + delete repr; + } + case IR_OP_MAX: + { + std::vector<CG_outputRepr *> v = ir->QueryExpOperand(repr); + + F_Exists *f_exists = f_root->add_exists(); + + if (rel == IR_COND_LE || rel == IR_COND_LT) { + F_Or *f_or = f_exists->add_and()->add_or(); + for (int i = 0; i < v.size(); i++) { + Variable_ID e = f_exists->declare(tmp_e()); + F_And *f_and = f_or->add_and(); + GEQ_Handle h = f_and->add_GEQ(); + h.update_coef(lhs, -1); + h.update_coef(e, 1); + if (rel == IR_COND_LT) + h.update_const(-1); + + exp2formula(ir, r, f_and, freevars, v[i], e, side, IR_COND_EQ, true); + } + } + else if (rel == IR_COND_GE || rel == IR_COND_GT) { + F_And 
*f_and = f_exists->add_and(); + for (int i = 0; i < v.size(); i++) { + Variable_ID e = f_exists->declare(tmp_e()); + GEQ_Handle h = f_and->add_GEQ(); + h.update_coef(lhs, 1); + h.update_coef(e, -1); + if (rel == IR_COND_GT) + h.update_const(-1); + + exp2formula(ir, r, f_and, freevars, v[i], e, side, IR_COND_EQ, true); + } + } + else if (rel == IR_COND_EQ) { + F_Or *f_or = f_exists->add_and()->add_or(); + for (int i = 0; i < v.size(); i++) { + Variable_ID e = f_exists->declare(tmp_e()); + F_And *f_and = f_or->add_and(); + + EQ_Handle h = f_and->add_EQ(); + h.update_coef(lhs, 1); + h.update_coef(e, -1); + + exp2formula(ir, r, f_and, freevars, v[i], e, side, IR_COND_EQ, false); + + for (int j = 0; j < v.size(); j++) + if (j != i) { + Variable_ID e2 = f_exists->declare(tmp_e()); + GEQ_Handle h2 = f_and->add_GEQ(); + h2.update_coef(e, 1); + h2.update_coef(e2, -1); + + exp2formula(ir, r, f_and, freevars, v[j], e2, side, IR_COND_EQ, false); + } + } + + for (int i = 0; i < v.size(); i++) + delete v[i]; + } + else + throw std::invalid_argument("unsupported condition type"); + + if (destroy) + delete repr; + } + case IR_OP_NULL: + break; + default: + throw ir_exp_error("unsupported operand type"); + } +} + + +//----------------------------------------------------------------------------- +// Build dependence relation for two array references. 
+// ----------------------------------------------------------------------------- +Relation arrays2relation(IR_Code *ir, std::vector<Free_Var_Decl*> &freevars, + const IR_ArrayRef *ref_src, const Relation &IS_w, + const IR_ArrayRef *ref_dst, const Relation &IS_r) { + Relation &IS1 = const_cast<Relation &>(IS_w); + Relation &IS2 = const_cast<Relation &>(IS_r); + + Relation r(IS1.n_set(), IS2.n_set()); + + for (int i = 1; i <= IS1.n_set(); i++) + r.name_input_var(i, IS1.set_var(i)->name()); + + for (int i = 1; i <= IS2.n_set(); i++) + r.name_output_var(i, IS2.set_var(i)->name()+"'"); + + IR_Symbol *sym_src = ref_src->symbol(); + IR_Symbol *sym_dst = ref_dst->symbol(); + if (*sym_src != *sym_dst) { + r.add_or(); // False Relation + delete sym_src; + delete sym_dst; + return r; + } + else { + delete sym_src; + delete sym_dst; + } + + F_And *f_root = r.add_and(); + + for (int i = 0; i < ref_src->n_dim(); i++) { + F_Exists *f_exists = f_root->add_exists(); + Variable_ID e1 = f_exists->declare(tmp_e()); + Variable_ID e2 = f_exists->declare(tmp_e()); + F_And *f_and = f_exists->add_and(); + + CG_outputRepr *repr_src = ref_src->index(i); + CG_outputRepr *repr_dst = ref_dst->index(i); + + bool has_complex_formula = false; + try { + exp2formula(ir, r, f_and, freevars, repr_src, e1, 'w', IR_COND_EQ, false); + exp2formula(ir, r, f_and, freevars, repr_dst, e2, 'r', IR_COND_EQ, false); + } + catch (const ir_exp_error &e) { + has_complex_formula = true; + } + + if (!has_complex_formula) { + EQ_Handle h = f_and->add_EQ(); + h.update_coef(e1, 1); + h.update_coef(e2, -1); + } + + repr_src->clear(); + repr_dst->clear(); + delete repr_src; + delete repr_dst; + } + + // add iteration space restriction + r = Restrict_Domain(r, copy(IS1)); + r = Restrict_Range(r, copy(IS2)); + + // reset the output variable names lost in restriction + for (int i = 1; i <= IS2.n_set(); i++) + r.name_output_var(i, IS2.set_var(i)->name()+"'"); + + return r; +} + + 
+//----------------------------------------------------------------------------- +// Convert array dependence relation into set of dependence vectors, assuming +// ref_w is lexicographically before ref_r in the source code. +// ----------------------------------------------------------------------------- +std::pair<std::vector<DependenceVector>, std::vector<DependenceVector> > relation2dependences (const IR_ArrayRef *ref_src, const IR_ArrayRef *ref_dst, const Relation &r) { + assert(r.n_inp() == r.n_out()); + + std::vector<DependenceVector> dependences1, dependences2; + std::stack<DependenceLevel> working; + working.push(DependenceLevel(r, r.n_inp())); + + while (!working.empty()) { + DependenceLevel dep = working.top(); + working.pop(); + + // No dependence exists, move on. + if (!dep.r.is_satisfiable()) + continue; + + if (dep.level == r.n_inp()) { + DependenceVector dv; + + // for loop independent dependence, use lexical order to + // determine the correct source and destination + if (dep.dir == 0) { + if (*ref_src == *ref_dst) + continue; // trivial self zero-dependence + + if (ref_src->is_write()) { + if (ref_dst->is_write()) + dv.type = DEP_W2W; + else + dv.type = DEP_W2R; + } + else { + if (ref_dst->is_write()) + dv.type = DEP_R2W; + else + dv.type = DEP_R2R; + } + + } + else if (dep.dir == 1) { + if (ref_src->is_write()) { + if (ref_dst->is_write()) + dv.type = DEP_W2W; + else + dv.type = DEP_W2R; + } + else { + if (ref_dst->is_write()) + dv.type = DEP_R2W; + else + dv.type = DEP_R2R; + } + } + else { // dep.dir == -1 + if (ref_dst->is_write()) { + if (ref_src->is_write()) + dv.type = DEP_W2W; + else + dv.type = DEP_W2R; + } + else { + if (ref_src->is_write()) + dv.type = DEP_R2W; + else + dv.type = DEP_R2R; + } + } + + dv.lbounds = dep.lbounds; + dv.ubounds = dep.ubounds; + dv.sym = ref_src->symbol(); + + if (dep.dir == 0 || dep.dir == 1) + dependences1.push_back(dv); + else + dependences2.push_back(dv); + } + else { + // now work on the next dimension 
level + int level = ++dep.level; + + coef_t lbound, ubound; + Relation delta = Deltas(copy(dep.r)); + delta.query_variable_bounds(delta.set_var(level), lbound, ubound); + + if (dep.dir == 0) { + if (lbound > 0) { + dep.dir = 1; + dep.lbounds[level-1] = lbound; + dep.ubounds[level-1] = ubound; + + working.push(dep); + } + else if (ubound < 0) { + dep.dir = -1; + dep.lbounds[level-1] = -ubound; + dep.ubounds[level-1] = -lbound; + + working.push(dep); + } + else { + // split the dependence vector into flow- and anti-dependence + // for the first non-zero distance, also separate zero distance + // at this level. + { + DependenceLevel dep2 = dep; + + dep2.lbounds[level-1] = 0; + dep2.ubounds[level-1] = 0; + + F_And *f_root = dep2.r.and_with_and(); + EQ_Handle h = f_root->add_EQ(); + h.update_coef(dep2.r.input_var(level), 1); + h.update_coef(dep2.r.output_var(level), -1); + + working.push(dep2); + } + + if (lbound < 0 && *ref_src != *ref_dst) { + DependenceLevel dep2 = dep; + + F_And *f_root = dep2.r.and_with_and(); + GEQ_Handle h = f_root->add_GEQ(); + h.update_coef(dep2.r.input_var(level), 1); + h.update_coef(dep2.r.output_var(level), -1); + h.update_const(-1); + + // get tighter bounds under new constraints + coef_t lbound, ubound; + delta = Deltas(copy(dep2.r)); + delta.query_variable_bounds(delta.set_var(level), + lbound, ubound); + + dep2.dir = -1; + dep2.lbounds[level-1] = max(-ubound,static_cast<coef_t>(1)); // use max() to avoid Omega retardness + dep2.ubounds[level-1] = -lbound; + + working.push(dep2); + } + + if (ubound > 0) { + DependenceLevel dep2 = dep; + + F_And *f_root = dep2.r.and_with_and(); + GEQ_Handle h = f_root->add_GEQ(); + h.update_coef(dep2.r.input_var(level), -1); + h.update_coef(dep2.r.output_var(level), 1); + h.update_const(-1); + + // get tighter bonds under new constraints + coef_t lbound, ubound; + delta = Deltas(copy(dep2.r)); + delta.query_variable_bounds(delta.set_var(level), + lbound, ubound); + dep2.dir = 1; + dep2.lbounds[level-1] = 
max(lbound,static_cast<coef_t>(1)); // use max() to avoid Omega retardness + dep2.ubounds[level-1] = ubound; + + working.push(dep2); + } + } + } + // now deal with dependence vector with known direction + // determined at previous levels + else { + // For messy bounds, further test to see if the dependence distance + // can be reduced to positive/negative. This is an omega hack. + if (lbound == negInfinity && ubound == posInfinity) { + { + Relation t = dep.r; + F_And *f_root = t.and_with_and(); + GEQ_Handle h = f_root->add_GEQ(); + h.update_coef(t.input_var(level), 1); + h.update_coef(t.output_var(level), -1); + h.update_const(-1); + + if (!t.is_satisfiable()) { + lbound = 0; + } + } + { + Relation t = dep.r; + F_And *f_root = t.and_with_and(); + GEQ_Handle h = f_root->add_GEQ(); + h.update_coef(t.input_var(level), -1); + h.update_coef(t.output_var(level), 1); + h.update_const(-1); + + if (!t.is_satisfiable()) { + ubound = 0; + } + } + } + + // Same thing as above, test to see if zero dependence + // distance possible. + if (lbound == 0 || ubound == 0) { + Relation t = dep.r; + F_And *f_root = t.and_with_and(); + EQ_Handle h = f_root->add_EQ(); + h.update_coef(t.input_var(level), 1); + h.update_coef(t.output_var(level), -1); + + if (!t.is_satisfiable()) { + if (lbound == 0) + lbound = 1; + if (ubound == 0) + ubound = -1; + } + } + + if (dep.dir == -1) { + dep.lbounds[level-1] = -ubound; + dep.ubounds[level-1] = -lbound; + } + else { // dep.dir == 1 + dep.lbounds[level-1] = lbound; + dep.ubounds[level-1] = ubound; + } + + working.push(dep); + } + } + } + + return std::make_pair(dependences1, dependences2); +} + + +//----------------------------------------------------------------------------- +// Convert a boolean expression to omega relation. "destroy" means shallow +// deallocation of "repr", not freeing the actual code inside. 
+//----------------------------------------------------------------------------- +void exp2constraint(IR_Code *ir, Relation &r, F_And *f_root, + std::vector<Free_Var_Decl *> &freevars, + CG_outputRepr *repr, bool destroy) { + IR_CONDITION_TYPE cond = ir->QueryBooleanExpOperation(repr); + switch (cond) { + case IR_COND_LT: + case IR_COND_LE: + case IR_COND_EQ: + case IR_COND_GT: + case IR_COND_GE: { + F_Exists *f_exist = f_root->add_exists(); + Variable_ID e = f_exist->declare(); + F_And *f_and = f_exist->add_and(); + std::vector<omega::CG_outputRepr *> op = ir->QueryExpOperand(repr); + exp2formula(ir, r, f_and, freevars, op[0], e, 's', IR_COND_EQ, true); + exp2formula(ir, r, f_and, freevars, op[1], e, 's', cond, true); + if (destroy) + delete repr; + break; + } + case IR_COND_NE: { + F_Exists *f_exist = f_root->add_exists(); + Variable_ID e = f_exist->declare(); + F_Or *f_or = f_exist->add_or(); + F_And *f_and = f_or->add_and(); + std::vector<omega::CG_outputRepr *> op = ir->QueryExpOperand(repr); + exp2formula(ir, r, f_and, freevars, op[0], e, 's', IR_COND_EQ, false); + exp2formula(ir, r, f_and, freevars, op[1], e, 's', IR_COND_GT, false); + + f_and = f_or->add_and(); + exp2formula(ir, r, f_and, freevars, op[0], e, 's', IR_COND_EQ, true); + exp2formula(ir, r, f_and, freevars, op[1], e, 's', IR_COND_LT, true); + + if (destroy) + delete repr; + break; + } + default: + throw ir_exp_error("unrecognized conditional expression"); + } +} + + + + + +// inline void exp2formula(IR_Code *ir, Relation &r, F_And *f_root, +// std::vector<Free_Var_Decl*> &freevars, +// const CG_outputRepr *repr, Variable_ID lhs, char side, char rel) { +// exp2formula(ir, r, f_root, freevars, const_cast<CG_outputRepr *>(repr), lhs, side, rel, false); +// } + + + + + + + +//----------------------------------------------------------------------------- +// Convert suif expression tree to omega relation. 
+//----------------------------------------------------------------------------- + +// void suif2formula(Relation &r, F_And *f_root, +// std::vector<Free_Var_Decl*> &freevars, +// operand op, Variable_ID lhs, +// char side, char rel) { +// if (op.is_immed()) { +// immed im = op.immediate(); + +// if (im.is_integer()) { +// int c = im.integer(); + +// if (rel == '>') { +// GEQ_Handle h = f_root->add_GEQ(); +// h.update_coef(lhs, 1); +// h.update_const(-1*c); +// } +// else if (rel == '<') { +// GEQ_Handle h = f_root->add_GEQ(); +// h.update_coef(lhs, -1); +// h.update_const(c); +// } +// else { // '=' +// EQ_Handle h = f_root->add_EQ(); +// h.update_coef(lhs, 1); +// h.update_const(-1*c); +// } +// } +// else { +// return; //add Function in the future +// } +// } +// else if (op.is_symbol()) { +// String s = op.symbol()->name(); +// Variable_ID e = find_index(r, s, side); + +// if (e == NULL) { // must be free variable +// Free_Var_Decl *t = NULL; +// for (unsigned i = 0; i < freevars.size(); i++) { +// String ss = freevars[i]->base_name(); +// if (s == ss) { +// t = freevars[i]; +// break; +// } +// } + +// if (t == NULL) { +// t = new Free_Var_Decl(s); +// freevars.insert(freevars.end(), t); +// } + +// e = r.get_local(t); +// } + +// if (rel == '>') { +// GEQ_Handle h = f_root->add_GEQ(); +// h.update_coef(lhs, 1); +// h.update_coef(e, -1); +// } +// else if (rel == '<') { +// GEQ_Handle h = f_root->add_GEQ(); +// h.update_coef(lhs, -1); +// h.update_coef(e, 1); +// } +// else { // '=' +// EQ_Handle h = f_root->add_EQ(); +// h.update_coef(lhs, 1); +// h.update_coef(e, -1); +// } +// } +// else if (op.is_instr()) +// suif2formula(r, f_root, freevars, op.instr(), lhs, side, rel); +// } + + +// void suif2formula(Relation &r, F_And *f_root, +// std::vector<Free_Var_Decl*> &freevars, +// instruction *ins, Variable_ID lhs, +// char side, char rel) { +// if (ins->opcode() == io_cpy) { +// suif2formula(r, f_root, freevars, ins->src_op(0), lhs, side, rel); +// } +// else 
if (ins->opcode() == io_add || ins->opcode() == io_sub) { +// F_Exists *f_exists = f_root->add_exists(); +// Variable_ID e1 = f_exists->declare(tmp_e()); +// Variable_ID e2 = f_exists->declare(tmp_e()); +// F_And *f_and = f_exists->add_and(); + +// int add_or_sub = ins->opcode() == io_add ? 1 : -1; +// if (rel == '>') { +// GEQ_Handle h = f_and->add_GEQ(); +// h.update_coef(lhs, 1); +// h.update_coef(e1, -1); +// h.update_coef(e2, -1 * add_or_sub); +// } +// else if (rel == '<') { +// GEQ_Handle h = f_and->add_GEQ(); +// h.update_coef(lhs, -1); +// h.update_coef(e1, 1); +// h.update_coef(e2, 1 * add_or_sub); +// } +// else { // '=' +// EQ_Handle h = f_and->add_EQ(); +// h.update_coef(lhs, 1); +// h.update_coef(e1, -1); +// h.update_coef(e2, -1 * add_or_sub); +// } + +// suif2formula(r, f_and, freevars, ins->src_op(0), e1, side, '='); +// suif2formula(r, f_and, freevars, ins->src_op(1), e2, side, '='); +// } +// else if (ins->opcode() == io_mul) { +// operand op1 = ins->src_op(0); +// operand op2 = ins->src_op(1); + +// if (!op1.is_immed() && !op2.is_immed()) +// return; // add Function in the future +// else { +// operand op; +// immed im; +// if (op1.is_immed()) { +// im = op1.immediate(); +// op = op2; +// } +// else { +// im = op2.immediate(); +// op = op1; +// } + +// if (!im.is_integer()) +// return; //add Function in the future +// else { +// int c = im.integer(); + +// F_Exists *f_exists = f_root->add_exists(); +// Variable_ID e = f_exists->declare(tmp_e()); +// F_And *f_and = f_exists->add_and(); + +// if (rel == '>') { +// GEQ_Handle h = f_and->add_GEQ(); +// h.update_coef(lhs, 1); +// h.update_coef(e, -c); +// } +// else if (rel == '<') { +// GEQ_Handle h = f_and->add_GEQ(); +// h.update_coef(lhs, -1); +// h.update_coef(e, c); +// } +// else { +// EQ_Handle h = f_and->add_EQ(); +// h.update_coef(lhs, 1); +// h.update_coef(e, -c); +// } + +// suif2formula(r, f_and, freevars, op, e, side, '='); +// } +// } +// } +// else if (ins->opcode() == io_div) { +// 
operand op1 = ins->src_op(0); +// operand op2 = ins->src_op(1); + +// if (!op2.is_immed()) +// return; //add Function in the future +// else { +// immed im = op2.immediate(); + +// if (!im.is_integer()) +// return; //add Function in the future +// else { +// int c = im.integer(); + +// F_Exists *f_exists = f_root->add_exists(); +// Variable_ID e = f_exists->declare(tmp_e()); +// F_And *f_and = f_exists->add_and(); + +// if (rel == '>') { +// GEQ_Handle h = f_and->add_GEQ(); +// h.update_coef(lhs, c); +// h.update_coef(e, -1); +// } +// else if (rel == '<') { +// GEQ_Handle h = f_and->add_GEQ(); +// h.update_coef(lhs, -c); +// h.update_coef(e, 1); +// } +// else { +// EQ_Handle h = f_and->add_EQ(); +// h.update_coef(lhs, c); +// h.update_coef(e, -1); +// } + +// suif2formula(r, f_and, freevars, op1, e, side, '='); +// } +// } +// } +// else if (ins->opcode() == io_neg) { +// F_Exists *f_exists = f_root->add_exists(); +// Variable_ID e = f_exists->declare(tmp_e()); +// F_And *f_and = f_exists->add_and(); + +// if (rel == '>') { +// GEQ_Handle h = f_and->add_GEQ(); +// h.update_coef(lhs, 1); +// h.update_coef(e, 1); +// } +// else if (rel == '<') { +// GEQ_Handle h = f_and->add_GEQ(); +// h.update_coef(lhs, -1); +// h.update_coef(e, -1); +// } +// else { +// EQ_Handle h = f_and->add_EQ(); +// h.update_coef(lhs, 1); +// h.update_coef(e, 1); +// } + +// suif2formula(r, f_and, freevars, ins->src_op(0), e, side, '='); +// } +// else if (ins->opcode() == io_min) { +// operand op1 = ins->src_op(0); +// operand op2 = ins->src_op(1); + +// F_Exists *f_exists = f_root->add_exists(); +// Variable_ID e1 = f_exists->declare(tmp_e()); +// Variable_ID e2 = f_exists->declare(tmp_e()); +// F_And *f_and = f_exists->add_and(); + +// if (rel == '>') { +// F_Or *f_or = f_and->add_or(); +// F_And *f_and1 = f_or->add_and(); +// GEQ_Handle h1 = f_and1->add_GEQ(); +// h1.update_coef(lhs, 1); +// h1.update_coef(e1, -1); +// F_And *f_and2 = f_or->add_and(); +// GEQ_Handle h2 = 
f_and2->add_GEQ(); +// h2.update_coef(lhs, 1); +// h2.update_coef(e2, -1); +// } +// else if (rel == '<') { +// GEQ_Handle h1 = f_and->add_GEQ(); +// h1.update_coef(lhs, -1); +// h1.update_coef(e1, 1); +// GEQ_Handle h2 = f_and->add_GEQ(); +// h2.update_coef(lhs, -1); +// h2.update_coef(e2, 1); +// } +// else { +// F_Or *f_or = f_and->add_or(); +// F_And *f_and1 = f_or->add_and(); +// EQ_Handle h1 = f_and1->add_EQ(); +// h1.update_coef(lhs, 1); +// h1.update_coef(e1, -1); +// GEQ_Handle h2 = f_and1->add_GEQ(); +// h2.update_coef(e1, -1); +// h2.update_coef(e2, 1); +// F_And *f_and2 = f_or->add_and(); +// EQ_Handle h3 = f_and2->add_EQ(); +// h3.update_coef(lhs, 1); +// h3.update_coef(e2, -1); +// GEQ_Handle h4 = f_and2->add_GEQ(); +// h4.update_coef(e1, 1); +// h4.update_coef(e2, -1); +// } + +// suif2formula(r, f_and, freevars, op1, e1, side, '='); +// suif2formula(r, f_and, freevars, op2, e2, side, '='); +// } +// else if (ins->opcode() == io_max) { +// operand op1 = ins->src_op(0); +// operand op2 = ins->src_op(1); + +// F_Exists *f_exists = f_root->add_exists(); +// Variable_ID e1 = f_exists->declare(tmp_e()); +// Variable_ID e2 = f_exists->declare(tmp_e()); +// F_And *f_and = f_exists->add_and(); + +// if (rel == '>') { +// GEQ_Handle h1 = f_and->add_GEQ(); +// h1.update_coef(lhs, 1); +// h1.update_coef(e1, -1); +// GEQ_Handle h2 = f_and->add_GEQ(); +// h2.update_coef(lhs, 1); +// h2.update_coef(e2, -1); +// } +// else if (rel == '<') { +// F_Or *f_or = f_and->add_or(); +// F_And *f_and1 = f_or->add_and(); +// GEQ_Handle h1 = f_and1->add_GEQ(); +// h1.update_coef(lhs, -1); +// h1.update_coef(e1, 1); +// F_And *f_and2 = f_or->add_and(); +// GEQ_Handle h2 = f_and2->add_GEQ(); +// h2.update_coef(lhs, -1); +// h2.update_coef(e2, 1); +// } +// else { +// F_Or *f_or = f_and->add_or(); +// F_And *f_and1 = f_or->add_and(); +// EQ_Handle h1 = f_and1->add_EQ(); +// h1.update_coef(lhs, 1); +// h1.update_coef(e1, -1); +// GEQ_Handle h2 = f_and1->add_GEQ(); +// 
h2.update_coef(e1, 1); +// h2.update_coef(e2, -1); +// F_And *f_and2 = f_or->add_and(); +// EQ_Handle h3 = f_and2->add_EQ(); +// h3.update_coef(lhs, 1); +// h3.update_coef(e2, -1); +// GEQ_Handle h4 = f_and2->add_GEQ(); +// h4.update_coef(e1, -1); +// h4.update_coef(e2, 1); +// } + +// suif2formula(r, f_and, freevars, op1, e1, side, '='); +// suif2formula(r, f_and, freevars, op2, e2, side, '='); +// } +// } + +//----------------------------------------------------------------------------- +// Generate iteration space constraints +//----------------------------------------------------------------------------- + +// void add_loop_stride_constraints(Relation &r, F_And *f_root, +// std::vector<Free_Var_Decl*> &freevars, +// tree_for *tnf, char side) { + +// std::string name(tnf->index()->name()); +// int dim = 0; +// for (;dim < r.n_set(); dim++) +// if (r.set_var(dim+1)->name() == name) +// break; + +// Relation bound = get_loop_bound(r, dim); + +// operand op = tnf->step_op(); +// if (!op.is_null()) { +// if (op.is_immed()) { +// immed im = op.immediate(); +// if (im.is_integer()) { +// int c = im.integer(); + +// if (c != 1 && c != -1) +// add_loop_stride(r, bound, dim, c); +// } +// else +// assert(0); // messy stride +// } +// else +// assert(0); // messy stride +// } +// } + +// void add_loop_bound_constraints(IR_Code *ir, Relation &r, F_And *f_root, +// std::vector<Free_Var_Decl*> &freevars, +// tree_for *tnf, +// char upper_or_lower, char side, IR_CONDITION_TYPE rel) { +// Variable_ID v = find_index(r, tnf->index()->name(), side); + +// tree_node_list *tnl; + +// if (upper_or_lower == 'u') +// tnl = tnf->ub_list(); +// else +// tnl = tnf->lb_list(); + +// tree_node_list_iter iter(tnl); +// while (!iter.is_empty()) { +// tree_node *tn = iter.step(); +// if (tn->kind() != TREE_INSTR) +// break; // messy bounds + +// instruction *ins = static_cast<tree_instr *>(tn)->instr(); + + +// if (upper_or_lower == 'u' && (tnf->test() == FOR_SLT || tnf->test() == FOR_ULT)) { 
+// operand op1(ins->clone()); +// operand op2(new in_ldc(type_s32, operand(), immed(1))); +// instruction *t = new in_rrr(io_sub, op1.type(), operand(), op1, op2); + +// CG_suifRepr *repr = new CG_suifRepr(operand(t)); +// exp2formula(ir, r, f_root, freevars, repr, v, side, rel, true); +// delete t; +// } +// else if (tnf->test() == FOR_SLT || tnf->test() == FOR_SLTE || tnf->test() == FOR_ULT || tnf->test() == FOR_ULTE) { +// CG_suifRepr *repr = new CG_suifRepr(operand(ins)); +// exp2formula(ir, r, f_root, freevars, repr, v, side, rel, true); +// } +// else +// assert(0); +// } +// } + + +// Relation loop_iteration_space(std::vector<Free_Var_Decl*> &freevars, +// tree_node *tn, std::vector<tree_for*> &loops) { +// Relation r(loops.size()); +// for (unsigned i = 0; i < loops.size(); i++) { +// String s = loops[i]->index()->name(); +// r.name_set_var(i+1, s); +// } + +// F_And *f_root = r.add_and(); + +// std::vector<tree_for *> outer = find_outer_loops(tn); +// std::vector<LexicalOrderType> loops_lex(loops.size(), LEX_UNKNOWN); + +// for (unsigned i = 0; i < outer.size(); i++) { +// unsigned j; + +// for (j = 0; j < loops.size(); j++) { +// if (outer[i] == loops[j]) { +// loops_lex[j] = LEX_MATCH; +// break; +// } else if (outer[i]->index() == loops[j]->index()) { +// loops_lex[j] = lexical_order(outer[i],loops[j]); +// break; +// } +// } + +// if (j != loops.size()) { +// add_loop_bound_constraints(r, f_root, freevars, outer[i], 'l', 's', '>'); +// add_loop_bound_constraints(r, f_root, freevars, outer[i], 'u', 's', '<'); +// add_loop_stride_constraints(r,f_root, freevars, outer[i], 's'); +// } +// } + +// // Add degenerated constraints for non-enclosing loops for this +// // statement. We treat low-dim space as part of whole +// // iteration space. 
+// LexicalOrderType lex = LEX_MATCH; +// for (unsigned i = 0; i < loops.size(); i++) { +// if (loops_lex[i] != 0) { +// if (lex == LEX_MATCH) +// lex = loops_lex[i]; +// continue; +// } + +// if (lex == LEX_MATCH) { +// for (unsigned j = i+1; j < loops.size(); j++) { +// if (loops_lex[j] == LEX_BEFORE || loops_lex[j] == LEX_AFTER) { +// lex = loops_lex[j]; +// break; +// } +// } +// } + +// if (lex == LEX_MATCH) +// lex = lexical_order(tn, loops[i]); + +// if (lex == LEX_BEFORE) +// add_loop_bound_constraints(r, f_root, freevars, loops[i], 'l', 's', '='); +// else +// add_loop_bound_constraints(r, f_root, freevars, loops[i], 'u', 's', '='); +// } + +// return r; +// } + +// Relation arrays2relation(std::vector<Free_Var_Decl*> &freevars, +// in_array *ia_w, const Relation &IS1_, +// in_array *ia_r, const Relation &IS2_) { +// Relation &IS1 = const_cast<Relation &>(IS1_); +// Relation &IS2 = const_cast<Relation &>(IS2_); + +// Relation r(IS1.n_set(), IS2.n_set()); + +// for (int i = 1; i <= IS1.n_set(); i++) +// r.name_input_var(i, IS1.set_var(i)->name()); + +// for (int i = 1; i <= IS2.n_set(); i++) +// r.name_output_var(i, IS2.set_var(i)->name()+"'"); + +// if (get_sym_of_array(ia_w) != get_sym_of_array(ia_r)) { +// r.add_or(); // False Relation +// return r; +// } + +// F_And *f_root = r.add_and(); + +// for (unsigned i = 0; i < ia_w->dims(); i++) { +// F_Exists *f_exists = f_root->add_exists(); +// Variable_ID e = f_exists->declare(tmp_e()); +// F_And *f_and = f_exists->add_and(); + +// suif2formula(r, f_and, freevars, ia_w->index(i), e, 'w', '='); +// suif2formula(r, f_and, freevars, ia_r->index(i), e, 'r', '='); +// } + +// // add iteration space restriction +// r = Restrict_Domain(r, copy(IS1)); +// r = Restrict_Range(r, copy(IS2)); + +// // reset the output variable names lost in restriction +// for (int i = 1; i <= IS2.n_set(); i++) +// r.name_output_var(i, IS2.set_var(i)->name()+"'"); + +// return r; +// } + + +// std::vector<DependenceVector> 
relation2dependences (IR_Code *ir, in_array *ia_w, in_array *ia_r, const Relation &r) { +// assert(r.n_inp() == r.n_out()); + +// std::vector<DependenceVector> dependences; + +// std::stack<DependenceLevel> working; +// working.push(DependenceLevel(r, r.n_inp())); + +// while (!working.empty()) { +// DependenceLevel dep = working.top(); +// working.pop(); + +// // No dependence exists, move on. +// if (!dep.r.is_satisfiable()) +// continue; + +// if (dep.level == r.n_inp()) { +// DependenceVector dv; + +// // for loop independent dependence, use lexical order to +// // determine the correct source and destination +// if (dep.dir == 0) { +// LexicalOrderType order = lexical_order(ia_w->parent(), ia_r->parent()); + +// if (order == LEX_MATCH) +// continue; //trivial self zero-dependence +// else if (order == LEX_AFTER) { +// dv.src = new IR_suifArrayRef(ir, ia_r); +// dv.dst = new IR_suifArrayRef(ir, ia_w); +// } +// else { +// dv.src = new IR_suifArrayRef(ir, ia_w); +// dv.dst = new IR_suifArrayRef(ir,ia_r); +// } +// } +// else if (dep.dir == 1) { +// dv.src = new IR_suifArrayRef(ir, ia_w); +// dv.dst = new IR_suifArrayRef(ir, ia_r); +// } +// else { // dep.dir == -1 +// dv.src = new IR_suifArrayRef(ir, ia_r); +// dv.dst = new IR_suifArrayRef(ir, ia_w); +// } + +// dv.lbounds = dep.lbounds; +// dv.ubounds = dep.ubounds; + +// // // set the dependence type +// // if (is_lhs(dv.source) && is_lhs(dv.dest)) +// // dv.type = 'o'; +// // else if (!is_lhs(dv.source) && ! 
is_lhs(dv.dest)) +// // dv.type = 'i'; +// // else if (is_lhs(dv.source)) +// // dv.type = 'f'; +// // else +// // dv.type = 'a'; + +// dependences.push_back(dv); +// } +// else { +// // now work on the next dimension level +// int level = ++dep.level; + +// coef_t lbound, ubound; +// Relation delta = Deltas(copy(dep.r)); +// delta.query_variable_bounds(delta.set_var(level), lbound, ubound); + +// if (dep.dir == 0) { +// if (lbound > 0) { +// dep.dir = 1; +// dep.lbounds[level-1] = lbound; +// dep.ubounds[level-1] = ubound; + +// working.push(dep); +// } +// else if (ubound < 0) { +// dep.dir = -1; +// dep.lbounds[level-1] = -ubound; +// dep.ubounds[level-1] = -lbound; + +// working.push(dep); +// } +// else { +// // split the dependence vector into flow- and anti-dependence +// // for the first non-zero distance, also separate zero distance +// // at this level. +// { +// DependenceLevel dep2 = dep; + +// dep2.lbounds[level-1] = 0; +// dep2.ubounds[level-1] = 0; + +// F_And *f_root = dep2.r.and_with_and(); +// EQ_Handle h = f_root->add_EQ(); +// h.update_coef(dep2.r.input_var(level), 1); +// h.update_coef(dep2.r.output_var(level), -1); + +// working.push(dep2); +// } + +// if (lbound < 0 && ia_w != ia_r) { +// DependenceLevel dep2 = dep; + +// F_And *f_root = dep2.r.and_with_and(); +// GEQ_Handle h = f_root->add_GEQ(); +// h.update_coef(dep2.r.input_var(level), 1); +// h.update_coef(dep2.r.output_var(level), -1); +// h.update_const(-1); + +// // get tighter bounds under new constraints +// coef_t lbound, ubound; +// delta = Deltas(copy(dep2.r)); +// delta.query_variable_bounds(delta.set_var(level), +// lbound, ubound); + +// dep2.dir = -1; +// dep2.lbounds[level-1] = max(-ubound,static_cast<coef_t>(1)); // use max() to avoid Omega retardness +// dep2.ubounds[level-1] = -lbound; + +// working.push(dep2); +// } + +// if (ubound > 0) { +// DependenceLevel dep2 = dep; + +// F_And *f_root = dep2.r.and_with_and(); +// GEQ_Handle h = f_root->add_GEQ(); +// 
h.update_coef(dep2.r.input_var(level), -1); +// h.update_coef(dep2.r.output_var(level), 1); +// h.update_const(-1); + +// // get tighter bonds under new constraints +// coef_t lbound, ubound; +// delta = Deltas(copy(dep2.r)); +// delta.query_variable_bounds(delta.set_var(level), +// lbound, ubound); +// dep2.dir = 1; +// dep2.lbounds[level-1] = max(lbound,static_cast<coef_t>(1)); // use max() to avoid Omega retardness +// dep2.ubounds[level-1] = ubound; + +// working.push(dep2); +// } +// } +// } +// // now deal with dependence vector with known direction +// // determined at previous levels +// else { +// // For messy bounds, further test to see if the dependence distance +// // can be reduced to positive/negative. This is an omega hack. +// if (lbound == negInfinity && ubound == posInfinity) { +// { +// Relation t = dep.r; +// F_And *f_root = t.and_with_and(); +// GEQ_Handle h = f_root->add_GEQ(); +// h.update_coef(t.input_var(level), 1); +// h.update_coef(t.output_var(level), -1); +// h.update_const(-1); + +// if (!t.is_satisfiable()) { +// lbound = 0; +// } +// } +// { +// Relation t = dep.r; +// F_And *f_root = t.and_with_and(); +// GEQ_Handle h = f_root->add_GEQ(); +// h.update_coef(t.input_var(level), -1); +// h.update_coef(t.output_var(level), 1); +// h.update_const(-1); + +// if (!t.is_satisfiable()) { +// ubound = 0; +// } +// } +// } + +// // Same thing as above, test to see if zero dependence +// // distance possible. 
+// if (lbound == 0 || ubound == 0) { +// Relation t = dep.r; +// F_And *f_root = t.and_with_and(); +// EQ_Handle h = f_root->add_EQ(); +// h.update_coef(t.input_var(level), 1); +// h.update_coef(t.output_var(level), -1); + +// if (!t.is_satisfiable()) { +// if (lbound == 0) +// lbound = 1; +// if (ubound == 0) +// ubound = -1; +// } +// } + +// if (dep.dir == -1) { +// dep.lbounds[level-1] = -ubound; +// dep.ubounds[level-1] = -lbound; +// } +// else { // dep.dir == 1 +// dep.lbounds[level-1] = lbound; +// dep.ubounds[level-1] = ubound; +// } + +// working.push(dep); +// } +// } +// } + +// return dependences; +// } + +//----------------------------------------------------------------------------- +// Determine whether the loop (starting from 0) in the iteration space +// has only one iteration. +//----------------------------------------------------------------------------- +bool is_single_loop_iteration(const Relation &r, int level, const Relation &known) { + int n = r.n_set(); + Relation r1 = Intersection(copy(r), Extend_Set(copy(known), n-known.n_set())); + + Relation mapping(n, n); + F_And *f_root = mapping.add_and(); + for (int i = 1; i <= level; i++) { + EQ_Handle h = f_root->add_EQ(); + h.update_coef(mapping.input_var(i), 1); + h.update_coef(mapping.output_var(i), -1); + } + r1 = Range(Restrict_Domain(mapping, r1)); + r1.simplify(); + + Variable_ID v = r1.set_var(level); + for (DNF_Iterator di(r1.query_DNF()); di; di++) { + bool is_single = false; + for (EQ_Iterator ei((*di)->EQs()); ei; ei++) + if ((*ei).get_coef(v) != 0 && !(*ei).has_wildcards()) { + is_single = true; + break; + } + + if (!is_single) + return false; + } + + return true; +} + + + + +bool is_single_iteration(const Relation &r, int dim) { + assert(r.is_set()); + const int n = r.n_set(); + + if (dim >= n) + return true; + + Relation bound = get_loop_bound(r, dim); + +// if (!bound.has_single_conjunct()) +// return false; + +// Conjunct *c = bound.query_DNF()->single_conjunct(); + + for 
(DNF_Iterator di(bound.query_DNF()); di; di++) { + bool is_single = false; + for (EQ_Iterator ei((*di)->EQs()); ei; ei++) + if (!(*ei).has_wildcards()) { + is_single = true; + break; + } + + if (!is_single) + return false; + } + + return true; + + + + +// Relation r = copy(r_); +// const int n = r.n_set(); + +// if (dim >= n) +// return true; + +// Relation bound = get_loop_bound(r, dim); +// bound = Approximate(bound); +// Conjunct *c = bound.query_DNF()->single_conjunct(); + +// return c->n_GEQs() == 0; + + + + + +// Relation r = copy(r_); +// r.simplify(); +// const int n = r.n_set(); + +// if (dim >= n) +// return true; + +// for (DNF_Iterator i(r.query_DNF()); i; i++) { +// std::vector<bool> is_single(n); +// for (int j = 0; j < dim; j++) +// is_single[j] = true; +// for (int j = dim; j < n; j++) +// is_single[j] = false; + +// bool found_new_single = true; +// while (found_new_single) { +// found_new_single = false; + +// for (EQ_Iterator j = (*i)->EQs(); j; j++) { +// int saved_pos = -1; +// for (Constr_Vars_Iter k(*j); k; k++) +// if ((*k).var->kind() == Set_Var || (*k).var->kind() == Input_Var) { +// int pos = (*k).var->get_position() - 1; +// if (!is_single[pos]) +// if (saved_pos == -1) +// saved_pos = pos; +// else { +// saved_pos = -1; +// break; +// } +// } + +// if (saved_pos != -1) { +// is_single[saved_pos] = true; +// found_new_single = true; +// } +// } + +// if (is_single[dim]) +// break; +// } + +// if (!is_single[dim]) +// return false; +// } + +// return true; +} + +//----------------------------------------------------------------------------- +// Set/get the value of a variable which is know to be constant. 
+//----------------------------------------------------------------------------- +void assign_const(Relation &r, int dim, int val) { + const int n = r.n_out(); + + Relation mapping(n, n); + F_And *f_root = mapping.add_and(); + + for (int i = 1; i <= n; i++) { + if (i != dim+1) { + EQ_Handle h = f_root->add_EQ(); + h.update_coef(mapping.output_var(i), 1); + h.update_coef(mapping.input_var(i), -1); + } + else { + EQ_Handle h = f_root->add_EQ(); + h.update_coef(mapping.output_var(i), 1); + h.update_const(-val); + } + } + + r = Composition(mapping, r); +} + + +int get_const(const Relation &r, int dim, Var_Kind type) { +// Relation rr = copy(r); + Relation &rr = const_cast<Relation &>(r); + + Variable_ID v; + switch (type) { + // case Set_Var: + // v = rr.set_var(dim+1); + // break; + case Input_Var: + v = rr.input_var(dim+1); + break; + case Output_Var: + v = rr.output_var(dim+1); + break; + default: + throw std::invalid_argument("unsupported variable type"); + } + + for (DNF_Iterator di(rr.query_DNF()); di; di++) + for (EQ_Iterator ei = (*di)->EQs(); ei; ei++) + if ((*ei).is_const(v)) + return (*ei).get_const(); + + throw std::runtime_error("cannot get variable's constant value"); +} + + + + + + +//--------------------------------------------------------------------------- +// Get the bound for a specific loop. 
//---------------------------------------------------------------------------
// get_loop_bound(r, dim): r must be a set; dim is zero-based.
// The result is built in three steps:
//   1. keep set variables 1..dim+1 (identity mapping on those positions, the
//      remaining outputs are left unconstrained),
//   2. project variable dim+1 away from a copy of that relation,
//   3. Gist the first against the second, so only the constraints that are
//      not already implied by the outer variables remain.
// Variable names are copied over from r.
Relation get_loop_bound(const Relation &r, int dim) {
  assert(r.is_set());
  const int n = r.n_set();

//  Relation r1 = project_onto_levels(copy(r), dim+1, true);
  Relation mapping(n,n);
  F_And *f_root = mapping.add_and();
  for (int i = 1; i <= dim+1; i++) {
    EQ_Handle h = f_root->add_EQ();
    h.update_coef(mapping.input_var(i), 1);
    h.update_coef(mapping.output_var(i), -1);
  }
  Relation r1 = Range(Restrict_Domain(mapping, copy(r)));
  // set_var() is not a const member, hence the const_cast on r.
  for (int i = 1; i <= n; i++)
    r1.name_set_var(i, const_cast<Relation &>(r).set_var(i)->name());
  r1.setup_names();
  Relation r2 = Project(copy(r1), dim+1, Set_Var);

  return Gist(r1, r2, 1);
}

// Variant of get_loop_bound that first intersects r with `known` (extended to
// r's arity).  Note that `level` is used directly as a variable position here
// (one-based), unlike `dim` in the overload above.
Relation get_loop_bound(const Relation &r, int level, const Relation &known) {
  int n = r.n_set();
  Relation r1 = Intersection(copy(r), Extend_Set(copy(known), n-known.n_set()));

  Relation mapping(n, n);
  F_And *f_root = mapping.add_and();
  for (int i = 1; i <= level; i++) {
    EQ_Handle h = f_root->add_EQ();
    h.update_coef(mapping.input_var(i), 1);
    h.update_coef(mapping.output_var(i), -1);
  }
  r1 = Range(Restrict_Domain(mapping, r1));
  Relation r2 = Project(copy(r1), level, Set_Var);
  r1 = Gist(r1, r2, 1);

  for (int i = 1; i <= n; i++)
    r1.name_set_var(i, const_cast<Relation &>(r).set_var(i)->name());
  r1.setup_names();

  return r1;
}



// Union of get_loop_bound(t, dim) over every satisfiable relation t in r.
// Returns Relation::Null() for an empty vector.  The arity is taken from
// r[0]; all relations are presumably expected to share it -- TODO confirm.
Relation get_max_loop_bound(const std::vector<Relation> &r, int dim) {
  if (r.size() == 0)
    return Relation::Null();

  const int n = r[0].n_set();
  Relation res(Relation::False(n));
  for (int i = 0; i < r.size(); i++) {
    // is_satisfiable() is not a const member, hence the const_cast.
    Relation &t = const_cast<Relation &>(r[i]);
    if (t.is_satisfiable())
      res = Union(get_loop_bound(t, dim), res);
  }

  res.simplify();

  return res;
}

// Intersection of get_loop_bound(t, dim) over every satisfiable relation t in
// r, starting from Relation::True.  Returns Relation::Null() for an empty
// vector.
Relation get_min_loop_bound(const std::vector<Relation> &r, int dim) {
  if (r.size() == 0)
    return Relation::Null();

  const int n = r[0].n_set();
  Relation res(Relation::True(n));
  for (int i = 0; i < r.size(); i++) {
    Relation &t = const_cast<Relation &>(r[i]);
    if (t.is_satisfiable())
      res = Intersection(get_loop_bound(t, dim), res);
  }

  res.simplify();

  return res;
}

//-----------------------------------------------------------------------------
// Add stride to a loop.
//
//   r       relation to constrain in place (a set, or a relation whose output
//           variables are used)
//   bound_  loop bound of the strided loop, expressed as a set
//   dim     zero-based position of the loop variable
//   stride  step to impose
//
// For every conjunct of the bound an existential clause is and-ed onto r:
//     exists e1, e2:  v = e1 + stride*e2
//                     and e1 >= each lower bound of v
//                     and (e1 == one of the lower bounds of v)
// where v is variable dim+1 of r.
//
// Issues:
// - Don't work with relations with multiple disjuncts.
// - Omega's dealing with max lower bound is awkward.
//-----------------------------------------------------------------------------
void add_loop_stride(Relation &r, const Relation &bound_, int dim, int stride) {
  F_And *f_root = r.and_with_and();
  // query_DNF()/set_var() are not const members, hence the const_cast.
  Relation &bound = const_cast<Relation &>(bound_);
  for (DNF_Iterator di(bound.query_DNF()); di; di++) {
    F_Exists *f_exists = f_root->add_exists();
    Variable_ID e1 = f_exists->declare(tmp_e());
    Variable_ID e2 = f_exists->declare(tmp_e());
    F_And *f_and = f_exists->add_and();
    // e1 + stride*e2 - v = 0
    EQ_Handle stride_eq = f_and->add_EQ();
    stride_eq.update_coef(e1, 1);
    stride_eq.update_coef(e2, stride);
    if (!r.is_set())
      stride_eq.update_coef(r.output_var(dim+1), -1);
    else
      stride_eq.update_coef(r.set_var(dim+1), -1);
    // one alternative per lower bound: e1 equals that bound
    F_Or *f_or = f_and->add_or();

    for (GEQ_Iterator gi = (*di)->GEQs(); gi; gi++) {
      // a positive coefficient on the loop variable marks a lower bound
      if ((*gi).get_coef(bound.set_var(dim+1)) > 0) {
        // copy the lower bound constraint
        EQ_Handle h1 = f_or->add_and()->add_EQ();   // e1 == bound (one OR branch)
        GEQ_Handle h2 = f_and->add_GEQ();           // e1 >= bound (always required)
        for (Constr_Vars_Iter ci(*gi); ci; ci++) {
          switch ((*ci).var->kind()) {
            // case Set_Var:
          case Input_Var: {
            int pos = (*ci).var->get_position();
            if (pos == dim + 1) {
              // the loop variable itself is replaced by e1
              h1.update_coef(e1, (*ci).coef);
              h2.update_coef(e1, (*ci).coef);
            }
            else {
              // other variables are translated to the same position in r
              if (!r.is_set()) {
                h1.update_coef(r.output_var(pos), (*ci).coef);
                h2.update_coef(r.output_var(pos), (*ci).coef);
              }
              else {
                h1.update_coef(r.set_var(pos), (*ci).coef);
                h2.update_coef(r.set_var(pos), (*ci).coef);
              }
            }
            break;
          }
          case Global_Var: {
            // globals are re-resolved in r
            Global_Var_ID g = (*ci).var->get_global_var();
            h1.update_coef(r.get_local(g, (*ci).var->function_of()), (*ci).coef);
            h2.update_coef(r.get_local(g, (*ci).var->function_of()), (*ci).coef);
            break;
          }
          default:
            // any other variable kind in the bound is ignored
            break;
          }
        }
        h1.update_const((*gi).get_const());
        h2.update_const((*gi).get_const());
      }
    }
  }
}


// Report whether the constraints on the loops inside `level` mention the loop
// variable at `level` (one-based, passed directly to set_var()).
// r is intersected with `known`, variables level+1..n are projected away to
// obtain the outer constraints, and the Gist of the full relation against
// them is scanned for any EQ or GEQ with a non-zero coefficient on that
// variable.
bool is_inner_loop_depend_on_level(const Relation &r, int level, const Relation &known) {
  Relation r1 = Intersection(copy(r), Extend_Set(copy(known), r.n_set()-known.n_set()));
  Relation r2 = copy(r1);
  for (int i = level+1; i <= r2.n_set(); i++)
    r2 = Project(r2, r2.set_var(i));
  r2.simplify(2, 4);
  Relation r3 = Gist(r1, r2);

  Variable_ID v = r3.set_var(level);
  for (DNF_Iterator di(r3.query_DNF()); di; di++) {
    for (EQ_Iterator ei = (*di)->EQs(); ei; ei++)
      if ((*ei).get_coef(v) != 0)
        return true;

    for (GEQ_Iterator gi = (*di)->GEQs(); gi; gi++)
      if ((*gi).get_coef(v) != 0)
        return true;
  }

  return false;
}


//-----------------------------------------------------------------------------
// Suppose loop dim is i. Replace i with i+adjustment in loop bounds.
// e.g.
do i = 1, n +// do j = i, n +// after call with dim = 0 and adjustment = 1: +// do i = 1, n +// do j = i+1, n +// ----------------------------------------------------------------------------- +Relation adjust_loop_bound(const Relation &r, int level, int adjustment) { + if (adjustment == 0) + return copy(r); + + const int n = r.n_set(); + Relation r1 = copy(r); + for (int i = level+1; i <= r1.n_set(); i++) + r1 = Project(r1, r1.set_var(i)); + r1.simplify(2, 4); + Relation r2 = Gist(copy(r), copy(r1)); + + Relation mapping(n, n); + F_And *f_root = mapping.add_and(); + for (int i = 1; i <= n; i++) + if (i == level) { + EQ_Handle h = f_root->add_EQ(); + h.update_coef(mapping.input_var(level), -1); + h.update_coef(mapping.output_var(level), 1); + h.update_const(static_cast<coef_t>(adjustment)); + } + else { + EQ_Handle h = f_root->add_EQ(); + h.update_coef(mapping.input_var(i), -1); + h.update_coef(mapping.output_var(i), 1); + } + + r2 = Range(Restrict_Domain(mapping, r2)); + r1 = Intersection(r1, r2); + r1.simplify(); + + for (int i = 1; i <= n; i++) + r1.name_set_var(i, const_cast<Relation &>(r).set_var(i)->name()); + r1.setup_names(); + return r1; +} + + +// commented out on 07/14/2010 +// void adjust_loop_bound(Relation &r, int dim, int adjustment, std::vector<Free_Var_Decl *> globals) { +// assert(r.is_set()); + +// if (adjustment == 0) +// return; + +// const int n = r.n_set(); +// Tuple<std::string> name(n); +// for (int i = 1; i <= n; i++) +// name[i] = r.set_var(i)->name(); + +// Relation r1 = project_onto_levels(copy(r), dim+1, true); +// Relation r2 = Gist(copy(r), copy(r1)); + +// // remove old bogus global variable conditions since we are going to +// // update the value. 
+// if (globals.size() > 0) +// r1 = Gist(r1, project_onto_levels(copy(r), 0, true)); + +// Relation r4 = Relation::True(n); + +// for (DNF_Iterator di(r2.query_DNF()); di; di++) { +// for (EQ_Iterator ei = (*di)->EQs(); ei; ei++) { +// EQ_Handle h = r4.and_with_EQ(*ei); + +// Variable_ID v = r2.set_var(dim+1); +// coef_t c = (*ei).get_coef(v); +// if (c != 0) +// h.update_const(c*adjustment); + +// for (int i = 0; i < globals.size(); i++) { +// Variable_ID v = r2.get_local(globals[i]); +// coef_t c = (*ei).get_coef(v); +// if (c != 0) +// h.update_const(c*adjustment); +// } +// } + +// for (GEQ_Iterator gi = (*di)->GEQs(); gi; gi++) { +// GEQ_Handle h = r4.and_with_GEQ(*gi); + +// Variable_ID v = r2.set_var(dim+1); +// coef_t c = (*gi).get_coef(v); +// if (c != 0) +// h.update_const(c*adjustment); + +// for (int i = 0; i < globals.size(); i++) { +// Variable_ID v = r2.get_local(globals[i]); +// coef_t c = (*gi).get_coef(v); +// if (c != 0) +// h.update_const(c*adjustment); +// } +// } +// } +// r = Intersection(r1, r4); +// // } +// // else +// // r = Intersection(r1, r2); + +// for (int i = 1; i <= n; i++) +// r.name_set_var(i, name[i]); +// r.setup_names(); +// } + + +// void adjust_loop_bound(Relation &r, int dim, int adjustment) { +// assert(r.is_set()); +// const int n = r.n_set(); +// Tuple<String> name(n); +// for (int i = 1; i <= n; i++) +// name[i] = r.set_var(i)->name(); + +// Relation r1 = project_onto_levels(copy(r), dim+1, true); +// Relation r2 = Gist(r, copy(r1)); + +// Relation r3(n, n); +// F_And *f_root = r3.add_and(); +// for (int i = 0; i < n; i++) { +// EQ_Handle h = f_root->add_EQ(); +// h.update_coef(r3.output_var(i+1), 1); +// h.update_coef(r3.input_var(i+1), -1); +// if (i == dim) +// h.update_const(adjustment); +// } + +// r2 = Range(Restrict_Domain(r3, r2)); +// r = Intersection(r1, r2); + +// for (int i = 1; i <= n; i++) +// r.name_set_var(i, name[i]); +// r.setup_names(); +// } + +// void adjust_loop_bound(Relation &r, int dim, 
Free_Var_Decl *global_var, int adjustment) { +// assert(r.is_set()); +// const int n = r.n_set(); +// Tuple<String> name(n); +// for (int i = 1; i <= n; i++) +// name[i] = r.set_var(i)->name(); + +// Relation r1 = project_onto_levels(copy(r), dim+1, true); +// Relation r2 = Gist(r, copy(r1)); + +// Relation r3(n); +// Variable_ID v = r2.get_local(global_var); + +// for (DNF_Iterator di(r2.query_DNF()); di; di++) { +// for (EQ_Iterator ei = (*di)->EQs(); ei; ei++) { +// coef_t c = (*ei).get_coef(v); +// EQ_Handle h = r3.and_with_EQ(*ei); +// if (c != 0) +// h.update_const(c*adjustment); +// } +// for (GEQ_Iterator gi = (*di)->GEQs(); gi; gi++) { +// coef_t c = (*gi).get_coef(v); +// GEQ_Handle h = r3.and_with_GEQ(*gi); +// if (c != 0) +// h.update_const(c*adjustment); +// } +// } + +// r = Intersection(r1, r3); +// for (int i = 1; i <= n; i++) +// r.name_set_var(i, name[i]); +// r.setup_names(); +// } + + + +//------------------------------------------------------------------------------ +// If the dimension has value posInfinity, the statement should be privatized +// at this dimension. +//------------------------------------------------------------------------------ +// boolean is_private_statement(const Relation &r, int dim) { +// int n; +// if (r.is_set()) +// n = r.n_set(); +// else +// n = r.n_out(); + +// if (dim >= n) +// return false; + +// try { +// coef_t c; +// if (r.is_set()) +// c = get_const(r, dim, Set_Var); +// else +// c = get_const(r, dim, Output_Var); +// if (c == posInfinity) +// return true; +// else +// return false; +// } +// catch (loop_error e){ +// } + +// return false; +// } + + + +// // ---------------------------------------------------------------------------- +// // Calculate v mod dividend based on equations inside relation r. +// // Return posInfinity if it is not a constant. 
+// // ---------------------------------------------------------------------------- +// static coef_t mod_(const Relation &r_, Variable_ID v, int dividend, std::set<Variable_ID> &working_on) { +// assert(dividend > 0); +// if (v->kind() == Forall_Var || v->kind() == Exists_Var || v->kind() == Wildcard_Var) +// return posInfinity; + +// working_on.insert(v); + +// Relation &r = const_cast<Relation &>(r_); +// Conjunct *c = r.query_DNF()->single_conjunct(); + +// for (EQ_Iterator ei(c->EQs()); ei; ei++) { +// int coef = mod((*ei).get_coef(v), dividend); +// if (coef != 1 && coef != dividend - 1 ) +// continue; + +// coef_t result = 0; +// for (Constr_Vars_Iter cvi(*ei); cvi; cvi++) +// if ((*cvi).var != v) { +// int p = mod((*cvi).coef, dividend); + +// if (p == 0) +// continue; + +// if (working_on.find((*cvi).var) != working_on.end()) { +// result = posInfinity; +// break; +// } + +// coef_t q = mod_(r, (*cvi).var, dividend, working_on); +// if (q == posInfinity) { +// result = posInfinity; +// break; +// } +// result += p * q; +// } + +// if (result != posInfinity) { +// result += (*ei).get_const(); +// if (coef == 1) +// result = -result; +// working_on.erase(v); + +// return mod(result, dividend); +// } +// } + +// working_on.erase(v); +// return posInfinity; +// } + + +// coef_t mod(const Relation &r, Variable_ID v, int dividend) { +// std::set<Variable_ID> working_on = std::set<Variable_ID>(); + +// return mod_(r, v, dividend, working_on); +// } + + + +//----------------------------------------------------------------------------- +// Generate mapping relation for permuation. 
+//----------------------------------------------------------------------------- +Relation permute_relation(const std::vector<int> &pi) { + const int n = pi.size(); + + Relation r(n, n); + F_And *f_root = r.add_and(); + + for (int i = 0; i < n; i++) { + EQ_Handle h = f_root->add_EQ(); + h.update_coef(r.output_var(i+1), 1); + h.update_coef(r.input_var(pi[i]+1), -1); + } + + return r; +} + + + +//--------------------------------------------------------------------------- +// Find the position index variable in a Relation by name. +//--------------------------------------------------------------------------- +Variable_ID find_index(Relation &r, const std::string &s, char side) { + // Omega quirks: assure the names are propagated inside the relation + r.setup_names(); + + if (r.is_set()) { // side == 's' + for (int i = 1; i <= r.n_set(); i++) { + std::string ss = r.set_var(i)->name(); + if (s == ss) { + return r.set_var(i); + } + } + } + else if (side == 'w') { + for (int i = 1; i <= r.n_inp(); i++) { + std::string ss = r.input_var(i)->name(); + if (s == ss) { + return r.input_var(i); + } + } + } + else { // side == 'r' + for (int i = 1; i <= r.n_out(); i++) { + std::string ss = r.output_var(i)->name(); + if (s+"'" == ss) { + return r.output_var(i); + } + } + } + + return NULL; +} + +// EQ_Handle get_eq(const Relation &r, int dim, Var_Kind type) { +// Variable_ID v; +// switch (type) { +// case Set_Var: +// v = r.set_var(dim+1); +// break; +// case Input_Var: +// v = r.input_var(dim+1); +// break; +// case Output_Var: +// v = r.output_var(dim+1); +// break; +// default: +// return NULL; +// } +// for (DNF_iterator di(r.query_DNF()); di; di++) +// for (EQ_Iterator ei = (*di)->EQs(); ei; ei++) +// if ((*ei).get_coef(v) != 0) +// return (*ei); + +// return NULL; +// } + + +// std::Pair<Relation, Relation> split_loop(const Relation &r, const Relation &cond) { +// Relation r1 = Intersection(copy(r), copy(cond)); +// Relation r2 = Intersection(copy(r), 
Complement(copy(cond))); + +// return std::Pair<Relation, Relation>(r1, r2); +// } diff --git a/omegatools.hh b/omegatools.hh new file mode 100644 index 0000000..206079c --- /dev/null +++ b/omegatools.hh @@ -0,0 +1,97 @@ +#ifndef OMEGATOOLS_HH +#define OMEGATOOLS_HH + +#include <string> +#include <omega.h> +#include "dep.hh" +#include "ir_code.hh" + +std::string tmp_e(); + +void exp2formula(IR_Code *ir, omega::Relation &r, omega::F_And *f_root, + std::vector<omega::Free_Var_Decl *> &freevars, + omega::CG_outputRepr *repr, omega::Variable_ID lhs, char side, + IR_CONDITION_TYPE rel, bool destroy); +omega::Relation arrays2relation(IR_Code *ir, std::vector<omega::Free_Var_Decl*> &freevars, + const IR_ArrayRef *ref_src, const omega::Relation &IS_w, + const IR_ArrayRef *ref_dst, const omega::Relation &IS_r); +std::pair<std::vector<DependenceVector>, std::vector<DependenceVector> > relation2dependences( + const IR_ArrayRef *ref_src, const IR_ArrayRef *ref_dst, const omega::Relation &r); + +void exp2constraint(IR_Code *ir, omega::Relation &r, omega::F_And *f_root, + std::vector<omega::Free_Var_Decl *> &freevars, + omega::CG_outputRepr *repr, bool destroy); + +// suif legacy code +// void suif2formula(Relation &r, F_And *f_root, +// std::vector<Free_Var_Decl*> &freevars, +// operand op, Variable_ID lhs, +// char side, char rel); +// void suif2formula(Relation &r, F_And *f_root, +// std::vector<Free_Var_Decl*> &freevars, +// instruction *ins, Variable_ID lhs, +// char side, char rel); +// void add_loop_stride_constraints(omega::Relation &r, omega::F_And *f_root, +// std::vector<omega::Free_Var_Decl*> &freevars, +// tree_for *tnf, char side); +// void add_loop_bound_constraints(IR_Code *ir, omega::Relation &r, omega::F_And *f_root, +// std::vector<omega::Free_Var_Decl*> &freevars, +// tree_for *tnf, +// char upper_or_lower, char side, IR_CONDITION_TYPE rel); +// Relation loop_iteration_space(std::vector<Free_Var_Decl*> &freevars, +// tree_node *tn, std::vector<tree_for*> 
&loops); + +// Relation arrays2relation(std::vector<Free_Var_Decl*> &freevars, +// in_array *ia_w, const Relation &IS1, +// in_array *ia_r, const Relation &IS2); +// std::vector<DependenceVector> relation2dependences(IR_Code *ir, in_array *ia_w, +// in_array *ia_r, const Relation &r); + +// end of suif legacy code + +bool is_single_iteration(const omega::Relation &r, int dim); +void assign_const(omega::Relation &r, int dim, int val); +int get_const(const omega::Relation &r, int dim, omega::Var_Kind type); +omega::Variable_ID find_index(omega::Relation &r, const std::string &s, char side); +omega::Relation permute_relation(const std::vector<int> &pi); +omega::Relation get_loop_bound(const omega::Relation &r, int dim); +bool is_single_loop_iteration(const omega::Relation &r, int level, const omega::Relation &known); +omega::Relation get_loop_bound(const omega::Relation &r, int level, const omega::Relation &known); +omega::Relation get_max_loop_bound(const std::vector<omega::Relation> &r, int dim); +omega::Relation get_min_loop_bound(const std::vector<omega::Relation> &r, int dim); +void add_loop_stride(omega::Relation &r, const omega::Relation &bound, int dim, int stride); +bool is_inner_loop_depend_on_level(const omega::Relation &r, int level, const omega::Relation &known); +// void adjust_loop_bound(omega::Relation &r, int dim, int adjustment, std::vector<omega::Free_Var_Decl *> globals = std::vector<omega::Free_Var_Decl *>()); +omega::Relation adjust_loop_bound(const omega::Relation &r, int level, int adjustment); +// void adjust_loop_bound(Relation &r, int dim, int adjustment); +// void adjust_loop_bound(Relation &r, int dim, Free_Var_Decl *global_var, int adjustment); +// boolean is_private_statement(const omega::Relation &r, int dim); + +// coef_t mod(const Relation &r, Variable_ID v, int dividend); + + +enum LexicalOrderType {LEX_MATCH, LEX_BEFORE, LEX_AFTER, LEX_UNKNOWN}; + +// template <typename T> +// LexicalOrderType lexical_order(const std::vector<T> &a, 
const std::vector<T> &b) { +// int size = min(a.size(), b.size()); +// for (int i = 0; i < size; i++) { +// if (a[i] < b[i]) +// return LEX_BEFORE; +// else if (b[i] < a[i]) +// return LEX_AFTER; +// } +// if (a.size() < b.size()) +// return LEX_BEFORE; +// else if (b.size() < a.size()) +// return LEX_AFTER; +// else +// return LEX_MATCH; +// } + +// struct LoopException { +// std::string descr; +// LoopException(const std::string &s): descr(s) {}; +// }; + +#endif diff --git a/orig_loop_datacopy.cc b/orig_loop_datacopy.cc new file mode 100644 index 0000000..04741bc --- /dev/null +++ b/orig_loop_datacopy.cc @@ -0,0 +1,1175 @@ +/***************************************************************************** + Copyright (C) 2008 University of Southern California + Copyright (C) 2009-2010 University of Utah + All Rights Reserved. + + Purpose: + Various data copy schemes. + + Notes: + + History: + 02/20/09 Created by Chun Chen by splitting original datacopy from loop.cc +*****************************************************************************/ + +#include <code_gen/code_gen.h> +#include <code_gen/output_repr.h> +#include "loop.hh" +#include "omegatools.hh" +#include "ir_code.hh" +#include "chill_error.hh" + +using namespace omega; + +// +// data copy function by referring arrays by numbers. +// e.g. 
A[i] = A[i-1] + B[i] +// parameter array_ref_num=[0,2] means to copy data touched by A[i-1] and A[i] +// +bool Loop::datacopy(const std::vector<std::pair<int, std::vector<int> > > &array_ref_nums, int level, + bool allow_extra_read, int fastest_changing_dimension, int padding_stride, int padding_alignment, int memory_type) { + // check for sanity of parameters + std::set<int> same_loop; + for (int i = 0; i < array_ref_nums.size(); i++) { + int stmt_num = array_ref_nums[i].first; + if (stmt_num < 0 || stmt_num >= stmt.size()) + throw std::invalid_argument("invalid statement number " + to_string(stmt_num)); + if (level <= 0 || level > stmt[stmt_num].loop_level.size()) + throw std::invalid_argument("invalid loop level " + to_string(level)); + if (i == 0) { + std::vector<int> lex = getLexicalOrder(stmt_num); + same_loop = getStatements(lex, 2*level-2); + } + else if (same_loop.find(stmt_num) == same_loop.end()) + throw std::invalid_argument("array references for data copy must be located in the same subloop"); + } + + // convert array reference numbering scheme to actual array references + std::vector<std::pair<int, std::vector<IR_ArrayRef *> > > selected_refs; + for (int i = 0; i < array_ref_nums.size(); i++) { + if (array_ref_nums[i].second.size() == 0) + continue; + + int stmt_num = array_ref_nums[i].first; + selected_refs.push_back(std::make_pair(stmt_num, std::vector<IR_ArrayRef *>())); + std::vector<IR_ArrayRef *> refs = ir->FindArrayRef(stmt[stmt_num].code); + std::vector<bool> selected(refs.size(), false); + for (int j = 0; j < array_ref_nums[i].second.size(); j++) { + int ref_num = array_ref_nums[i].second[j]; + if (ref_num < 0 || ref_num >= refs.size()) { + for (int k = 0; k < refs.size(); k++) + delete refs[k]; + throw std::invalid_argument("invalid array reference number " + to_string(ref_num) + " in statement " + to_string(stmt_num)); + } + selected_refs[selected_refs.size()-1].second.push_back(refs[ref_num]); + selected[ref_num] = true; + } + for (int j = 
0; j < refs.size(); j++) + if (!selected[j]) + delete refs[j]; + } + if (selected_refs.size() == 0) + throw std::invalid_argument("found no array references to copy"); + + // do the copy + return datacopy_privatized(selected_refs, level, std::vector<int>(), allow_extra_read, fastest_changing_dimension, padding_stride, padding_alignment, memory_type); +} + +// +// data copy function by referring arrays by name. +// e.g. A[i] = A[i-1] + B[i] +// parameter array_name=A means to copy data touched by A[i-1] and A[i] +// +bool Loop::datacopy(int stmt_num, int level, const std::string &array_name, + bool allow_extra_read, int fastest_changing_dimension, int padding_stride, int padding_alignment, int memory_type) { + // check for sanity of parameters + if (stmt_num < 0 || stmt_num >= stmt.size()) + throw std::invalid_argument("invalid statement number " + to_string(stmt_num)); + if (level <= 0 || level > stmt[stmt_num].loop_level.size()) + throw std::invalid_argument("invalid loop level " + to_string(level)); + + // collect array references by name + std::vector<int> lex = getLexicalOrder(stmt_num); + int dim = 2*level - 1; + std::set<int> same_loop = getStatements(lex, dim-1); + + std::vector<std::pair<int, std::vector<IR_ArrayRef *> > > selected_refs; + for (std::set<int>::iterator i = same_loop.begin(); i != same_loop.end(); i++) { + std::vector<IR_ArrayRef *> t; + std::vector<IR_ArrayRef *> refs = ir->FindArrayRef(stmt[*i].code); + for (int j = 0; j < refs.size(); j++) + if (refs[j]->name() == array_name) + t.push_back(refs[j]); + else + delete refs[j]; + if (t.size() != 0) + selected_refs.push_back(std::make_pair(*i, t)); + } + if (selected_refs.size() == 0) + throw std::invalid_argument("found no array references with name " + to_string(array_name) + " to copy"); + + // do the copy + return datacopy_privatized(selected_refs, level, std::vector<int>(), allow_extra_read, fastest_changing_dimension, padding_stride, padding_alignment, memory_type); +} + + +bool 
Loop::datacopy_privatized(int stmt_num, int level, const std::string &array_name, const std::vector<int> &privatized_levels, + bool allow_extra_read, int fastest_changing_dimension, int padding_stride, int padding_alignment, int memory_type) { + // check for sanity of parameters + if (stmt_num < 0 || stmt_num >= stmt.size()) + throw std::invalid_argument("invalid statement number " + to_string(stmt_num)); + if (level <= 0 || level > stmt[stmt_num].loop_level.size()) + throw std::invalid_argument("invalid loop level " + to_string(level)); + + // collect array references by name + std::vector<int> lex = getLexicalOrder(stmt_num); + int dim = 2*level - 1; + std::set<int> same_loop = getStatements(lex, dim-1); + + std::vector<std::pair<int, std::vector<IR_ArrayRef *> > > selected_refs; + for (std::set<int>::iterator i = same_loop.begin(); i != same_loop.end(); i++) { + selected_refs.push_back(std::make_pair(*i, std::vector<IR_ArrayRef *>())); + + std::vector<IR_ArrayRef *> refs = ir->FindArrayRef(stmt[*i].code); + for (int j = 0; j < refs.size(); j++) + if (refs[j]->name() == array_name) + selected_refs[selected_refs.size()-1].second.push_back(refs[j]); + else + delete refs[j]; + } + if (selected_refs.size() == 0) + throw std::invalid_argument("found no array references with name " + to_string(array_name) + " to copy"); + + // do the copy + return datacopy_privatized(selected_refs, level, privatized_levels, allow_extra_read, fastest_changing_dimension, padding_stride, padding_alignment, memory_type); +} + + +bool Loop::datacopy_privatized(const std::vector<std::pair<int, std::vector<int> > > &array_ref_nums, int level, const std::vector<int> &privatized_levels, bool allow_extra_read, int fastest_changing_dimension, int padding_stride, int padding_alignment, int memory_type) { + // check for sanity of parameters + std::set<int> same_loop; + for (int i = 0; i < array_ref_nums.size(); i++) { + int stmt_num = array_ref_nums[i].first; + if (stmt_num < 0 || stmt_num >= 
stmt.size()) + throw std::invalid_argument("invalid statement number " + to_string(stmt_num)); + if (level <= 0 || level > stmt[stmt_num].loop_level.size()) + throw std::invalid_argument("invalid loop level " + to_string(level)); + if (i == 0) { + std::vector<int> lex = getLexicalOrder(stmt_num); + same_loop = getStatements(lex, 2*level-2); + } + else if (same_loop.find(stmt_num) == same_loop.end()) + throw std::invalid_argument("array references for data copy must be located in the same subloop"); + } + + // convert array reference numbering scheme to actual array references + std::vector<std::pair<int, std::vector<IR_ArrayRef *> > > selected_refs; + for (int i = 0; i < array_ref_nums.size(); i++) { + if (array_ref_nums[i].second.size() == 0) + continue; + + int stmt_num = array_ref_nums[i].first; + selected_refs.push_back(std::make_pair(stmt_num, std::vector<IR_ArrayRef *>())); + std::vector<IR_ArrayRef *> refs = ir->FindArrayRef(stmt[stmt_num].code); + std::vector<bool> selected(refs.size(), false); + for (int j = 0; j < array_ref_nums[i].second.size(); j++) { + int ref_num = array_ref_nums[i].second[j]; + if (ref_num < 0 || ref_num >= refs.size()) { + for (int k = 0; k < refs.size(); k++) + delete refs[k]; + throw std::invalid_argument("invalid array reference number " + to_string(ref_num) + " in statement " + to_string(stmt_num)); + } + selected_refs[selected_refs.size()-1].second.push_back(refs[ref_num]); + selected[ref_num] = true; + } + for (int j = 0; j < refs.size(); j++) + if (!selected[j]) + delete refs[j]; + } + if (selected_refs.size() == 0) + throw std::invalid_argument("found no array references to copy"); + + // do the copy + return datacopy_privatized(selected_refs, level, privatized_levels, allow_extra_read, fastest_changing_dimension, padding_stride, padding_alignment, memory_type); +} + + +// +// Implement low level datacopy function with lots of options. 
+// +bool Loop::datacopy_privatized(const std::vector<std::pair<int, std::vector<IR_ArrayRef *> > > &stmt_refs, int level, + const std::vector<int> &privatized_levels, + bool allow_extra_read, int fastest_changing_dimension, + int padding_stride, int padding_alignment, int memory_type) { + if (stmt_refs.size() == 0) + return true; + + // check for sanity of parameters + IR_ArraySymbol *sym = NULL; + std::vector<int> lex; + std::set<int> active; + if (level <= 0) + throw std::invalid_argument("invalid loop level " + to_string(level)); + for (int i = 0; i < privatized_levels.size(); i++) { + if (i == 0) { + if (privatized_levels[i] < level) + throw std::invalid_argument("privatized loop levels must be no less than level " + to_string(level)); + } + else if (privatized_levels[i] <= privatized_levels[i-1]) + throw std::invalid_argument("privatized loop levels must be in ascending order"); + } + for (int i = 0; i < stmt_refs.size(); i++) { + int stmt_num = stmt_refs[i].first; + active.insert(stmt_num); + if (stmt_num < 0 || stmt_num >= stmt.size()) + throw std::invalid_argument("invalid statement number " + to_string(stmt_num)); + if (privatized_levels.size() != 0) { + if (privatized_levels[privatized_levels.size()-1] > stmt[stmt_num].loop_level.size()) + throw std::invalid_argument("invalid loop level " + to_string(privatized_levels[privatized_levels.size()-1]) + " for statement " + to_string(stmt_num)); + } + else { + if (level > stmt[stmt_num].loop_level.size()) + throw std::invalid_argument("invalid loop level " + to_string(level) + " for statement " + to_string(stmt_num)); + } + for (int j = 0; j < stmt_refs[i].second.size(); j++) { + if (sym == NULL) { + sym = stmt_refs[i].second[j]->symbol(); + lex = getLexicalOrder(stmt_num); + } + else { + IR_ArraySymbol *t = stmt_refs[i].second[j]->symbol(); + if (t->name() != sym->name()) { + delete t; + delete sym; + throw std::invalid_argument("try to copy data from different arrays"); + } + delete t; + } + } + } + if 
(!(fastest_changing_dimension >= -1 && fastest_changing_dimension < sym->n_dim())) + throw std::invalid_argument("invalid fastest changing dimension for the array to be copied"); + if (padding_stride < 0) + throw std::invalid_argument("invalid temporary array stride requirement"); + if (padding_alignment == -1 || padding_alignment == 0) + throw std::invalid_argument("invalid temporary array alignment requirement"); + + int dim = 2*level - 1; + int n_dim = sym->n_dim(); + + if (fastest_changing_dimension == -1) + switch (sym->layout_type()) { + case IR_ARRAY_LAYOUT_ROW_MAJOR: + fastest_changing_dimension = n_dim - 1; + break; + case IR_ARRAY_LAYOUT_COLUMN_MAJOR: + fastest_changing_dimension = 0; + break; + default: + throw loop_error("unsupported array layout"); + } + + + // build iteration spaces for all reads and for all writes separately + apply_xform(active); + bool has_write_refs = false; + bool has_read_refs = false; + Relation wo_copy_is = Relation::False(level-1+privatized_levels.size()+n_dim); + Relation ro_copy_is = Relation::False(level-1+privatized_levels.size()+n_dim); + for (int i = 0; i < stmt_refs.size(); i++) { + int stmt_num = stmt_refs[i].first; + + for (int j = 0; j < stmt_refs[i].second.size(); j++) { + Relation mapping(stmt[stmt_num].IS.n_set(), level-1+privatized_levels.size()+n_dim); + for (int k = 1; k <= mapping.n_inp(); k++) + mapping.name_input_var(k, stmt[stmt_num].IS.set_var(k)->name()); + mapping.setup_names(); + F_And *f_root = mapping.add_and(); + for (int k = 1; k <= level-1; k++) { + EQ_Handle h = f_root->add_EQ(); + h.update_coef(mapping.input_var(k), 1); + h.update_coef(mapping.output_var(k), -1); + } + for (int k = 0; k < privatized_levels.size(); k++) { + EQ_Handle h = f_root->add_EQ(); + h.update_coef(mapping.input_var(privatized_levels[k]), 1); + h.update_coef(mapping.output_var(level+k), -1); + } + for (int k = 0; k < n_dim; k++) { + CG_outputRepr *repr = stmt_refs[i].second[j]->index(k); + exp2formula(ir, mapping, f_root, 
freevar, repr, mapping.output_var(level-1+privatized_levels.size()+k+1), 'w', IR_COND_EQ, false); + repr->clear(); + delete repr; + } + Relation r = Range(Restrict_Domain(mapping, Intersection(copy(stmt[stmt_num].IS), Extend_Set(copy(this->known), stmt[stmt_num].IS.n_set() - this->known.n_set())))); + if (stmt_refs[i].second[j]->is_write()) { + has_write_refs = true; + wo_copy_is = Union(wo_copy_is, r); + wo_copy_is.simplify(2, 4); + } + else { + has_read_refs = true; + //protonu--removing the next line for now + ro_copy_is = Union(ro_copy_is, r); + ro_copy_is.simplify(2, 4); + //ro_copy_is = ConvexRepresentation(Union(ro_copy_is, r)); + + } + } + } + + if (allow_extra_read) { + Relation t = DecoupledConvexHull(copy(ro_copy_is)); + if (t.number_of_conjuncts() > 1) + ro_copy_is = RectHull(ro_copy_is); + else + ro_copy_is = t; + } + else { + Relation t = ConvexRepresentation(copy(ro_copy_is)); + if (t.number_of_conjuncts() > 1) + ro_copy_is = RectHull(ro_copy_is); + else + ro_copy_is = t; + } + wo_copy_is = ConvexRepresentation(wo_copy_is); + + if (allow_extra_read) { + Tuple<Relation> Rs; + Tuple<int> active; + for (DNF_Iterator di(ro_copy_is.query_DNF()); di; di++) { + Rs.append(Relation(ro_copy_is, di.curr())); + active.append(1); + } + Relation the_gcs = Relation::True(ro_copy_is.n_set()); + for (int i = level-1+privatized_levels.size()+1; i <= level-1+privatized_levels.size()+n_dim; i++) { + Relation r = greatest_common_step(Rs, active, i, Relation::Null()); + the_gcs = Intersection(the_gcs, r); + } + + ro_copy_is = Approximate(ro_copy_is); + ro_copy_is = ConvexRepresentation(ro_copy_is); + ro_copy_is = Intersection(ro_copy_is, the_gcs); + ro_copy_is.simplify(); + } + for (int i = 1; i <= level-1+privatized_levels.size()+n_dim; i++) { + wo_copy_is.name_set_var(i, tmp_loop_var_name_prefix+to_string(i)); + ro_copy_is.name_set_var(i, tmp_loop_var_name_prefix+to_string(i)); + } + wo_copy_is.setup_names(); + ro_copy_is.setup_names(); + + // build merged iteration 
space for calculating temporary array size + bool already_use_recthull = false; + Relation untampered_copy_is = ConvexRepresentation(Union(copy(wo_copy_is), copy(ro_copy_is))); + Relation copy_is = untampered_copy_is; + if (copy_is.number_of_conjuncts() > 1) { + try { + copy_is = ConvexHull(copy(untampered_copy_is)); + } + catch (const std::overflow_error &e) { + copy_is = RectHull(copy(untampered_copy_is)); + already_use_recthull = true; + } + } + + +Retry_copy_is: + // extract temporary array information + CG_outputBuilder *ocg = ir->builder(); + std::vector<CG_outputRepr *> index_lb(n_dim); // initialized to NULL + std::vector<coef_t> index_stride(n_dim, 1); + std::vector<bool> is_index_eq(n_dim, false); + std::vector<std::pair<int, CG_outputRepr *> > index_sz(0); + Relation reduced_copy_is = copy(copy_is); + + for (int i = 0; i < n_dim; i++) { + if (i != 0) + reduced_copy_is = Project(reduced_copy_is, level-1+privatized_levels.size()+i, Set_Var); + Relation bound = get_loop_bound(reduced_copy_is, level-1+privatized_levels.size()+i); + + // extract stride + EQ_Handle stride_eq; + { + bool simple_stride = true; + int strides = countStrides(bound.query_DNF()->single_conjunct(), bound.set_var(level-1+privatized_levels.size()+i+1), stride_eq, simple_stride); + if (strides > 1) { + throw loop_error("too many strides"); + } + else if (strides == 1) { + int sign = stride_eq.get_coef(bound.set_var(level-1+privatized_levels.size()+i+1)); + Constr_Vars_Iter it(stride_eq, true); + index_stride[i] = abs((*it).coef/sign); + } + } + + // check if this arary index requires loop + Conjunct *c = bound.query_DNF()->single_conjunct(); + for (EQ_Iterator ei(c->EQs()); ei; ei++) { + if ((*ei).has_wildcards()) + continue; + + int coef = (*ei).get_coef(bound.set_var(level-1+privatized_levels.size()+i+1)); + if (coef != 0) { + int sign = 1; + if (coef < 0) { + coef = -coef; + sign = -1; + } + + CG_outputRepr *op = NULL; + for (Constr_Vars_Iter ci(*ei); ci; ci++) { + switch 
((*ci).var->kind()) { + case Input_Var: + { + if ((*ci).var != bound.set_var(level-1+privatized_levels.size()+i+1)) + if ((*ci).coef*sign == 1) + op = ocg->CreateMinus(op, ocg->CreateIdent((*ci).var->name())); + else if ((*ci).coef*sign == -1) + op = ocg->CreatePlus(op, ocg->CreateIdent((*ci).var->name())); + else if ((*ci).coef*sign > 1) + op = ocg->CreateMinus(op, ocg->CreateTimes(ocg->CreateInt(abs((*ci).coef)), ocg->CreateIdent((*ci).var->name()))); + else // (*ci).coef*sign < -1 + op = ocg->CreatePlus(op, ocg->CreateTimes(ocg->CreateInt(abs((*ci).coef)), ocg->CreateIdent((*ci).var->name()))); + break; + } + case Global_Var: + { + Global_Var_ID g = (*ci).var->get_global_var(); + if ((*ci).coef*sign == 1) + op = ocg->CreateMinus(op, ocg->CreateIdent(g->base_name())); + else if ((*ci).coef*sign == -1) + op = ocg->CreatePlus(op, ocg->CreateIdent(g->base_name())); + else if ((*ci).coef*sign > 1) + op = ocg->CreateMinus(op, ocg->CreateTimes(ocg->CreateInt(abs((*ci).coef)), ocg->CreateIdent(g->base_name()))); + else // (*ci).coef*sign < -1 + op = ocg->CreatePlus(op, ocg->CreateTimes(ocg->CreateInt(abs((*ci).coef)), ocg->CreateIdent(g->base_name()))); + break; + } + default: + throw loop_error("unsupported array index expression"); + } + } + if ((*ei).get_const() != 0) + op = ocg->CreatePlus(op, ocg->CreateInt(-sign*((*ei).get_const()))); + if (coef != 1) + op = ocg->CreateIntegerDivide(op, ocg->CreateInt(coef)); + + index_lb[i] = op; + is_index_eq[i] = true; + break; + } + } + if (is_index_eq[i]) + continue; + + // seperate lower and upper bounds + std::vector<GEQ_Handle> lb_list, ub_list; + for (GEQ_Iterator gi(c->GEQs()); gi; gi++) { + int coef = (*gi).get_coef(bound.set_var(level-1+privatized_levels.size()+i+1)); + if (coef != 0 && (*gi).has_wildcards()) { + bool clean_bound = true; + GEQ_Handle h; + for (Constr_Vars_Iter cvi(*gi, true); gi; gi++) + if (!findFloorInequality(bound, (*cvi).var, h, bound.set_var(level-1+privatized_levels.size()+i+1))) { + clean_bound 
= false; + break; + } + if (!clean_bound) + continue; + } + + if (coef > 0) + lb_list.push_back(*gi); + else if (coef < 0) + ub_list.push_back(*gi); + } + if (lb_list.size() == 0 || ub_list.size() == 0) + if (already_use_recthull) + throw loop_error("failed to calcuate array footprint size"); + else { + copy_is = RectHull(copy(untampered_copy_is)); + already_use_recthull = true; + goto Retry_copy_is; + } + + // build lower bound representation + Tuple<CG_outputRepr *> lb_repr_list; + for (int j = 0; j < lb_list.size(); j++) + lb_repr_list.append(outputLBasRepr(ocg, lb_list[j], bound, + bound.set_var(level-1+privatized_levels.size()+i+1), + index_stride[i], stride_eq, Relation::True(bound.n_set()), + std::vector<CG_outputRepr *>(bound.n_set()))); + + if (lb_repr_list.size() > 1) + index_lb[i] = ocg->CreateInvoke("max", lb_repr_list); + else if (lb_repr_list.size() == 1) + index_lb[i] = lb_repr_list[1]; + + // build temporary array size representation + { + Relation cal(copy_is.n_set(), 1); + F_And *f_root = cal.add_and(); + for (int j = 0; j < ub_list.size(); j++) + for (int k = 0; k < lb_list.size(); k++) { + GEQ_Handle h = f_root->add_GEQ(); + + for (Constr_Vars_Iter ci(ub_list[j]); ci; ci++) { + switch ((*ci).var->kind()) { + case Input_Var: + { + int pos = (*ci).var->get_position(); + h.update_coef(cal.input_var(pos), (*ci).coef); + break; + } + case Global_Var: + { + Global_Var_ID g = (*ci).var->get_global_var(); + Variable_ID v; + if (g->arity() == 0) + v = cal.get_local(g); + else + v = cal.get_local(g, (*ci).var->function_of()); + h.update_coef(v, (*ci).coef); + break; + } + default: + throw loop_error("cannot calculate temporay array size statically"); + } + } + h.update_const(ub_list[j].get_const()); + + for (Constr_Vars_Iter ci(lb_list[k]); ci; ci++) { + switch ((*ci).var->kind()) { + case Input_Var: + { + int pos = (*ci).var->get_position(); + h.update_coef(cal.input_var(pos), (*ci).coef); + break; + } + case Global_Var: + { + Global_Var_ID g = 
(*ci).var->get_global_var(); + Variable_ID v; + if (g->arity() == 0) + v = cal.get_local(g); + else + v = cal.get_local(g, (*ci).var->function_of()); + h.update_coef(v, (*ci).coef); + break; + } + default: + throw loop_error("cannot calculate temporay array size statically"); + } + } + h.update_const(lb_list[k].get_const()); + + h.update_const(1); + h.update_coef(cal.output_var(1), -1); + } + + cal = Restrict_Domain(cal, copy(copy_is)); + for (int j = 1; j <= cal.n_inp(); j++) + cal = Project(cal, j, Input_Var); + cal.simplify(); + + // pad temporary array size + // TODO: for variable array size, create padding formula + Conjunct *c = cal.query_DNF()->single_conjunct(); + bool is_index_bound_const = false; + for (GEQ_Iterator gi(c->GEQs()); gi && !is_index_bound_const; gi++) + if ((*gi).is_const(cal.output_var(1))) { + coef_t size = (*gi).get_const() / (-(*gi).get_coef(cal.output_var(1))); + if (padding_stride != 0) { + size = (size + index_stride[i] - 1) / index_stride[i]; + if (i == fastest_changing_dimension) + size = size * padding_stride; + } + if (i == fastest_changing_dimension) { + if (padding_alignment > 1) { // align to boundary for data packing + int residue = size % padding_alignment; + if (residue) + size = size+padding_alignment-residue; + } + else if (padding_alignment < -1) { // un-alignment for memory bank conflicts + while (gcd(size, static_cast<coef_t>(-padding_alignment)) != 1) + size++; + } + } + index_sz.push_back(std::make_pair(i, ocg->CreateInt(size))); + is_index_bound_const = true; + } + + if (!is_index_bound_const) { + for (GEQ_Iterator gi(c->GEQs()); gi && !is_index_bound_const; gi++) { + int coef = (*gi).get_coef(cal.output_var(1)); + if (coef < 0) { + CG_outputRepr *op = NULL; + for (Constr_Vars_Iter ci(*gi); ci; ci++) { + if ((*ci).var != cal.output_var(1)) { + switch((*ci).var->kind()) { + case Global_Var: + { + Global_Var_ID g = (*ci).var->get_global_var(); + if ((*ci).coef == 1) + op = ocg->CreatePlus(op, 
ocg->CreateIdent(g->base_name())); + else if ((*ci).coef == -1) + op = ocg->CreateMinus(op, ocg->CreateIdent(g->base_name())); + else if ((*ci).coef > 1) + op = ocg->CreatePlus(op, ocg->CreateTimes(ocg->CreateInt((*ci).coef), ocg->CreateIdent(g->base_name()))); + else // (*ci).coef < -1 + op = ocg->CreateMinus(op, ocg->CreateTimes(ocg->CreateInt(-(*ci).coef), ocg->CreateIdent(g->base_name()))); + break; + } + default: + throw loop_error("failed to generate array index bound code"); + } + } + } + int c = (*gi).get_const(); + if (c > 0) + op = ocg->CreatePlus(op, ocg->CreateInt(c)); + else if (c < 0) + op = ocg->CreateMinus(op, ocg->CreateInt(-c)); + if (padding_stride != 0) { + if (i == fastest_changing_dimension) { + coef_t g = gcd(index_stride[i], static_cast<coef_t>(padding_stride)); + coef_t t1 = index_stride[i] / g; + if (t1 != 1) + op = ocg->CreateIntegerDivide(ocg->CreatePlus(op, ocg->CreateInt(t1-1)), ocg->CreateInt(t1)); + coef_t t2 = padding_stride / g; + if (t2 != 1) + op = ocg->CreateTimes(op, ocg->CreateInt(t2)); + } + else if (index_stride[i] != 1) { + op = ocg->CreateIntegerDivide(ocg->CreatePlus(op, ocg->CreateInt(index_stride[i]-1)), ocg->CreateInt(index_stride[i])); + } + } + + index_sz.push_back(std::make_pair(i, op)); + break; + } + } + } + } + } + + // change the temporary array index order + for (int i = 0; i < index_sz.size(); i++) + if (index_sz[i].first == fastest_changing_dimension) + switch (sym->layout_type()) { + case IR_ARRAY_LAYOUT_ROW_MAJOR: + std::swap(index_sz[index_sz.size()-1], index_sz[i]); + break; + case IR_ARRAY_LAYOUT_COLUMN_MAJOR: + std::swap(index_sz[0], index_sz[i]); + break; + default: + throw loop_error("unsupported array layout"); + } + + // declare temporary array or scalar + IR_Symbol *tmp_sym; + if (index_sz.size() == 0) { + tmp_sym = ir->CreateScalarSymbol(sym, memory_type); + } + else { + std::vector<CG_outputRepr *> tmp_array_size(index_sz.size()); + for (int i = 0; i < index_sz.size(); i++) + tmp_array_size[i] = 
index_sz[i].second->clone(); + tmp_sym = ir->CreateArraySymbol(sym, tmp_array_size, memory_type); + } + + // create temporary array read initialization code + CG_outputRepr *copy_code_read; + if (has_read_refs) + if (index_sz.size() == 0) { + IR_ScalarRef *tmp_scalar_ref = ir->CreateScalarRef(static_cast<IR_ScalarSymbol *>(tmp_sym)); + + std::vector<CG_outputRepr *> rhs_index(n_dim); + for (int i = 0; i < index_lb.size(); i++) + if (is_index_eq[i]) + rhs_index[i] = index_lb[i]->clone(); + else + rhs_index[i] = ir->builder()->CreateIdent(copy_is.set_var(level-1+privatized_levels.size()+i+1)->name()); + IR_ArrayRef *copied_array_ref = ir->CreateArrayRef(sym, rhs_index); + + copy_code_read = ir->builder()->CreateAssignment(0, tmp_scalar_ref->convert(), copied_array_ref->convert()); + } + else { + std::vector<CG_outputRepr *> lhs_index(index_sz.size()); + for (int i = 0; i < index_sz.size(); i++) { + int cur_index_num = index_sz[i].first; + CG_outputRepr *cur_index_repr = ocg->CreateMinus(ocg->CreateIdent(copy_is.set_var(level-1+privatized_levels.size()+cur_index_num+1)->name()), index_lb[cur_index_num]->clone()); + if (padding_stride != 0) { + if (i == n_dim-1) { + coef_t g = gcd(index_stride[cur_index_num], static_cast<coef_t>(padding_stride)); + coef_t t1 = index_stride[cur_index_num] / g; + if (t1 != 1) + cur_index_repr = ocg->CreateIntegerDivide(cur_index_repr, ocg->CreateInt(t1)); + coef_t t2 = padding_stride / g; + if (t2 != 1) + cur_index_repr = ocg->CreateTimes(cur_index_repr, ocg->CreateInt(t2)); + } + else if (index_stride[cur_index_num] != 1) { + cur_index_repr = ocg->CreateIntegerDivide(cur_index_repr, ocg->CreateInt(index_stride[cur_index_num])); + } + } + + if (ir->ArrayIndexStartAt() != 0) + cur_index_repr = ocg->CreatePlus(cur_index_repr, ocg->CreateInt(ir->ArrayIndexStartAt())); + lhs_index[i] = cur_index_repr; + } + + IR_ArrayRef *tmp_array_ref = ir->CreateArrayRef(static_cast<IR_ArraySymbol *>(tmp_sym), lhs_index); + + std::vector<CG_outputRepr *> 
rhs_index(n_dim); + for (int i = 0; i < index_lb.size(); i++) + if (is_index_eq[i]) + rhs_index[i] = index_lb[i]->clone(); + else + rhs_index[i] = ir->builder()->CreateIdent(copy_is.set_var(level-1+privatized_levels.size()+i+1)->name()); + IR_ArrayRef *copied_array_ref = ir->CreateArrayRef(sym, rhs_index); + + copy_code_read = ir->builder()->CreateAssignment(0, tmp_array_ref->convert(), copied_array_ref->convert()); + } + + // create temporary array write back code + CG_outputRepr *copy_code_write; + if (has_write_refs) + if (index_sz.size() == 0) { + IR_ScalarRef *tmp_scalar_ref = ir->CreateScalarRef(static_cast<IR_ScalarSymbol *>(tmp_sym)); + + std::vector<CG_outputRepr *> rhs_index(n_dim); + for (int i = 0; i < index_lb.size(); i++) + if (is_index_eq[i]) + rhs_index[i] = index_lb[i]->clone(); + else + rhs_index[i] = ir->builder()->CreateIdent(copy_is.set_var(level-1+privatized_levels.size()+i+1)->name()); + IR_ArrayRef *copied_array_ref = ir->CreateArrayRef(sym, rhs_index); + + copy_code_write = ir->builder()->CreateAssignment(0, copied_array_ref->convert(), tmp_scalar_ref->convert()); + } + else { + std::vector<CG_outputRepr *> lhs_index(n_dim); + for (int i = 0; i < index_lb.size(); i++) + if (is_index_eq[i]) + lhs_index[i] = index_lb[i]->clone(); + else + lhs_index[i] = ir->builder()->CreateIdent(copy_is.set_var(level-1+privatized_levels.size()+i+1)->name()); + IR_ArrayRef *copied_array_ref = ir->CreateArrayRef(sym, lhs_index); + + std::vector<CG_outputRepr *> rhs_index(index_sz.size()); + for (int i = 0; i < index_sz.size(); i++) { + int cur_index_num = index_sz[i].first; + CG_outputRepr *cur_index_repr = ocg->CreateMinus(ocg->CreateIdent(copy_is.set_var(level-1+privatized_levels.size()+cur_index_num+1)->name()), index_lb[cur_index_num]->clone()); + if (padding_stride != 0) { + if (i == n_dim-1) { + coef_t g = gcd(index_stride[cur_index_num], static_cast<coef_t>(padding_stride)); + coef_t t1 = index_stride[cur_index_num] / g; + if (t1 != 1) + cur_index_repr 
= ocg->CreateIntegerDivide(cur_index_repr, ocg->CreateInt(t1)); + coef_t t2 = padding_stride / g; + if (t2 != 1) + cur_index_repr = ocg->CreateTimes(cur_index_repr, ocg->CreateInt(t2)); + } + else if (index_stride[cur_index_num] != 1) { + cur_index_repr = ocg->CreateIntegerDivide(cur_index_repr, ocg->CreateInt(index_stride[cur_index_num])); + } + } + + if (ir->ArrayIndexStartAt() != 0) + cur_index_repr = ocg->CreatePlus(cur_index_repr, ocg->CreateInt(ir->ArrayIndexStartAt())); + rhs_index[i] = cur_index_repr; + } + IR_ArrayRef *tmp_array_ref = ir->CreateArrayRef(static_cast<IR_ArraySymbol *>(tmp_sym), rhs_index); + + copy_code_write = ir->builder()->CreateAssignment(0, copied_array_ref->convert(), tmp_array_ref->convert()); + } + + // now we can remove those loops for array indexes that are + // dependent on others + if (!(index_sz.size() == n_dim && (sym->layout_type() == IR_ARRAY_LAYOUT_ROW_MAJOR || n_dim <= 1))) { + Relation mapping(level-1+privatized_levels.size()+n_dim, level-1+privatized_levels.size()+index_sz.size()); + F_And *f_root = mapping.add_and(); + for (int i = 1; i <= level-1+privatized_levels.size(); i++) { + EQ_Handle h = f_root->add_EQ(); + h.update_coef(mapping.input_var(i), 1); + h.update_coef(mapping.output_var(i), -1); + } + + int cur_index = 0; + std::vector<int> mapped_index(index_sz.size()); + for (int i = 0; i < n_dim; i++) + if (!is_index_eq[i]) { + EQ_Handle h = f_root->add_EQ(); + h.update_coef(mapping.input_var(level-1+privatized_levels.size()+i+1), 1); + switch (sym->layout_type()) { + case IR_ARRAY_LAYOUT_COLUMN_MAJOR: { + h.update_coef(mapping.output_var(level-1+privatized_levels.size()+index_sz.size()-cur_index), -1); + mapped_index[index_sz.size()-cur_index-1] = i; + break; + } + case IR_ARRAY_LAYOUT_ROW_MAJOR: { + h.update_coef(mapping.output_var(level-1+privatized_levels.size()+cur_index+1), -1); + mapped_index[cur_index] = i; + break; + } + default: + throw loop_error("unsupported array layout"); + } + cur_index++; + } + + 
wo_copy_is = Range(Restrict_Domain(copy(mapping), wo_copy_is)); + ro_copy_is = Range(Restrict_Domain(copy(mapping), ro_copy_is)); + for (int i = 1; i <= level-1+privatized_levels.size(); i++) { + wo_copy_is.name_set_var(i, copy_is.set_var(i)->name()); + ro_copy_is.name_set_var(i, copy_is.set_var(i)->name()); + } + for (int i = 0; i < index_sz.size(); i++) { + wo_copy_is.name_set_var(level-1+privatized_levels.size()+i+1, copy_is.set_var(level-1+privatized_levels.size()+mapped_index[i]+1)->name()); + ro_copy_is.name_set_var(level-1+privatized_levels.size()+i+1, copy_is.set_var(level-1+privatized_levels.size()+mapped_index[i]+1)->name()); + } + wo_copy_is.setup_names(); + ro_copy_is.setup_names(); + } + + // insert read copy statement + int old_num_stmt = stmt.size(); + int ro_copy_stmt_num = -1; + if (has_read_refs) { + Relation copy_xform(ro_copy_is.n_set(), 2*ro_copy_is.n_set()+1); + { + F_And *f_root = copy_xform.add_and(); + for (int i = 1; i <= ro_copy_is.n_set(); i++) { + EQ_Handle h = f_root->add_EQ(); + h.update_coef(copy_xform.input_var(i), 1); + h.update_coef(copy_xform.output_var(2*i), -1); + } + for (int i = 1; i <= dim; i+=2) { + EQ_Handle h = f_root->add_EQ(); + h.update_coef(copy_xform.output_var(i), -1); + h.update_const(lex[i-1]); + } + for (int i = dim+2; i <= copy_xform.n_out(); i+=2) { + EQ_Handle h = f_root->add_EQ(); + h.update_coef(copy_xform.output_var(i), 1); + } + } + + Statement copy_stmt_read; + copy_stmt_read.IS = ro_copy_is; + copy_stmt_read.xform = copy_xform; + copy_stmt_read.code = copy_code_read; + copy_stmt_read.loop_level = std::vector<LoopLevel>(ro_copy_is.n_set()); + for (int i = 0; i < level-1; i++) { + copy_stmt_read.loop_level[i].type = stmt[*(active.begin())].loop_level[i].type; + if (stmt[*(active.begin())].loop_level[i].type == LoopLevelTile && + stmt[*(active.begin())].loop_level[i].payload >= level) { + int j; + for (j = 0; j < privatized_levels.size(); j++) + if (privatized_levels[j] == 
stmt[*(active.begin())].loop_level[i].payload) + break; + if (j == privatized_levels.size()) + copy_stmt_read.loop_level[i].payload = -1; + else + copy_stmt_read.loop_level[i].payload = level + j; + } + else + copy_stmt_read.loop_level[i].payload = stmt[*(active.begin())].loop_level[i].payload; + copy_stmt_read.loop_level[i].parallel_level = stmt[*(active.begin())].loop_level[i].parallel_level; + } + for (int i = 0; i < privatized_levels.size(); i++) { + copy_stmt_read.loop_level[level-1+i].type = stmt[*(active.begin())].loop_level[privatized_levels[i]].type; + copy_stmt_read.loop_level[level-1+i].payload = stmt[*(active.begin())].loop_level[privatized_levels[i]].payload; + copy_stmt_read.loop_level[level-1+i].parallel_level = stmt[*(active.begin())].loop_level[privatized_levels[i]].parallel_level; + } + int left_num_dim = num_dep_dim - (get_last_dep_dim_before(*(active.begin()), level) + 1); + for (int i = 0; i < min(left_num_dim, static_cast<int>(index_sz.size())); i++) { + copy_stmt_read.loop_level[level-1+privatized_levels.size()+i].type = LoopLevelOriginal; + copy_stmt_read.loop_level[level-1+privatized_levels.size()+i].payload = num_dep_dim-left_num_dim+i; + copy_stmt_read.loop_level[level-1+privatized_levels.size()+i].parallel_level = 0; + } + for (int i = min(left_num_dim, static_cast<int>(index_sz.size())); i < index_sz.size(); i++) { + copy_stmt_read.loop_level[level-1+privatized_levels.size()+i].type = LoopLevelUnknown; + copy_stmt_read.loop_level[level-1+privatized_levels.size()+i].payload = -1; + copy_stmt_read.loop_level[level-1+privatized_levels.size()+i].parallel_level = 0; + } + + shiftLexicalOrder(lex, dim-1, 1); + stmt.push_back(copy_stmt_read); + ro_copy_stmt_num = stmt.size() - 1; + dep.insert(); + } + + // insert write copy statement + int wo_copy_stmt_num = -1; + if (has_write_refs) { + Relation copy_xform(wo_copy_is.n_set(), 2*wo_copy_is.n_set()+1); + { + F_And *f_root = copy_xform.add_and(); + for (int i = 1; i <= wo_copy_is.n_set(); i++) { 
+ EQ_Handle h = f_root->add_EQ(); + h.update_coef(copy_xform.input_var(i), 1); + h.update_coef(copy_xform.output_var(2*i), -1); + } + for (int i = 1; i <= dim; i+=2) { + EQ_Handle h = f_root->add_EQ(); + h.update_coef(copy_xform.output_var(i), -1); + h.update_const(lex[i-1]); + } + for (int i = dim+2; i <= copy_xform.n_out(); i+=2) { + EQ_Handle h = f_root->add_EQ(); + h.update_coef(copy_xform.output_var(i), 1); + } + } + + Statement copy_stmt_write; + copy_stmt_write.IS = wo_copy_is; + copy_stmt_write.xform = copy_xform; + copy_stmt_write.code = copy_code_write; + copy_stmt_write.loop_level = std::vector<LoopLevel>(wo_copy_is.n_set()); + for (int i = 0; i < level-1; i++) { + copy_stmt_write.loop_level[i].type = stmt[*(active.begin())].loop_level[i].type; + if (stmt[*(active.begin())].loop_level[i].type == LoopLevelTile && + stmt[*(active.begin())].loop_level[i].payload >= level) { + int j; + for (j = 0; j < privatized_levels.size(); j++) + if (privatized_levels[j] == stmt[*(active.begin())].loop_level[i].payload) + break; + if (j == privatized_levels.size()) + copy_stmt_write.loop_level[i].payload = -1; + else + copy_stmt_write.loop_level[i].payload = level + j; + } + else + copy_stmt_write.loop_level[i].payload = stmt[*(active.begin())].loop_level[i].payload; + copy_stmt_write.loop_level[i].parallel_level = stmt[*(active.begin())].loop_level[i].parallel_level; + } + for (int i = 0; i < privatized_levels.size(); i++) { + copy_stmt_write.loop_level[level-1+i].type = stmt[*(active.begin())].loop_level[privatized_levels[i]].type; + copy_stmt_write.loop_level[level-1+i].payload = stmt[*(active.begin())].loop_level[privatized_levels[i]].payload; + copy_stmt_write.loop_level[level-1+i].parallel_level = stmt[*(active.begin())].loop_level[privatized_levels[i]].parallel_level; + } + int left_num_dim = num_dep_dim - (get_last_dep_dim_before(*(active.begin()), level) + 1); + for (int i = 0; i < min(left_num_dim, static_cast<int>(index_sz.size())); i++) { + 
copy_stmt_write.loop_level[level-1+privatized_levels.size()+i].type = LoopLevelOriginal; + copy_stmt_write.loop_level[level-1+privatized_levels.size()+i].payload = num_dep_dim-left_num_dim+i; + copy_stmt_write.loop_level[level-1+privatized_levels.size()+i].parallel_level = 0; + } + for (int i = min(left_num_dim, static_cast<int>(index_sz.size())); i < index_sz.size(); i++) { + copy_stmt_write.loop_level[level-1+privatized_levels.size()+i].type = LoopLevelUnknown; + copy_stmt_write.loop_level[level-1+privatized_levels.size()+i].payload = -1; + copy_stmt_write.loop_level[level-1+privatized_levels.size()+i].parallel_level = 0; + } + + lex[dim-1]++; + shiftLexicalOrder(lex, dim-1, -2); + stmt.push_back(copy_stmt_write); + wo_copy_stmt_num = stmt.size() - 1; + dep.insert(); + } + + // replace original array accesses with temporary array accesses + for (int i =0; i < stmt_refs.size(); i++) + for (int j = 0; j < stmt_refs[i].second.size(); j++) { + if (index_sz.size() == 0) { + IR_ScalarRef *tmp_scalar_ref = ir->CreateScalarRef(static_cast<IR_ScalarSymbol *>(tmp_sym)); + ir->ReplaceExpression(stmt_refs[i].second[j], tmp_scalar_ref->convert()); + } + else { + std::vector<CG_outputRepr *> index_repr(index_sz.size()); + for (int k = 0; k < index_sz.size(); k++) { + int cur_index_num = index_sz[k].first; + + CG_outputRepr *cur_index_repr = ocg->CreateMinus(stmt_refs[i].second[j]->index(cur_index_num), index_lb[cur_index_num]->clone()); + if (padding_stride != 0) { + if (k == n_dim-1) { + coef_t g = gcd(index_stride[cur_index_num], static_cast<coef_t>(padding_stride)); + coef_t t1 = index_stride[cur_index_num] / g; + if (t1 != 1) + cur_index_repr = ocg->CreateIntegerDivide(cur_index_repr, ocg->CreateInt(t1)); + coef_t t2 = padding_stride / g; + if (t2 != 1) + cur_index_repr = ocg->CreateTimes(cur_index_repr, ocg->CreateInt(t2)); + } + else if (index_stride[cur_index_num] != 1) { + cur_index_repr = ocg->CreateIntegerDivide(cur_index_repr, 
ocg->CreateInt(index_stride[cur_index_num])); + } + } + + if (ir->ArrayIndexStartAt() != 0) + cur_index_repr = ocg->CreatePlus(cur_index_repr, ocg->CreateInt(ir->ArrayIndexStartAt())); + index_repr[k] = cur_index_repr; + } + + IR_ArrayRef *tmp_array_ref = ir->CreateArrayRef(static_cast<IR_ArraySymbol *>(tmp_sym), index_repr); + ir->ReplaceExpression(stmt_refs[i].second[j], tmp_array_ref->convert()); + } + } + + // update dependence graph + int dep_dim = get_last_dep_dim_before(*(active.begin()), level) + 1; + if (ro_copy_stmt_num != -1) { + for (int i = 0; i < old_num_stmt; i++) { + std::vector<std::vector<DependenceVector> > D; + + for (DependenceGraph::EdgeList::iterator j = dep.vertex[i].second.begin(); j != dep.vertex[i].second.end();) { + if (active.find(i) != active.end() && active.find(j->first) == active.end()) { + std::vector<DependenceVector> dvs1, dvs2; + for (int k = 0; k < j->second.size(); k++) { + DependenceVector dv = j->second[k]; + if (dv.sym != NULL && dv.sym->name() == sym->name() && (dv.type == DEP_R2R || dv.type == DEP_R2W)) + dvs1.push_back(dv); + else + dvs2.push_back(dv); + } + j->second = dvs2; + if (dvs1.size() > 0) + dep.connect(ro_copy_stmt_num, j->first, dvs1); + } + else if (active.find(i) == active.end() && active.find(j->first) != active.end()) { + std::vector<DependenceVector> dvs1, dvs2; + for (int k = 0; k < j->second.size(); k++) { + DependenceVector dv = j->second[k]; + if (dv.sym != NULL && dv.sym->name() == sym->name() && (dv.type == DEP_R2R || dv.type == DEP_W2R)) + dvs1.push_back(dv); + else + dvs2.push_back(dv); + } + j->second = dvs2; + if (dvs1.size() > 0) + D.push_back(dvs1); + } + + if (j->second.size() == 0) + dep.vertex[i].second.erase(j++); + else + j++; + } + + for (int j = 0; j < D.size(); j++) + dep.connect(i, ro_copy_stmt_num, D[j]); + } + + // insert dependences from copy statement loop to copied statements + DependenceVector dv; + dv.type = DEP_W2R; + dv.sym = tmp_sym->clone(); + dv.lbounds = 
std::vector<coef_t>(num_dep_dim, 0); + dv.ubounds = std::vector<coef_t>(num_dep_dim, 0); + for (int i = dep_dim; i < num_dep_dim; i++) { + dv.lbounds[i] = -posInfinity; + dv.ubounds[i] = posInfinity; + } + for (std::set<int>::iterator i = active.begin(); i != active.end(); i++) + dep.connect(ro_copy_stmt_num, *i, dv); + } + + if (wo_copy_stmt_num != -1) { + for (int i = 0; i < old_num_stmt; i++) { + std::vector<std::vector<DependenceVector> > D; + + for (DependenceGraph::EdgeList::iterator j = dep.vertex[i].second.begin(); j != dep.vertex[i].second.end();) { + if (active.find(i) != active.end() && active.find(j->first) == active.end()) { + std::vector<DependenceVector> dvs1, dvs2; + for (int k = 0; k < j->second.size(); k++) { + DependenceVector dv = j->second[k]; + if (dv.sym != NULL && dv.sym->name() == sym->name() && (dv.type == DEP_W2R || dv.type == DEP_W2W)) + dvs1.push_back(dv); + else + dvs2.push_back(dv); + } + j->second = dvs2; + if (dvs1.size() > 0) + dep.connect(wo_copy_stmt_num, j->first, dvs1); + } + else if (active.find(i) == active.end() && active.find(j->first) != active.end()) { + std::vector<DependenceVector> dvs1, dvs2; + for (int k = 0; k < j->second.size(); k++) { + DependenceVector dv = j->second[k]; + if (dv.sym != NULL && dv.sym->name() == sym->name() && (dv.type == DEP_R2W || dv.type == DEP_W2W)) + dvs1.push_back(dv); + else + dvs2.push_back(dv); + } + j->second = dvs2; + if (dvs1.size() > 0) + D.push_back(dvs1); + } + + if (j->second.size() == 0) + dep.vertex[i].second.erase(j++); + else + j++; + } + + for (int j = 0; j < D.size(); j++) + dep.connect(i, wo_copy_stmt_num, D[j]); + } + + // insert dependences from copied statements to write statements + DependenceVector dv; + dv.type = DEP_W2R; + dv.sym = tmp_sym->clone(); + dv.lbounds = std::vector<coef_t>(num_dep_dim, 0); + dv.ubounds = std::vector<coef_t>(num_dep_dim, 0); + for (int i = dep_dim; i < num_dep_dim; i++) { + dv.lbounds[i] = -posInfinity; + dv.ubounds[i] = posInfinity; + } + 
for (std::set<int>::iterator i = active.begin(); i != active.end(); i++) + dep.connect(*i, wo_copy_stmt_num, dv); + + } + + // update variable name for dependences among copied statements + for (int i = 0; i < old_num_stmt; i++) { + if (active.find(i) != active.end()) + for (DependenceGraph::EdgeList::iterator j = dep.vertex[i].second.begin(); j != dep.vertex[i].second.end(); j++) + if (active.find(j->first) != active.end()) + for (int k = 0; k < j->second.size(); k++) { + IR_Symbol *s = tmp_sym->clone(); + j->second[k].sym = s; + } + } + + // insert anti-dependence from write statement to read statement + if (ro_copy_stmt_num != -1 && wo_copy_stmt_num != -1) + if (dep_dim >= 0) { + DependenceVector dv; + dv.type = DEP_R2W; + dv.sym = tmp_sym->clone(); + dv.lbounds = std::vector<coef_t>(num_dep_dim, 0); + dv.ubounds = std::vector<coef_t>(num_dep_dim, 0); + for (int k = dep_dim; k < num_dep_dim; k++) { + dv.lbounds[k] = -posInfinity; + dv.ubounds[k] = posInfinity; + } + for (int k = 0; k < dep_dim; k++) { + if (k != 0) { + dv.lbounds[k-1] = 0; + dv.ubounds[k-1] = 0; + } + dv.lbounds[k] = 1; + dv.ubounds[k] = posInfinity; + dep.connect(wo_copy_stmt_num, ro_copy_stmt_num, dv); + } + } + + + // cleanup + delete sym; + delete tmp_sym; + for (int i = 0; i < index_lb.size(); i++) { + index_lb[i]->clear(); + delete index_lb[i]; + } + for (int i = 0; i < index_sz.size(); i++) { + index_sz[i].second->clear(); + delete index_sz[i].second; + } + + return true; +} diff --git a/parse_expr.ll b/parse_expr.ll new file mode 100644 index 0000000..e97a1db --- /dev/null +++ b/parse_expr.ll @@ -0,0 +1,25 @@ +%{ +// some C++ code +#include "chill_run_util.hh" +#include "parse_expr.tab.hh" +%} + +%option noyywrap +%option header-file="parse_expr.ll.hh" + +%% +[ \t]+ /*ignore*/ +\n /*ignore*/ +L[0-9]+ { yylval.val = atoi(&yytext[1]); return LEVEL; } +[0-9]+ { yylval.val = atoi(yytext); return NUMBER; } +\<\= return LE; +\>\= return GE; +\=(\=)? 
%{
/*
 * parse_expr.yy -- bison grammar for a single relational condition such as
 * "L1 + 2*n <= m".  Terminals come from the flex scanner in parse_expr.ll
 * (NUMBER, LEVEL, VARIABLE, LE, GE, EQ and single-character operators).
 * All semantic values are built by the make_* helpers, which this file only
 * calls; they are expected to be declared by chill_run_util.hh.
 */
#include "chill_run_util.hh"
#include "parse_expr.ll.hh"

extern int yydebug;

void yyerror(const char*);
int yyparse(simap_vec_t** rel);

/* Result slot: written by the `prog` action, read by parse_relation_vector(). */
static simap_vec_t* return_rel = NULL;

%}

%union {
  int val;
  char* str_val;
  simap_t* cond_item;
  simap_vec_t* cond;
}

%token <val> NUMBER
%token <val> LEVEL
%token <str_val> VARIABLE

%left LE GE EQ '<' '>'
%left '-' '+' '*' '/'

/*the final output from this language should be an Omega Relation object*/
%type <cond> cond prog
%type <cond_item> expr add_expr mul_expr neg_expr

%%
/* A program is exactly one condition; its value is published via return_rel. */
prog : cond                        { return_rel = make_prog($1); }
;

/* Two expressions joined by one relational operator. */
cond : expr '>' expr               { $$ = make_cond_gt($1, $3); }
     | expr '<' expr               { $$ = make_cond_lt($1, $3); }
     | expr GE expr                { $$ = make_cond_ge($1, $3); }
     | expr LE expr                { $$ = make_cond_le($1, $3); }
     | expr EQ expr                { $$ = make_cond_eq($1, $3); }
;

expr : add_expr                    { $$ = $1; }
;

/* Operator precedence is encoded by the rule layering add > mul > neg. */
add_expr : add_expr '+' mul_expr   { $$ = make_cond_item_add($1,$3); }
         | add_expr '-' mul_expr   { $$ = make_cond_item_sub($1,$3); }
         | mul_expr                { $$ = $1; }
;

mul_expr : mul_expr '*' neg_expr   { $$ = make_cond_item_mul($1,$3); }
         | neg_expr                { $$ = $1; }
;

neg_expr : '-' neg_expr            { $$ = make_cond_item_neg($2); }
         | '(' expr ')'            { $$ = $2; }
         | NUMBER                  { $$ = make_cond_item_number($1); }
         | LEVEL                   { $$ = make_cond_item_level($1); }
         | VARIABLE                { $$ = make_cond_item_variable($1); }
;
%%

/* Bison error callback: report the message on stderr, one message per line. */
void yyerror(const char* msg) {
  fprintf(stderr, "Parse error: %s\n", msg);
}

/*
 * Parse the condition held in the NUL-terminated string `expr`.
 *
 * Returns the object produced by the `prog` action on success, or NULL when
 * the text does not parse.  The slot is cleared before parsing so that a
 * failed parse can never hand back the result of an earlier call.
 */
simap_vec_t* parse_relation_vector(const char* expr) {
  yydebug=0;
  return_rel = NULL;

  //if(yylex_init()) {
  //  TODO: error out or something
  //}

  YY_BUFFER_STATE state = yy_scan_string(expr);
  const bool parsed = (yyparse() == 0);

  /* Release the scanner buffer and scanner state whether or not parsing worked. */
  yy_delete_buffer(state);
  yylex_destroy();

  if (!parsed) {
    /* NOTE(review): if `prog` was reduced before the error was detected, the
       object returned by make_prog() is dropped here without being freed;
       how it must be released is not visible in this file. */
    return_rel = NULL;
  }
  return return_rel;
}
BEGIN(LINE); return NUM_STATEMENT; +ceil BEGIN(LINE); return CEIL; +floor BEGIN(LINE); return FLOOR; +true BEGIN(LINE); yylval.bool_val = true; return TRUEORFALSE; +false BEGIN(LINE); yylval.bool_val = false; return TRUEORFALSE; +skew BEGIN(LINE); return SKEW; +shift BEGIN(LINE); return SHIFT; +scale BEGIN(LINE); return SCALE; +reverse BEGIN(LINE); return REVERSE; +shift_to BEGIN(LINE); return SHIFT_TO; +fuse BEGIN(LINE); return FUSE; +peel BEGIN(LINE); return PEEL; +distribute BEGIN(LINE); return DISTRIBUTE; +remove_dep BEGIN(LINE); return REMOVE_DEP; +structure BEGIN(LINE); return PRINT_STRUCTURE; +[ \t]+ /* ignore whitespaces */ +\n BEGIN(INITIAL); return (int)yytext[0]; +L[0-9]+ yylval.val = atoi(&yytext[1]); return LEVEL; +[a-zA-Z_][a-zA-Z_0-9]* { + BEGIN(LINE); + yylval.name = new char[yyleng+1]; + strcpy(yylval.name, yytext); + return VARIABLE; + } +\"(\\.|[^\\"])*\" { + BEGIN(LINE); + std::string str = std::string(yytext); + yylval.name = new char[yyleng-1]; + str = str.substr(1,yyleng-2); + strcpy(yylval.name, str.c_str()); + return STRING; + } +[0-9]+ yylval.val = atoi(yytext); return NUMBER; +\>\= return GE; +\<\= return LE; +\!\= return NE; +\=\= return EQ; +. return (int)yytext[0]; +<LINE><<EOF>> BEGIN(INITIAL); unput('\n'); + +%% + + diff --git a/parser.yy b/parser.yy new file mode 100644 index 0000000..1559ab4 --- /dev/null +++ b/parser.yy @@ -0,0 +1,1605 @@ +/***************************************************************************** + Copyright (C) 2008 University of Southern California. + Copyright (C) 2009-2010 University of Utah. + All Rights Reserved. 
+ + Purpose: + CHiLL script yacc parser + + Notes: + + History: + 01/2008 created by Chun Chen +*****************************************************************************/ + +%{ +#include <stdio.h> +#include <math.h> +#include <iostream> +#include <fstream> +#include <vector> +#include <map> +#include <set> +#include <string> +#include <FlexLexer.h> +#include "parser.tab.hh" + +#include <omega.h> +#include "ir_code.hh" +#include "loop.hh" + +#ifdef BUILD_ROSE +#include "ir_rose.hh" +#elif BUILD_SUIF +#include "ir_suif.hh" +#endif + + +using namespace omega; + +extern int yydebug; + +void yyerror(const char *); +int yylex(); +yyFlexLexer lexer; + +namespace { + enum COMPILER_IR_TYPE {CIT_NULL, CIT_SUIF, CIT_ROSE}; + char *source_filename = NULL; + COMPILER_IR_TYPE cit_name = CIT_NULL; + #ifdef BUILD_ROSE + char* procedure_name = NULL; + #elif BUILD_SUIF + int procedure_number = -1; + #endif + + int loop_num_start, loop_num_end; + Loop *myloop = NULL; +} + +#define PRINT_ERR_LINENO {if (is_interactive) fprintf(stderr, "\n"); else fprintf(stderr, " at line %d\n", lexer.lineno()-1);} + +std::map<std::string, int> parameter_tab; +bool is_interactive; +const char *PROMPT_STRING = ">>>"; + +IR_Code *ir_code = NULL; +std::vector<IR_Control *> ir_controls; +std::vector<int> loops; +%} + +%union { + int val; + float fval; + bool bool_val; + char *name; + std::vector<int> *vec; + std::vector<std::vector<int> > *mat; + std::map<std::string, int> *tab; + std::vector<std::map<std::string, int> > *tab_lst; + std::pair<std::vector<std::map<std::string, int> >, std::map<std::string, int> > *eq_term_pair; +} + +%token <val> NUMBER LEVEL +%token <bool_val> TRUEORFALSE +%token <name> FILENAME PROCEDURENAME VARIABLE FREEVAR STRING +%token SOURCE PROCEDURE FORMAT LOOP PERMUTE ORIGINAL TILE UNROLL SPLIT UNROLL_EXTRA PRAGMA PREFETCH +%token DATACOPY DATACOPY_PRIVATIZED +%token NONSINGULAR EXIT KNOWN SKEW SHIFT SHIFT_TO FUSE DISTRIBUTE REMOVE_DEP SCALE REVERSE PEEL +%token STRIDED 
/*
 * Integer expression, evaluated while parsing.  Identifiers are looked up in
 * parameter_tab; the VARIABLE token text is a new[]-allocated buffer that the
 * action must release itself.
 */
expr : NUMBER                  {$$ = $1;}
     | VARIABLE                {
                                 std::map<std::string, int>::iterator it = parameter_tab.find(std::string($1));
                                 if (it != parameter_tab.end()) {
                                   $$ = it->second;
                                   delete []$1;
                                 }
                                 else {
                                   if (is_interactive)
                                     fprintf(stderr, "variable \"%s\" undefined\n", $1);
                                   else
                                     fprintf(stderr, "variable \"%s\" undefined at line %d\n", $1, lexer.lineno());
                                   delete []$1;
                                   if (!is_interactive)
                                     exit(2);
                                   /* Interactive mode: $$ has no value, so abandon the current
                                      command instead of running it with an indeterminate operand.
                                      $1 is already freed; bison does not run %destructor on the
                                      right-hand side of the rule that raises YYERROR. */
                                   YYERROR;
                                 }
                               }
     | NUM_STATEMENT '(' ')'   {
                                 /* 0 when no loop has been selected yet. */
                                 if (myloop == NULL)
                                   $$ = 0;
                                 else
                                   $$ = myloop->num_statement();
                               }
     | CEIL '(' float_expr ')' {
                                 $$ = ceil($3);
                               }
     | FLOOR '(' float_expr ')' {
                                 $$ = floor($3);
                               }
     | '(' expr ')'            {$$ = $2;}
     | expr '-' expr           {$$ = $1-$3;}
     | expr '+' expr           {$$ = $1+$3;}
     | expr '*' expr           {$$ = $1*$3;}
     | expr '/' expr           {
                                 /* Integer division by zero is undefined behaviour; diagnose it. */
                                 if ($3 == 0) {
                                   fprintf(stderr, "division by zero");
                                   PRINT_ERR_LINENO;
                                   if (!is_interactive)
                                     exit(2);
                                   YYERROR;
                                 }
                                 $$ = $1/$3;
                               }
     | '-' expr %prec UMINUS   {$$ = -$2;}
;

/*
 * Floating-point counterpart of `expr`; it only appears as the argument of
 * ceil() and floor().  Division is left unchecked because the operands are
 * floats here.
 */
float_expr : NUMBER                  {$$ = $1;}
     | VARIABLE                {
                                 std::map<std::string, int>::iterator it = parameter_tab.find(std::string($1));
                                 if (it != parameter_tab.end()) {
                                   $$ = it->second;
                                   delete []$1;
                                 }
                                 else {
                                   if (is_interactive)
                                     fprintf(stderr, "variable \"%s\" undefined\n", $1);
                                   else
                                     fprintf(stderr, "variable \"%s\" undefined at line %d\n", $1, lexer.lineno());
                                   delete []$1;
                                   if (!is_interactive)
                                     exit(2);
                                   /* Same reasoning as in `expr`: no value to return, so fail
                                      the command rather than continue with garbage. */
                                   YYERROR;
                                 }
                               }
     | NUM_STATEMENT '(' ')'   {
                                 if (myloop == NULL)
                                   $$ = 0;
                                 else
                                   $$ = myloop->num_statement();
                               }
     | CEIL '(' float_expr ')' {
                                 $$ = ceil($3);
                               }
     | FLOOR '(' float_expr ')' {
                                 $$ = floor($3);
                               }
     | '(' float_expr ')'            {$$ = $2;}
     | float_expr '-' float_expr           {$$ = $1-$3;}
     | float_expr '+' float_expr           {$$ = $1+$3;}
     | float_expr '*' float_expr           {$$ = $1*$3;}
     | float_expr '/' float_expr           {$$ = $1/$3;}
     | '-' float_expr %prec UMINUS   {$$ = -$2;}
;
std::vector<std::map<std::string, int> >(); + $$->push_back(*$1); + for (std::map<std::string, int>::iterator it = $1->begin(); it != $1->end(); it++) + it->second = -it->second; + $$->push_back(*$1); + delete $1; + delete $3; + } +; + +cond_term : NUMBER {$$ = new std::map<std::string, int>(); (*$$)[to_string(0)] = $1;} + | LEVEL {$$ = new std::map<std::string, int>(); (*$$)[to_string($1)] = 1;} + | VARIABLE { + $$ = new std::map<std::string, int>(); + + std::map<std::string, int>::iterator it = parameter_tab.find(std::string($1)); + if (it != parameter_tab.end()) + (*$$)[to_string(0)] = it->second; + else + (*$$)[std::string($1)] = 1; + + delete []$1; + } + | '(' cond_term ')' {$$ = $2;} + | cond_term '-' cond_term { + for (std::map<std::string, int>::iterator it = $3->begin(); it != $3->end(); it++) + (*$1)[it->first] -= it->second; + $$ = $1; + delete $3; + } + | cond_term '+' cond_term { + for (std::map<std::string, int>::iterator it = $3->begin(); it != $3->end(); it++) + (*$1)[it->first] += it->second; + $$ = $1; + delete $3; + } + | cond_term '*' cond_term { + (*$1)[to_string(0)] += 0; + (*$3)[to_string(0)] += 0; + if ($1->size() == 1) { + int t = (*$1)[to_string(0)]; + for (std::map<std::string, int>::iterator it = $3->begin(); it != $3->end(); it++) + it->second *= t; + $$ = $3; + delete $1; + } + else if ($3->size() == 1) { + int t = (*$3)[to_string(0)]; + for (std::map<std::string, int>::iterator it = $1->begin(); it != $1->end(); it++) + it->second *= t; + $$ = $1; + delete $3; + } + else { + if (is_interactive) + fprintf(stderr, "require Presburger formula\n"); + else + fprintf(stderr, "require Presburger formula at line %d\n", lexer.lineno()); + delete $1; + delete $3; + exit(2); + } + } + | '-' cond_term %prec UMINUS { + for (std::map<std::string, int>::iterator it = $2->begin(); it != $2->end(); it++) + it->second = -(it->second); + $$ = $2; + } +; + +command : '\n' { if (is_interactive) printf("%s ", PROMPT_STRING); } + | error '\n' { if 
(!is_interactive) exit(2); else printf("%s ", PROMPT_STRING); } + | SOURCE ':' FILENAME '\n' { + if (source_filename != NULL) { + fprintf(stderr, "only one file can be handle in a script"); + PRINT_ERR_LINENO; + if (!is_interactive) + exit(2); + } + source_filename = $3; + if (is_interactive) + printf("%s ", PROMPT_STRING); + } + | PROCEDURE ':' VARIABLE '\n' { + + #ifdef BUILD_ROSE + + if (procedure_name != NULL) { + fprintf(stderr, "only one procedure can be handled in a script"); + PRINT_ERR_LINENO; + if (!is_interactive) + exit(2); + } + procedure_name = $3; + if (is_interactive) + printf("%s ", PROMPT_STRING); + #elif BUILD_SUIF + fprintf(stderr, "Please specify procedure number and not name!!"); + PRINT_ERR_LINENO; + if (!is_interactive) + exit(2); + #else + fprintf(stderr, "Please configure IR type to ROSE or SUIF!!: Procedure number for SUIF and procedure name for ROSE"); + PRINT_ERR_LINENO; + if (!is_interactive) + exit(2); + #endif + } + | PROCEDURE ':' NUMBER '\n' { + + #ifdef BUILD_ROSE + fprintf(stderr, "Please specify procedure's name and not number!!"); + PRINT_ERR_LINENO; + if (!is_interactive) + exit(2); + + #elif BUILD_SUIF + if (procedure_number != -1) { + fprintf(stderr, "only one procedure can be handled in a script"); + PRINT_ERR_LINENO; + if (!is_interactive) + exit(2); + } + procedure_number = $3; + if (is_interactive) + printf("%s ", PROMPT_STRING); + #else + fprintf(stderr, "Please configure IR type to ROSE or SUIF: Procedure number for suif and procedure name for rose!!"); + PRINT_ERR_LINENO; + if (!is_interactive) + exit(2); + #endif + } + | FORMAT ':' FILENAME '\n' { + if (cit_name != CIT_NULL) { + fprintf(stderr, "compiler intermediate format already specified"); + PRINT_ERR_LINENO; + delete []$3; + if (!is_interactive) + exit(2); + } + else { + + if (std::string($3) == "suif" || std::string($3) == "SUIF") { + cit_name = CIT_SUIF; + delete []$3; + } + else if(std::string($3) == "rose" || std::string($3) == "ROSE") { + cit_name = 
CIT_ROSE; + delete []$3; + } + else { + fprintf(stderr, "unrecognized IR format"); + PRINT_ERR_LINENO; + delete []$3; + if (!is_interactive) + exit(2); + } + } + if (is_interactive) + printf("%s ", PROMPT_STRING); + } + | LOOP ':' NUMBER '\n' { + if (source_filename == NULL) { + fprintf(stderr, "source file not set when initializing the loop"); + PRINT_ERR_LINENO; + if (!is_interactive) + exit(2); + } + else { + if (ir_code == NULL) { + #ifdef BUILD_ROSE + if (procedure_name == NULL) + procedure_name = "main"; + #elif BUILD_SUIF + if (procedure_number == -1) + procedure_number = 0; + #endif + + switch (cit_name) { + #ifndef BUILD_ROSE + case CIT_SUIF: + #ifdef BUILD_SUIF + ir_code = new IR_suifCode(source_filename, procedure_number); + #else + fprintf(stderr, "SUIF IR not installed"); + PRINT_ERR_LINENO; + if (!is_interactive) + exit(2); + #endif + break; + #endif + case CIT_ROSE: + #ifdef BUILD_ROSE + ir_code = new IR_roseCode(source_filename, procedure_name); + #else + fprintf(stderr, "ROSE IR not installed"); + PRINT_ERR_LINENO; + if (!is_interactive) + exit(2); + #endif + break; + case CIT_NULL: + fprintf(stderr, "compiler IR format not specified"); + PRINT_ERR_LINENO; + if (!is_interactive) + exit(2); + break; + } + + IR_Block *block = ir_code->GetCode(); + ir_controls = ir_code->FindOneLevelControlStructure(block); + for (int i = 0; i < ir_controls.size(); i++) + if (ir_controls[i]->type() == IR_CONTROL_LOOP) + loops.push_back(i); + delete block; + } + if (myloop != NULL && myloop->isInitialized()) { + if (loop_num_start == loop_num_end) { + ir_code->ReplaceCode(ir_controls[loops[loop_num_start]], myloop->getCode()); + ir_controls[loops[loop_num_start]] = NULL; + } + else { + std::vector<IR_Control *> parm; + for (int i = loops[loop_num_start]; i <= loops[loop_num_end]; i++) + parm.push_back(ir_controls[i]); + IR_Block *block = ir_code->MergeNeighboringControlStructures(parm); + ir_code->ReplaceCode(block, myloop->getCode()); + for (int i = 
loops[loop_num_start]; i <= loops[loop_num_end]; i++) { + delete ir_controls[i]; + ir_controls[i] = NULL; + } + } + delete myloop; + } + loop_num_start = loop_num_end = $3; + if (loop_num_start >= loops.size()) { + fprintf(stderr, "loop %d does not exist", loop_num_start); + PRINT_ERR_LINENO; + if (!is_interactive) + exit(2); + } + if (ir_controls[loops[loop_num_start]] == NULL) { + fprintf(stderr, "loop %d has already be transformed", loop_num_start); + PRINT_ERR_LINENO; + if (!is_interactive) + exit(2); + } + myloop = new Loop(ir_controls[loops[loop_num_start]]); + } + if (is_interactive) printf("%s ", PROMPT_STRING); + } + | LOOP ':' NUMBER '-' NUMBER '\n' { + if (source_filename == NULL) { + fprintf(stderr, "source file not set when initializing the loop"); + PRINT_ERR_LINENO; + if (!is_interactive) + exit(2); + } + else { + if (ir_code == NULL) { + #ifdef BUILD_ROSE + if (procedure_name == NULL) + procedure_name = "main"; + #elif BUILD_SUIF + if (procedure_number == -1) + procedure_number = 0; + #endif + + switch (cit_name) { + #ifndef BUILD_ROSE + case CIT_SUIF: + #ifdef BUILD_SUIF + ir_code = new IR_suifCode(source_filename, procedure_number); + #else + fprintf(stderr, "SUIF IR not installed"); + PRINT_ERR_LINENO; + if (!is_interactive) + exit(2); + #endif + break; + #endif + case CIT_ROSE: + #ifdef BUILD_ROSE + ir_code = new IR_roseCode(source_filename, procedure_name); + #else + fprintf(stderr, "ROSE IR not installed"); + PRINT_ERR_LINENO; + if (!is_interactive) + exit(2); + #endif + break; + case CIT_NULL: + fprintf(stderr, "compiler IR format not specified"); + PRINT_ERR_LINENO; + if (!is_interactive) + exit(2); + break; + } + + + + IR_Block *block = ir_code->GetCode(); + ir_controls = ir_code->FindOneLevelControlStructure(block); + for (int i = 0; i < ir_controls.size(); i++) + if (ir_controls[i]->type() == IR_CONTROL_LOOP) + loops.push_back(i); + delete block; + } + if (myloop != NULL && myloop->isInitialized()) { + if (loop_num_start == loop_num_end) 
{ + ir_code->ReplaceCode(ir_controls[loops[loop_num_start]], myloop->getCode()); + ir_controls[loops[loop_num_start]] = NULL; + } + else { + std::vector<IR_Control *> parm; + for (int i = loops[loop_num_start]; i <= loops[loop_num_end]; i++) + parm.push_back(ir_controls[i]); + IR_Block *block = ir_code->MergeNeighboringControlStructures(parm); + ir_code->ReplaceCode(block, myloop->getCode()); + for (int i = loops[loop_num_start]; i <= loops[loop_num_end]; i++) { + delete ir_controls[i]; + ir_controls[i] = NULL; + } + } + delete myloop; + } + loop_num_start = $3; + loop_num_end = $5; + if ($5 < $3) { + fprintf(stderr, "the last loop must be after the start loop"); + PRINT_ERR_LINENO; + if (!is_interactive) + exit(2); + } + if (loop_num_end >= loops.size()) { + fprintf(stderr, "loop %d does not exist", loop_num_end); + PRINT_ERR_LINENO; + if (!is_interactive) + exit(2); + } + std::vector<IR_Control *> parm; + for (int i = loops[loop_num_start]; i <= loops[loop_num_end]; i++) { + if (ir_controls[i] == NULL) { + fprintf(stderr, "loop has already been processed"); + PRINT_ERR_LINENO; + if (!is_interactive) + exit(2); + } + parm.push_back(ir_controls[i]); + } + IR_Block *block = ir_code->MergeNeighboringControlStructures(parm); + myloop = new Loop(block); + delete block; + + } + if (is_interactive) printf("%s ", PROMPT_STRING); + } + | PRINT '\n' { + if (myloop == NULL) { + fprintf(stderr, "loop not initialized"); + PRINT_ERR_LINENO; + if (!is_interactive) + exit(2); + } + else { + myloop->printCode(); + } + if (is_interactive) printf("%s ", PROMPT_STRING); else printf("\n"); + } + | PRINT PRINT_CODE '\n' { + if (myloop == NULL) { + fprintf(stderr, "loop not initialized"); + PRINT_ERR_LINENO; + if (!is_interactive) + exit(2); + } + else { + if (!is_interactive) { + if (loop_num_start != loop_num_end) + std::cout << "/* procedure :" << procedure_name << " loop #" << loop_num_start << "-" << loop_num_end << " */" << std::endl; + else + std::cout << "/* procedure :" << 
procedure_name << " loop #" << loop_num_start << " */" << std::endl; + + } + + myloop->printCode(); + } + if (is_interactive) printf("%s ", PROMPT_STRING); else printf("\n"); + } + | PRINT PRINT_DEP '\n' { + if (myloop == NULL) { + fprintf(stderr, "loop not initialized"); + PRINT_ERR_LINENO; + if (!is_interactive) + YYABORT; + } + else { + myloop->printDependenceGraph(); + } + if (is_interactive) printf("%s ", PROMPT_STRING); else printf("\n"); + } + | PRINT PRINT_IS '\n' { + if (myloop == NULL) { + fprintf(stderr, "loop not initialized"); + PRINT_ERR_LINENO; + if (!is_interactive) + YYABORT; + } + else { + myloop->printIterationSpace(); + } + if (is_interactive) printf("%s ", PROMPT_STRING); else printf("\n"); + } + | PRINT PRINT_STRUCTURE '\n' { + if (myloop == NULL) { + fprintf(stderr, "loop not initialized"); + PRINT_ERR_LINENO; + if (!is_interactive) + exit(2); + } + else { + myloop->print_internal_loop_structure(); + } + if (is_interactive) printf("%s ", PROMPT_STRING); else printf("\n"); + } + | PRINT expr '\n' { +/* if (parameter_tab.find(std::string($2)) == parameter_tab.end()) { + fprintf(stderr, "cannot print undefined variable %s\n", $2); + PRINT_ERR_LINENO; + if (!is_interactive) + exit(2); + } + std::cout << parameter_tab[std::string($2)] << std::endl; +*/ + std::cout << $2 << std::endl; + if (is_interactive) printf("%s ", PROMPT_STRING); else printf("\n"); + } + | EXIT '\n' { return(0); } + | VARIABLE '=' expr '\n' { + parameter_tab[std::string($1)] = $3; + delete []$1; + if (is_interactive) printf("%s ", PROMPT_STRING); + } + | KNOWN '(' cond ')' '\n' { + try { + if (myloop == NULL) + throw std::runtime_error("loop not initialized"); + + int num_dim = myloop->known.n_set(); + Relation rel(num_dim); + F_And *f_root = rel.add_and(); + for (int j = 0; j < $3->size(); j++) { + GEQ_Handle h = f_root->add_GEQ(); + for (std::map<std::string, int>::iterator it = (*$3)[j].begin(); it != (*$3)[j].end(); it++) { + try { + int dim = from_string<int>(it->first); 
+ if (dim == 0) + h.update_const(it->second); + else + throw std::invalid_argument("only symbolic variables are allowed in known condition"); + } + catch (std::ios::failure e) { + Free_Var_Decl *g = NULL; + for (unsigned i = 0; i < myloop->freevar.size(); i++) { + std::string name = myloop->freevar[i]->base_name(); + if (name == it->first) { + g = myloop->freevar[i]; + break; + } + } + if (g == NULL) + throw std::invalid_argument("symbolic variable " + it->first + " not found"); + else + h.update_coef(rel.get_local(g), it->second); + } + } + } + myloop->addKnown(rel); + } + catch (const std::exception &e) { + fprintf(stderr, e.what()); + PRINT_ERR_LINENO; + if (!is_interactive) { + delete $3; + exit(2); + } + } + delete $3; + if (is_interactive) printf("%s ", PROMPT_STRING); + } + | REMOVE_DEP '(' NUMBER ',' NUMBER ')' '\n' { + try { + if (myloop == NULL) + throw std::runtime_error("loop not initialized"); + + myloop->removeDependence($3, $5); + } + catch (const std::exception &e) { + fprintf(stderr, e.what()); + PRINT_ERR_LINENO; + if (!is_interactive) + YYABORT; + } + if (is_interactive) printf("%s ", PROMPT_STRING); + } + | ORIGINAL '(' ')' '\n' { + try { + if (myloop == NULL) + throw std::runtime_error("loop not initialized"); + + myloop->original(); + } + catch (const std::exception &e) { + fprintf(stderr, e.what()); + PRINT_ERR_LINENO; + if (!is_interactive) + exit(2); + } + if (is_interactive) printf("%s ", PROMPT_STRING); + } + | PERMUTE '(' vector ')' '\n' { + try { + if (myloop == NULL) + throw std::runtime_error("loop not initialized"); + + myloop->permute(*$3); + } + catch (const std::exception &e) { + fprintf(stderr, e.what()); + PRINT_ERR_LINENO; + if (!is_interactive) { + delete $3; + exit(2); + } + } + delete $3; + if (is_interactive) printf("%s ", PROMPT_STRING); + } + | PERMUTE '(' expr ',' NUMBER ',' vector ')' '\n' { + try { + if (myloop == NULL) + throw std::runtime_error("loop not initialized"); + + myloop->permute($3, $5, *$7); + } + catch 
(const std::exception &e) { + fprintf(stderr, e.what()); + PRINT_ERR_LINENO; + if (!is_interactive) { + delete $7; + exit(2); + } + } + delete $7; + if (is_interactive) printf("%s ", PROMPT_STRING); + } + | PERMUTE '(' vector ',' vector ')' '\n' { + try { + if (myloop == NULL) + throw std::runtime_error("loop not initialized"); + + std::set<int> active; + for (int i = 0; i < (*$3).size(); i++) + active.insert((*$3)[i]); + + myloop->permute(active, *$5); + } + catch (const std::exception &e) { + fprintf(stderr, e.what()); + PRINT_ERR_LINENO; + if (!is_interactive) { + delete $3; + delete $5; + exit(2); + } + } + delete $3; + delete $5; + if (is_interactive) printf("%s ", PROMPT_STRING); + } + | PRAGMA '(' NUMBER ',' NUMBER ',' STRING ')' '\n' { + myloop->pragma($3,$5,$7); + } + | PREFETCH '(' NUMBER ',' NUMBER ',' STRING ',' expr ')' '\n' { + myloop->prefetch($3, $5, $7, $9); + } + | TILE '(' expr ',' NUMBER ',' expr ')' '\n' { + try { + if (myloop == NULL) + throw std::runtime_error("loop not initialized"); + + myloop->tile($3,$5,$7); + } + catch (const std::exception &e) { + fprintf(stderr, e.what()); + PRINT_ERR_LINENO; + if (!is_interactive) + exit(2); + } + if (is_interactive) printf("%s ", PROMPT_STRING); + } + | TILE '(' expr',' NUMBER ',' expr ',' NUMBER ')' '\n' { + try { + if (myloop == NULL) + throw std::runtime_error("loop not initialized"); + + myloop->tile($3,$5,$7,$9); + } + catch (const std::exception &e) { + fprintf(stderr, e.what()); + PRINT_ERR_LINENO; + if (!is_interactive) + exit(2); + } + if (is_interactive) printf("%s ", PROMPT_STRING); + } + | TILE '(' expr ',' NUMBER ',' expr ',' NUMBER ',' STRIDED ')' '\n' { + try { + if (myloop == NULL) + throw std::runtime_error("loop not initialized"); + + myloop->tile($3,$5,$7,$9,StridedTile); + } + catch (const std::exception &e) { + fprintf(stderr, e.what()); + PRINT_ERR_LINENO; + if (!is_interactive) + exit(2); + } + if (is_interactive) printf("%s ", PROMPT_STRING); + } + | TILE '(' expr ',' NUMBER 
',' expr ',' NUMBER ',' STRIDED ',' expr ')' '\n' { + try { + if (myloop == NULL) + throw std::runtime_error("loop not initialized"); + + myloop->tile($3,$5,$7,$9,StridedTile,$13); + } + catch (const std::exception &e) { + fprintf(stderr, e.what()); + PRINT_ERR_LINENO; + if (!is_interactive) + exit(2); + } + if (is_interactive) printf("%s ", PROMPT_STRING); + } + | TILE '(' expr ',' NUMBER ',' expr ',' NUMBER ',' STRIDED ',' expr ',' expr ')' '\n' { + try { + if (myloop == NULL) + throw std::runtime_error("loop not initialized"); + + myloop->tile($3,$5,$7,$9,StridedTile,$13,$15); + } + catch (const std::exception &e) { + fprintf(stderr, e.what()); + PRINT_ERR_LINENO; + if (!is_interactive) + exit(2); + } + if (is_interactive) printf("%s ", PROMPT_STRING); + } + | TILE '(' expr ',' NUMBER ',' expr ',' NUMBER ',' COUNTED ')' '\n' { + try { + if (myloop == NULL) + throw std::runtime_error("loop not initialized"); + + myloop->tile($3,$5,$7,$9,CountedTile); + } + catch (const std::exception &e) { + fprintf(stderr, e.what()); + PRINT_ERR_LINENO; + if (!is_interactive) + exit(2); + } + if (is_interactive) printf("%s ", PROMPT_STRING); + } + | TILE '(' expr ',' NUMBER ',' expr ',' NUMBER ',' COUNTED ',' expr ')' '\n' { + try { + if (myloop == NULL) + throw std::runtime_error("loop not initialized"); + + myloop->tile($3,$5,$7,$9,CountedTile,$13); + } + catch (const std::exception &e) { + fprintf(stderr, e.what()); + PRINT_ERR_LINENO; + if (!is_interactive) + exit(2); + } + if (is_interactive) printf("%s ", PROMPT_STRING); + } + | TILE '(' expr ',' NUMBER ',' expr ',' NUMBER ',' COUNTED ',' expr ',' expr ')' '\n' { + try { + if (myloop == NULL) + throw std::runtime_error("loop not initialized"); + + myloop->tile($3,$5,$7,$9,CountedTile,$13,$15); + } + catch (const std::exception &e) { + fprintf(stderr, e.what()); + PRINT_ERR_LINENO; + if (!is_interactive) + exit(2); + } + if (is_interactive) printf("%s ", PROMPT_STRING); + } + | DATACOPY '(' matrix ',' NUMBER ')' '\n' { + 
try { + if (myloop == NULL) + throw std::runtime_error("loop not initialized"); + + std::vector<std::pair<int, std::vector<int> > > array_ref_nums((*$3).size()); + for (int i = 0; i < (*$3).size(); i++) { + if ((*$3)[i].size() <= 1) + throw std::invalid_argument("statement missing in the first parameter"); + array_ref_nums[i].first = (*$3)[i][0]; + for (int j = 1; j < (*$3)[i].size(); j++) + array_ref_nums[i].second.push_back((*$3)[i][j]); + } + myloop->datacopy(array_ref_nums,$5); + } + catch (const std::exception &e) { + fprintf(stderr, e.what()); + PRINT_ERR_LINENO; + if (!is_interactive) { + delete $3; + exit(2); + } + } + delete $3; + if (is_interactive) printf("%s ", PROMPT_STRING); + } + | DATACOPY '(' matrix ',' NUMBER ',' TRUEORFALSE ')' '\n' { + try { + if (myloop == NULL) + throw std::runtime_error("loop not initialized"); + + std::vector<std::pair<int, std::vector<int> > > array_ref_nums((*$3).size()); + for (int i = 0; i < (*$3).size(); i++) { + if ((*$3)[i].size() <= 1) + throw std::invalid_argument("statement missing in the first parameter"); + array_ref_nums[i].first = (*$3)[i][0]; + for (int j = 1; j < (*$3)[i].size(); j++) + array_ref_nums[i].second.push_back((*$3)[i][j]); + } + myloop->datacopy(array_ref_nums,$5,$7); + } + catch (const std::exception &e) { + fprintf(stderr, e.what()); + PRINT_ERR_LINENO; + if (!is_interactive) { + delete $3; + exit(2); + } + } + delete $3; + if (is_interactive) printf("%s ", PROMPT_STRING); + } + | DATACOPY '(' matrix ',' NUMBER ',' TRUEORFALSE ',' expr ')' '\n' { + try { + if (myloop == NULL) + throw std::runtime_error("loop not initialized"); + + std::vector<std::pair<int, std::vector<int> > > array_ref_nums((*$3).size()); + for (int i = 0; i < (*$3).size(); i++) { + if ((*$3)[i].size() <= 1) + throw std::invalid_argument("statement missing in the first parameter"); + array_ref_nums[i].first = (*$3)[i][0]; + for (int j = 1; j < (*$3)[i].size(); j++) + array_ref_nums[i].second.push_back((*$3)[i][j]); + } + 
myloop->datacopy(array_ref_nums,$5,$7,$9);
            }
            catch (const std::exception &e) {
                // Never pass the message as the format string: it can contain '%'.
                fprintf(stderr, "%s", e.what());
                PRINT_ERR_LINENO;
                if (!is_interactive) {
                    delete $3;
                    exit(2);
                }
            }
            delete $3;
            if (is_interactive) printf("%s ", PROMPT_STRING);
          }

        /*
         * Shared shape of the actions below:
         *   - the transformation is invoked on `myloop` inside a try block,
         *     after checking that `myloop` is not NULL;
         *   - on any std::exception the message is printed to stderr followed
         *     by PRINT_ERR_LINENO; a non-interactive run then releases the
         *     action's owned semantic values and exits with status 2;
         *   - otherwise the owned semantic values are released after the call
         *     and, in interactive mode, the prompt is printed again.
         * Semantic values released with `delete` / `delete []` are the ones
         * the original actions already released that way.
         */

        /* datacopy(matrix, NUMBER, TRUEORFALSE, expr, expr)
         * Each matrix row is split into (row[0], row[1..]) pairs before the
         * call; a row with fewer than two entries is rejected. */
        | DATACOPY '(' matrix ',' NUMBER ',' TRUEORFALSE ',' expr ',' expr ')' '\n' {
            try {
                if (myloop == NULL)
                    throw std::runtime_error("loop not initialized");

                std::vector<std::pair<int, std::vector<int> > > array_ref_nums($3->size());
                for (size_t i = 0; i < $3->size(); i++) {
                    if ((*$3)[i].size() <= 1)
                        throw std::invalid_argument("statement missing in the first parameter");
                    array_ref_nums[i].first = (*$3)[i][0];
                    for (size_t j = 1; j < (*$3)[i].size(); j++)
                        array_ref_nums[i].second.push_back((*$3)[i][j]);
                }
                myloop->datacopy(array_ref_nums,$5,$7,$9,$11);
            }
            catch (const std::exception &e) {
                fprintf(stderr, "%s", e.what());
                PRINT_ERR_LINENO;
                if (!is_interactive) {
                    delete $3;
                    exit(2);
                }
            }
            delete $3;
            if (is_interactive) printf("%s ", PROMPT_STRING);
          }

        /* datacopy(matrix, NUMBER, TRUEORFALSE, expr, expr, expr)
         * Same row conversion as above, with one more trailing argument. */
        | DATACOPY '(' matrix ',' NUMBER ',' TRUEORFALSE ',' expr ',' expr ',' expr ')' '\n' {
            try {
                if (myloop == NULL)
                    throw std::runtime_error("loop not initialized");

                std::vector<std::pair<int, std::vector<int> > > array_ref_nums($3->size());
                for (size_t i = 0; i < $3->size(); i++) {
                    if ((*$3)[i].size() <= 1)
                        throw std::invalid_argument("statement missing in the first parameter");
                    array_ref_nums[i].first = (*$3)[i][0];
                    for (size_t j = 1; j < (*$3)[i].size(); j++)
                        array_ref_nums[i].second.push_back((*$3)[i][j]);
                }
                myloop->datacopy(array_ref_nums,$5,$7,$9,$11,$13);
            }
            catch (const std::exception &e) {
                fprintf(stderr, "%s", e.what());
                PRINT_ERR_LINENO;
                if (!is_interactive) {
                    delete $3;
                    exit(2);
                }
            }
            delete $3;
            if (is_interactive) printf("%s ", PROMPT_STRING);
          }

        /* datacopy(expr, NUMBER, VARIABLE)
         * The VARIABLE value ($7) is an array owned by this action. */
        | DATACOPY '(' expr ',' NUMBER ',' VARIABLE ')' '\n' {
            try {
                if (myloop == NULL)
                    throw std::runtime_error("loop not initialized");

                myloop->datacopy($3,$5,$7);
            }
            catch (const std::exception &e) {
                fprintf(stderr, "%s", e.what());
                PRINT_ERR_LINENO;
                if (!is_interactive) {
                    delete []$7;
                    exit(2);
                }
            }
            delete []$7;
            if (is_interactive) printf("%s ", PROMPT_STRING);
          }

        /* datacopy(expr, NUMBER, VARIABLE, TRUEORFALSE) */
        | DATACOPY '(' expr ',' NUMBER ',' VARIABLE ',' TRUEORFALSE ')' '\n' {
            try {
                if (myloop == NULL)
                    throw std::runtime_error("loop not initialized");

                myloop->datacopy($3,$5,$7,$9);
            }
            catch (const std::exception &e) {
                fprintf(stderr, "%s", e.what());
                PRINT_ERR_LINENO;
                if (!is_interactive) {
                    delete []$7;
                    exit(2);
                }
            }
            delete []$7;
            if (is_interactive) printf("%s ", PROMPT_STRING);
          }

        /* datacopy(expr, NUMBER, VARIABLE, TRUEORFALSE, expr) */
        | DATACOPY '(' expr ',' NUMBER ',' VARIABLE ',' TRUEORFALSE ',' expr ')' '\n' {
            try {
                if (myloop == NULL)
                    throw std::runtime_error("loop not initialized");

                myloop->datacopy($3,$5,$7,$9,$11);
            }
            catch (const std::exception &e) {
                fprintf(stderr, "%s", e.what());
                PRINT_ERR_LINENO;
                if (!is_interactive) {
                    delete []$7;
                    exit(2);
                }
            }
            delete []$7;
            if (is_interactive) printf("%s ", PROMPT_STRING);
          }

        /* datacopy(expr, NUMBER, VARIABLE, TRUEORFALSE, expr, expr) */
        | DATACOPY '(' expr ',' NUMBER ',' VARIABLE ',' TRUEORFALSE ',' expr ',' expr ')' '\n' {
            try {
                if (myloop == NULL)
                    throw std::runtime_error("loop not initialized");

                myloop->datacopy($3,$5,$7,$9,$11,$13);
            }
            catch (const std::exception &e) {
                fprintf(stderr, "%s", e.what());
                PRINT_ERR_LINENO;
                if (!is_interactive) {
                    delete []$7;
                    exit(2);
                }
            }
            delete []$7;
            if (is_interactive) printf("%s ", PROMPT_STRING);
          }

        /* datacopy(expr, NUMBER, VARIABLE, TRUEORFALSE, expr, expr, expr) */
        | DATACOPY '(' expr ',' NUMBER ',' VARIABLE ',' TRUEORFALSE ',' expr ',' expr ',' expr ')' '\n' {
            try {
                if (myloop == NULL)
                    throw std::runtime_error("loop not initialized");

                myloop->datacopy($3,$5,$7,$9,$11,$13,$15);
            }
            catch (const std::exception &e) {
                fprintf(stderr, "%s", e.what());
                PRINT_ERR_LINENO;
                if (!is_interactive) {
                    delete []$7;
                    exit(2);
                }
            }
            delete []$7;
            if (is_interactive) printf("%s ", PROMPT_STRING);
          }

        /* datacopy_privatized(matrix, NUMBER, vector, TRUEORFALSE, expr, expr, expr)
         * Matrix rows are converted exactly as in the matrix form of datacopy;
         * the vector ($7) is passed by dereference and released afterwards. */
        | DATACOPY_PRIVATIZED '(' matrix ',' NUMBER ',' vector ',' TRUEORFALSE ',' expr ',' expr ',' expr ')' '\n' {
            try {
                if (myloop == NULL)
                    throw std::runtime_error("loop not initialized");

                std::vector<std::pair<int, std::vector<int> > > array_ref_nums($3->size());
                for (size_t i = 0; i < $3->size(); i++) {
                    if ((*$3)[i].size() <= 1)
                        throw std::invalid_argument("statement missing in the first parameter");
                    array_ref_nums[i].first = (*$3)[i][0];
                    for (size_t j = 1; j < (*$3)[i].size(); j++)
                        array_ref_nums[i].second.push_back((*$3)[i][j]);
                }
                myloop->datacopy_privatized(array_ref_nums,$5,*$7,$9,$11,$13,$15);
            }
            catch (const std::exception &e) {
                fprintf(stderr, "%s", e.what());
                PRINT_ERR_LINENO;
                if (!is_interactive) {
                    delete $3;
                    delete $7;
                    exit(2);
                }
            }
            delete $3;
            delete $7;
            if (is_interactive) printf("%s ", PROMPT_STRING);
          }

        /* datacopy_privatized(expr, NUMBER, VARIABLE, vector, TRUEORFALSE, expr, expr, expr) */
        | DATACOPY_PRIVATIZED '(' expr ',' NUMBER ',' VARIABLE ',' vector ',' TRUEORFALSE ',' expr ',' expr ',' expr ')' '\n' {
            try {
                if (myloop == NULL)
                    throw std::runtime_error("loop not initialized");

                myloop->datacopy_privatized($3,$5,$7,*$9,$11,$13,$15,$17);
            }
            catch (const std::exception &e) {
                fprintf(stderr, "%s", e.what());
                PRINT_ERR_LINENO;
                if (!is_interactive) {
                    delete []$7;
                    delete $9;
                    exit(2);
                }
            }
            delete []$7;
            delete $9;
            if (is_interactive) printf("%s ", PROMPT_STRING);
          }

        /* unroll(expr, NUMBER, expr) -- no owned semantic values to release. */
        | UNROLL '(' expr ',' NUMBER ',' expr ')' '\n' {
            try {
                if (myloop == NULL)
                    throw std::runtime_error("loop not initialized");

                myloop->unroll($3,$5,$7);
            }
            catch (const std::exception &e) {
                fprintf(stderr, "%s", e.what());
                PRINT_ERR_LINENO;
                if (!is_interactive)
                    exit(2);
            }
            if (is_interactive) printf("%s ", PROMPT_STRING);
          }

        /* unroll(expr, NUMBER, expr, expr)
         * An empty vector of string vectors is passed as the fourth argument
         * so that $9 lands in the fifth position. */
        | UNROLL '(' expr ',' NUMBER ',' expr ',' expr ')' '\n' {
            try {
                if (myloop == NULL)
                    throw std::runtime_error("loop not initialized");

                myloop->unroll($3,$5,$7,std::vector< std::vector<std::string> >(), $9);
            }
            catch (const std::exception &e) {
                fprintf(stderr, "%s", e.what());
                PRINT_ERR_LINENO;
                if (!is_interactive)
                    exit(2);
            }
            if (is_interactive) printf("%s ", PROMPT_STRING);
          }

        /* unroll_extra(expr, NUMBER, expr) */
        | UNROLL_EXTRA '(' expr ',' NUMBER ',' expr ')' '\n' {
            try {
                if (myloop == NULL)
                    throw std::runtime_error("loop not initialized");

                myloop->unroll_extra($3,$5,$7);
            }
            catch (const std::exception &e) {
                fprintf(stderr, "%s", e.what());
                PRINT_ERR_LINENO;
                if (!is_interactive)
                    exit(2);
            }
            if (is_interactive) printf("%s ", PROMPT_STRING);
          }

        /* unroll_extra(expr, NUMBER, expr, expr) */
        | UNROLL_EXTRA '(' expr ',' NUMBER ',' expr ',' expr ')' '\n' {
            try {
                if (myloop == NULL)
                    throw std::runtime_error("loop not initialized");

                myloop->unroll_extra($3,$5,$7,$9);
            }
            catch (const std::exception &e) {
                fprintf(stderr, "%s", e.what());
                PRINT_ERR_LINENO;
                if (!is_interactive)
                    exit(2);
            }
            if (is_interactive) printf("%s ", PROMPT_STRING);
          }

        /* split(expr, NUMBER, cond)
         * Builds a Relation with one GEQ constraint per entry of the cond
         * list ($7) and hands it to myloop->split.  Each entry maps a key to a
         * coefficient: a key that parses as the integer 0 sets the constant
         * term, any other integer is used as a set-variable index (rejected
         * when larger than (num_dim-1)/2), and a key that does not parse as
         * an integer is looked up by name among myloop->freevar. */
        | SPLIT '(' expr ',' NUMBER ',' cond ')' '\n' {
            try {
                if (myloop == NULL)
                    throw std::runtime_error("loop not initialized");
                if ($3 < 0 || $3 >= myloop->num_statement())
                    throw std::invalid_argument("invalid statement " + to_string($3));
                int num_dim = myloop->stmt[$3].xform.n_out();

                Relation rel((num_dim-1)/2);
                F_And *f_root = rel.add_and();
                for (size_t j = 0; j < $7->size(); j++) {
                    GEQ_Handle h = f_root->add_GEQ();
                    for (std::map<std::string, int>::iterator it = (*$7)[j].begin(); it != (*$7)[j].end(); it++) {
                        try {
                            int dim = from_string<int>(it->first);
                            if (dim == 0)
                                h.update_const(it->second);
                            else {
                                // Not an ios::failure, so this propagates to the outer handler.
                                if (dim > (num_dim-1)/2)
                                    throw std::invalid_argument("invalid loop level " + to_string(dim) + " in split condition");
                                h.update_coef(rel.set_var(dim), it->second);
                            }
                        }
                        catch (const std::ios::failure &) {
                            // Key is not an integer: treat it as a free-variable name.
                            Free_Var_Decl *g = NULL;
                            for (unsigned i = 0; i < myloop->freevar.size(); i++) {
                                std::string name = myloop->freevar[i]->base_name();
                                if (name == it->first) {
                                    g = myloop->freevar[i];
                                    break;
                                }
                            }
                            if (g == NULL)
                                throw std::invalid_argument("unrecognized variable " + to_string(it->first.c_str()));
                            h.update_coef(rel.get_local(g), it->second);
                        }
                    }
                }
                myloop->split($3,$5,rel);
            }
            catch (const std::exception &e) {
                fprintf(stderr, "%s", e.what());
                PRINT_ERR_LINENO;
                if (!is_interactive) {
                    delete $7;
                    exit(2);
                }
            }
            delete $7;
            if (is_interactive) printf("%s ", PROMPT_STRING);
          }

        /* nonsingular(matrix)
         * The NULL check on myloop mirrors every sibling action; without it
         * the call below dereferences a null pointer when no loop is set. */
        | NONSINGULAR '(' matrix ')' '\n' {
            try {
                if (myloop == NULL)
                    throw std::runtime_error("loop not initialized");

                myloop->nonsingular(*$3);
            }
            catch (const std::exception &e) {
                fprintf(stderr, "%s", e.what());
                PRINT_ERR_LINENO;
                if (!is_interactive) {
                    delete $3;
                    exit(2);
                }
            }
            delete $3;
            if (is_interactive) printf("%s ", PROMPT_STRING);
          }

        /* skew(vector, NUMBER, vector)
         * The first vector is collapsed into a std::set<int> before the call. */
        | SKEW '(' vector ',' NUMBER ',' vector ')' '\n' {
            try {
                if (myloop == NULL)
                    throw std::runtime_error("loop not initialized");

                std::set<int> stmt_nums($3->begin(), $3->end());
                myloop->skew(stmt_nums, $5, *$7);
            }
            catch (const std::exception &e) {
                fprintf(stderr, "%s", e.what());
                PRINT_ERR_LINENO;
                if (!is_interactive) {
                    delete $3;
                    delete $7;
                    exit(2);
                }
            }
            delete $3;
            delete $7;
            if (is_interactive) printf("%s ", PROMPT_STRING);
          }

        /* scale(vector, NUMBER, expr) */
        | SCALE '(' vector ',' NUMBER ',' expr ')' '\n' {
            try {
                if (myloop == NULL)
                    throw std::runtime_error("loop not initialized");

                std::set<int> stmt_nums($3->begin(), $3->end());
                myloop->scale(stmt_nums, $5, $7);
            }
            catch (const std::exception &e) {
                fprintf(stderr, "%s", e.what());
                PRINT_ERR_LINENO;
                if (!is_interactive) {
                    delete $3;
                    exit(2);
                }
            }
            delete $3;
            if (is_interactive) printf("%s ", PROMPT_STRING);
          }

        /* reverse(vector, NUMBER) */
        | REVERSE '(' vector ',' NUMBER ')' '\n' {
            try {
                if (myloop == NULL)
                    throw std::runtime_error("loop not initialized");

                std::set<int> stmt_nums($3->begin(), $3->end());
                myloop->reverse(stmt_nums, $5);
            }
            catch (const std::exception &e) {
                fprintf(stderr, "%s", e.what());
                PRINT_ERR_LINENO;
                if (!is_interactive) {
                    delete $3;
                    exit(2);
                }
            }
            delete $3;
            if (is_interactive) printf("%s ", PROMPT_STRING);
          }

        /* shift(vector, NUMBER, expr) */
        | SHIFT '(' vector ',' NUMBER ',' expr ')' '\n' {
            try {
                if (myloop == NULL)
                    throw std::runtime_error("loop not initialized");

                std::set<int> stmt_nums($3->begin(), $3->end());
                myloop->shift(stmt_nums, $5, $7);
            }
            catch (const std::exception &e) {
                fprintf(stderr, "%s", e.what());
                PRINT_ERR_LINENO;
                if (!is_interactive) {
                    delete $3;
                    exit(2);
                }
            }
            delete $3;
            if (is_interactive) printf("%s ", PROMPT_STRING);
          }

        /* shift_to(expr, NUMBER, expr) */
        | SHIFT_TO '(' expr ',' NUMBER ',' expr ')' '\n' {
            try {
                if (myloop == NULL)
                    throw std::runtime_error("loop not initialized");

                myloop->shift_to($3, $5, $7);
            }
            catch (const std::exception &e) {
                fprintf(stderr, "%s", e.what());
                PRINT_ERR_LINENO;
                if (!is_interactive) {
                    exit(2);
                }
            }
            if (is_interactive) printf("%s ", PROMPT_STRING);
          }

        /* peel(NUMBER, NUMBER, expr) */
        | PEEL '(' NUMBER ',' NUMBER ',' expr ')' '\n' {
            try {
                if (myloop == NULL)
                    throw std::runtime_error("loop not initialized");

                myloop->peel($3, $5, $7);
            }
            catch (const std::exception &e) {
                fprintf(stderr, "%s", e.what());
                PRINT_ERR_LINENO;
                if (!is_interactive) {
                    exit(2);
                }
            }
            if (is_interactive) printf("%s ", PROMPT_STRING);
          }

        /* peel(NUMBER, NUMBER) */
        | PEEL '(' NUMBER ',' NUMBER ')' '\n' {
            try {
                if (myloop == NULL)
                    throw std::runtime_error("loop not initialized");

                myloop->peel($3, $5);
            }
            catch (const std::exception &e) {
                fprintf(stderr, "%s", e.what());
                PRINT_ERR_LINENO;
                if (!is_interactive) {
                    exit(2);
                }
            }
            if (is_interactive) printf("%s ", PROMPT_STRING);
          }

        /* fuse(vector, NUMBER) */
        | FUSE '(' vector ',' NUMBER ')' '\n' {
            try {
                if (myloop == NULL)
                    throw std::runtime_error("loop not initialized");

                std::set<int> stmt_nums($3->begin(), $3->end());
                myloop->fuse(stmt_nums, $5);
            }
            catch (const std::exception &e) {
                fprintf(stderr, "%s", e.what());
                PRINT_ERR_LINENO;
                if (!is_interactive) {
                    delete $3;
                    exit(2);
                }
            }
            delete $3;
            if (is_interactive) printf("%s ", PROMPT_STRING);
          }

        /* distribute(vector, NUMBER)
         * Uses the same try/catch shape as the other actions so that the
         * interactive prompt is also re-printed when myloop is NULL. */
        | DISTRIBUTE '(' vector ',' NUMBER ')' '\n' {
            try {
                if (myloop == NULL)
                    throw std::runtime_error("loop not initialized");

                std::set<int> stmt_nums($3->begin(), $3->end());
                myloop->distribute(stmt_nums, $5);
            }
            catch (const std::exception &e) {
                fprintf(stderr, "%s", e.what());
                PRINT_ERR_LINENO;
                if (!is_interactive) {
                    delete $3;
                    exit(2);
                }
            }
            delete $3;
            if (is_interactive) printf("%s ", PROMPT_STRING);
          }
;

%%

/* Scanner entry point expected by the generated parser: forwards to the
 * `lexer` object. */
inline int yylex() { return lexer.yylex();}

/*
 * Error callback invoked by the generated parser.
 *
 * Prints `str` to stderr.  In non-interactive mode the lexer's current line
 * number is appended; it is reduced by one when the current token text starts
 * with a newline (presumably because the lexer has already counted that
 * newline -- confirm against the scanner).
 */
void yyerror(const char *str) {
    int err_lineno = lexer.lineno();
    if (lexer.YYText()[0] == '\n')
        err_lineno--;

    if (is_interactive)
        fprintf(stderr, "%s\n", str);
    else
        fprintf(stderr, "%s at line %d\n", str, err_lineno);
}

/*
 * Program entry point.
 *
 * Usage: <program> [script_file]
 *   - more than one argument: prints the usage line and exits with -1;
 *   - one argument: the named script is opened and fed to the lexer
 *     (exits with -1 if it cannot be opened);
 *   - no argument and stdin is a terminal: interactive mode, a banner and
 *     the prompt are printed.
 * After a successful parse the transformed loop code replaces the original
 * control structure(s) in `ir_code`; all global objects are released before
 * returning.
 */
int main(int argc, char *argv[]) {
    yydebug = 0;

    if (argc > 2) {
        fprintf(stderr, "Usage: %s [script_file]\n", argv[0]);
        exit(-1);
    }

    std::ifstream script;
    if (argc == 2) {
        script.open(argv[1]);
        if (!script.is_open()) {
            printf("can't open script file \"%s\"\n", argv[1]);
            exit(-1);
        }
        lexer.switch_streams(&script, &std::cout);
    }

    if (argc == 1 && isatty((int)fileno(stdin))) {
        is_interactive = true;
        printf("CHiLL v0.2.0 (built on %s)\n", CHILL_BUILD_DATE);
        printf("Copyright (C) 2008 University of Southern California\n");
        printf("Copyright (C) 2009-2012 University of Utah\n");
        printf("%s ", PROMPT_STRING);
    }
    else
        is_interactive = false;

    ir_code = NULL;
    initializeOmega();

    if (yyparse() == 0) {
        if (!is_interactive)
            fprintf(stderr, "script success!\n");
        else
            printf("\n");
        if (ir_code != NULL && myloop != NULL && myloop->isInitialized()) {
            if (loop_num_start == loop_num_end) {
                // Single loop: replace it in place.  The slot is cleared
                // without delete, so the cleanup loop below skips it
                // (presumably ReplaceCode takes over the object -- confirm).
                ir_code->ReplaceCode(ir_controls[loops[loop_num_start]], myloop->getCode());
                ir_controls[loops[loop_num_start]] = NULL;
            }
            else {
                // Loop range: merge the covered control structures into one
                // block, replace that block, then release the originals.
                std::vector<IR_Control *> parm;
                for (int i = loops[loop_num_start]; i <= loops[loop_num_end]; i++)
                    parm.push_back(ir_controls[i]);
                IR_Block *block = ir_code->MergeNeighboringControlStructures(parm);
                ir_code->ReplaceCode(block, myloop->getCode());
                for (int i = loops[loop_num_start]; i <= loops[loop_num_end]; i++) {
                    delete ir_controls[i];
                    ir_controls[i] = NULL;
                }
            }
        }
    }

    delete myloop;
    for (size_t i = 0; i < ir_controls.size(); i++)
        delete ir_controls[i];
#ifdef BUILD_ROSE
    // ir_code stays NULL when the script never loaded a source file; do not
    // call a member function through a null pointer in that case.
    if (ir_code != NULL)
        ((IR_roseCode*)(ir_code))->finalizeRose();
#endif
    delete ir_code;
    delete []source_filename;
    return 0;
}