Merge branch 'master' of https://scm.cri.ensmp.fr/git/Faustine
[Faustine.git] / interpretor / preprocessor / faust-0.9.47mr3 / compiler / generator / klass.cpp
1 /************************************************************************
2 ************************************************************************
3 FAUST compiler
4 Copyright (C) 2003-2004 GRAME, Centre National de Creation Musicale
5 ---------------------------------------------------------------------
6 This program is free software; you can redistribute it and/or modify
7 it under the terms of the GNU General Public License as published by
8 the Free Software Foundation; either version 2 of the License, or
9 (at your option) any later version.
10
11 This program is distributed in the hope that it will be useful,
12 but WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 GNU General Public License for more details.
15
16 You should have received a copy of the GNU General Public License
17 along with this program; if not, write to the Free Software
18 Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
19 ************************************************************************
20 ************************************************************************/
21
22
23
24 /**********************************************************************
25 - klass.cpp : class C++ a remplir (projet FAUST) -
26
27
28 Historique :
29 -----------
30 17-10-2001 : implementation initiale (yo)
31 18-10-2001 : Ajout de getFreshID (yo)
32 02-11-2001 : Ajout de sous classes (yo)
33 06-11-2001 : modif impression des classes (yo)
34
35 ***********************************************************************/
36
37 #include <stdio.h>
38 #include <iostream>
39 #include <sstream>
40 #include <string>
41 #include <list>
42 #include <map>
43
44 #include "floats.hh"
45 #include "smartpointer.hh"
46 #include "klass.hh"
47 #include "uitree.hh"
48 #include "Text.hh"
49 #include "signals.hh"
50 #include "ppsig.hh"
51 #include "recursivness.hh"
52
53
54 extern bool gVectorSwitch;
55 extern bool gDeepFirstSwitch;
56 extern bool gOpenMPSwitch;
57 extern bool gOpenMPLoop;
58 extern bool gSchedulerSwitch;
59 extern int gVecSize;
60 extern bool gUIMacroSwitch;
61 extern int gVectorLoopVariant;
62 extern bool gGroupTaskSwitch;
63
64 extern map<Tree, set<Tree> > gMetaDataSet;
65 static int gTaskCount = 0;
66
67 void tab (int n, ostream& fout)
68 {
69 fout << '\n';
70 while (n--) fout << '\t';
71 }
72
73 bool Klass::fNeedPowerDef = false;
74
75 /**
76 * Store the loop used to compute a signal
77 */
78 void Klass::setLoopProperty(Tree sig, Loop* l)
79 {
80 fLoopProperty.set(sig,l);
81 }
82
83 /**
84 * Returns the loop used to compute a signal
85 */
86 bool Klass::getLoopProperty(Tree sig, Loop*& l)
87 {
88 return fLoopProperty.get(sig, l);
89 }
90
91 /**
92 * Open a non-recursive loop on top of the stack of open loops.
93 * @param size the number of iterations of the loop
94 */
95 void Klass::openLoop(const string& size)
96 {
97 fTopLoop = new Loop(fTopLoop, size);
98 //cerr << "\nOPEN SHARED LOOP(" << size << ") ----> " << fTopLoop << endl;
99 }
100
101 /**
102 * Open a recursive loop on top of the stack of open loops.
103 * @param recsymbol the recursive symbol defined in this loop
104 * @param size the number of iterations of the loop
105 */
106 void Klass::openLoop(Tree recsymbol, const string& size)
107 {
108 fTopLoop = new Loop(recsymbol, fTopLoop, size);
109 //cerr << "\nOPEN REC LOOP(" << *recsymbol << ", " << size << ") ----> " << fTopLoop << endl;
110 }
111
112 /**
113 * Close the top loop and either keep it
114 * or absorb it within its enclosing loop.
115 */
116 void Klass::closeLoop(Tree sig)
117 {
118 assert(fTopLoop);
119 Loop* l = fTopLoop;
120 fTopLoop = l->fEnclosingLoop;
121 assert(fTopLoop);
122
123 //l->println(4, cerr);
124 //cerr << endl;
125
126 Tree S = symlist(sig);
127 //cerr << "CLOSE LOOP :" << l << " with symbols " << *S << endl;
128 if (l->isEmpty() || fTopLoop->hasRecDependencyIn(S)) {
129 //cout << " will absorb" << endl;
130 // empty or dependent loop -> absorbed by enclosing one
131 //cerr << "absorbed by : " << fTopLoop << endl;
132 fTopLoop->absorb(l);
133 //delete l; HACK !!!
134 } else {
135 // cout << " will NOT absorb" << endl;
136 // we have an independent loop
137 setLoopProperty(sig,l); // associate the signal
138 fTopLoop->fBackwardLoopDependencies.insert(l);
139 // we need to indicate that all recursive symbols defined
140 // in this loop are defined in this loop
141 for (Tree lsym=l->fRecSymbolSet; !isNil(lsym); lsym=tl(lsym)) {
142 this->setLoopProperty(hd(lsym), l);
143 //cerr << "loop " << l << " defines " << *hd(lsym) << endl;
144 }
145 }
146 //cerr << "\n" << endl;
147 }
148
149 /**
150 * Print a list of lines.
151 */
152 void printlines(int n, list<string>& lines, ostream& fout)
153 {
154 list<string>::iterator s;
155 for (s = lines.begin(); s != lines.end(); s++) {
156 tab(n, fout); fout << *s;
157 }
158 }
159
160 /**
161 * Print a list of elements (e1, e2,...)
162 */
163 void printdecllist(int n, const string& decl, list<string>& content, ostream& fout)
164 {
165 if (!content.empty()) {
166 list<string>::iterator s;
167 fout << "\\";
168 tab(n, fout); fout << decl;
169 string sep = "(";
170 for (s = content.begin(); s != content.end(); s++) {
171 fout << sep << *s;
172 sep = ", ";
173 }
174 fout << ')';
175 }
176 }
177
178 /**
179 * Print the required C++ libraries as comments in source code
180 */
181 void Klass::printLibrary(ostream& fout)
182 {
183 set<string> S;
184 set<string>::iterator f;
185
186 string sep;
187 collectLibrary(S);
188 fout << "/* link with ";
189 for (f = S.begin(), sep =": "; f != S.end(); f++, sep = ", ") {
190 fout << sep << *f;
191 }
192 fout << " */\n";
193 }
194
195 /**
196 * Print the required include files
197 */
198 void Klass::printIncludeFile(ostream& fout)
199 {
200 set<string> S;
201 set<string>::iterator f;
202
203 if (gOpenMPSwitch) {
204 fout << "#include <omp.h>" << "\n";
205 }
206
207 collectIncludeFile(S);
208 for (f = S.begin(); f != S.end(); f++) {
209 fout << "#include " << *f << "\n";
210 }
211 }
212
213 /**
214 * Print additional functions required by the generated code
215 */
216 void Klass::printAdditionalCode(ostream& fout)
217 {
218 if (fNeedPowerDef) {
219 // Add faustpower definition to C++ code
220 fout << "#include <cmath>" << endl;
221 fout << "template <int N> inline float faustpower(float x) { return powf(x,N); } " << endl;
222 fout << "template <int N> inline double faustpower(double x) { return pow(x,N); }" << endl;
223 fout << "template <int N> inline int faustpower(int x) { return faustpower<N/2>(x) * faustpower<N-N/2>(x); } " << endl;
224 fout << "template <> inline int faustpower<0>(int x) { return 1; }" << endl;
225 fout << "template <> inline int faustpower<1>(int x) { return x; }" << endl;
226 }
227
228 }
229
230 /**
231 * Print metadata declaration
232 */
233 void Klass::printMetadata(int n, const map<Tree, set<Tree> >& S, ostream& fout)
234 {
235 tab(n,fout); fout << "static void metadata(Meta* m) \t{ ";
236
237 for (map<Tree, set<Tree> >::iterator i = gMetaDataSet.begin(); i != gMetaDataSet.end(); i++) {
238 if (i->first != tree("author")) {
239 tab(n+1,fout); fout << "m->declare(\"" << *(i->first) << "\", " << **(i->second.begin()) << ");";
240 } else {
241 for (set<Tree>::iterator j = i->second.begin(); j != i->second.end(); j++) {
242 if (j == i->second.begin()) {
243 tab(n+1,fout); fout << "m->declare(\"" << *(i->first) << "\", " << **j << ");" ;
244 } else {
245 tab(n+1,fout); fout << "m->declare(\"" << "contributor" << "\", " << **j << ");";
246 }
247 }
248 }
249 }
250
251 tab(n,fout); fout << "}" << endl;
252 }
253
254 inline bool isElement(const set<Loop*>& S, Loop* l)
255 {
256 return S.find(l)!= S.end();
257 }
258
259 /**
260 * Print a loop graph deep first
261 */
262 void Klass::printLoopDeepFirst(int n, ostream& fout, Loop* l, set<Loop*>& visited)
263 {
264 // avoid printing already printed loops
265 if (isElement(visited, l)) return;
266
267 // remember we have printed this loop
268 visited.insert(l);
269
270 // print the dependencies loops (that need to be computed before this one)
271 for (lset::const_iterator p =l->fBackwardLoopDependencies.begin(); p!=l->fBackwardLoopDependencies.end(); p++) {
272 printLoopDeepFirst(n, fout, *p, visited);
273 }
274 // the print the loop itself
275 tab(n, fout);
276 tab(n, fout); fout << "// LOOP " << l << ", ORDER " << l->fOrder << endl;
277 l->println(n+1, fout);
278 }
279
280 /**
281 * Compute how many time each loop is used in a DAG
282 */
283 static void computeUseCount(Loop* l)
284 {
285 l->fUseCount++;
286 if (l->fUseCount == 1) {
287 for (lset::iterator p =l->fBackwardLoopDependencies.begin(); p!=l->fBackwardLoopDependencies.end(); p++) {
288 computeUseCount(*p);
289 }
290 }
291 }
292
293 /**
294 * Group together sequences of loops
295 */
296 static void groupSeqLoops(Loop* l)
297 {
298 int n = l->fBackwardLoopDependencies.size();
299 if (n==0) {
300 return;
301 } else if (n==1) {
302 Loop* f = *(l->fBackwardLoopDependencies.begin());
303 if (f->fUseCount == 1) {
304 l->concat(f);
305 groupSeqLoops(l);
306 } else {
307 groupSeqLoops(f);
308 }
309 return;
310 } else if (n > 1) {
311 for (lset::iterator p =l->fBackwardLoopDependencies.begin(); p!=l->fBackwardLoopDependencies.end(); p++) {
312 groupSeqLoops(*p);
313 }
314 }
315 }
316
317 #define WORK_STEALING_INDEX 0
318 #define LAST_TASK_INDEX 1
319 #define START_TASK_INDEX LAST_TASK_INDEX + 1
320
321 #define START_TASK_MAX 2
322
323 void Klass::buildTasksList()
324 {
325 lgraph G;
326
327 if (gGroupTaskSwitch) {
328 computeUseCount(fTopLoop);
329 groupSeqLoops(fTopLoop);
330 }
331
332 sortGraph(fTopLoop, G);
333 int index_task = START_TASK_INDEX;
334
335 addDeclCode("TaskGraph fGraph;");
336 addDeclCode("FAUSTFLOAT** input;");
337 addDeclCode("FAUSTFLOAT** output;");
338 addDeclCode("volatile bool fIsFinished;");
339 addDeclCode("int fFullCount;");
340 addDeclCode("int fIndex;");
341 addDeclCode("DSPThreadPool* fThreadPool;");
342 addDeclCode("int fStaticNumThreads;");
343 addDeclCode("int fDynamicNumThreads;");
344
345 // Compute forward dependencies
346 for (int l=G.size()-1; l>=0; l--) {
347 for (lset::const_iterator p =G[l].begin(); p!=G[l].end(); p++) {
348 for (lset::const_iterator p1 = (*p)->fBackwardLoopDependencies.begin(); p1!=(*p)->fBackwardLoopDependencies.end(); p1++) {
349 (*p1)->fForwardLoopDependencies.insert((*p));
350 }
351 (*p)->fIndex = index_task;
352 index_task++;
353 }
354 }
355
356 // Compute ready tasks list
357 vector<int> task_num;
358 for (int l=G.size()-1; l>=0; l--) {
359 lset::const_iterator next;
360 for (lset::const_iterator p =G[l].begin(); p!=G[l].end(); p++) {
361 if ((*p)->fBackwardLoopDependencies.size() == 0) {
362 task_num.push_back((*p)->fIndex);
363 }
364 }
365 }
366
367 if (task_num.size() < START_TASK_MAX) {
368
369 // Push ready tasks thread 0, execute one task directly
370
371 addZone3("if (cur_thread == 0) {");
372
373 Loop* keep = NULL;
374 for (int l=G.size()-1; l>=0; l--) {
375 lset::const_iterator next;
376 for (lset::const_iterator p =G[l].begin(); p!=G[l].end(); p++) {
377 if ((*p)->fBackwardLoopDependencies.size() == 0) {
378 if (keep == NULL) {
379 keep = *p;
380 } else {
381 addZone3(subst(" taskqueue.PushHead($0);", T((*p)->fIndex)));
382 }
383 }
384 }
385 }
386
387 if (keep != NULL) {
388 addZone3(subst(" tasknum = $0;", T(keep->fIndex)));
389 }
390
391 addZone3("} else {");
392 addZone3(" tasknum = TaskQueue::GetNextTask(cur_thread, fDynamicNumThreads);");
393 addZone3("}");
394
395 } else {
396
397 // Cut ready tasks list and have each thread (dynamically) use a subpart
398 addZone3(subst("int task_list_size = $0;", T((int)task_num.size())));
399 stringstream buf;
400 buf << "int task_list[" << task_num.size() << "] = {";
401 for(size_t i = 0; i < task_num.size(); i++) {
402 buf << task_num[i];
403 if (i != (task_num.size() - 1))
404 buf << ",";
405 }
406 buf << "};";
407
408 addZone3(buf.str());
409 addZone3("taskqueue.InitTaskList(task_list_size, task_list, fDynamicNumThreads, cur_thread, tasknum);");
410 }
411
412 // Last stage connected to end task
413 if (G[0].size() > 1) {
414 addZone2c("// Initialize end task, if more than one input");
415 addZone2c(subst("fGraph.InitTask($0,$1);", T(LAST_TASK_INDEX), T((int)G[0].size())));
416 } else {
417 addZone2c("// End task has only one input, so will be directly activated");
418 }
419
420 // Compute init section
421 addZone2c("// Only initialize taks with more than one input");
422 for (int l=G.size()-1; l>=0; l--) {
423 for (lset::const_iterator p =G[l].begin(); p!=G[l].end(); p++) {
424 if ((*p)->fBackwardLoopDependencies.size() > 1) { // Only initialize taks with more than 1 input, since taks with one input are "directly" activated.
425 addZone2c(subst("fGraph.InitTask($0,$1);", T(START_TASK_INDEX + gTaskCount++), T((int)(*p)->fBackwardLoopDependencies.size())));
426 } else {
427 gTaskCount++;
428 }
429 }
430 }
431
432 addInitCode("fStaticNumThreads = get_max_cpu();");
433 addInitCode("fDynamicNumThreads = getenv(\"OMP_NUM_THREADS\") ? atoi(getenv(\"OMP_NUM_THREADS\")) : fStaticNumThreads;");
434 addInitCode("fThreadPool = DSPThreadPool::Init();");
435 addInitCode("fThreadPool->StartAll(fStaticNumThreads - 1, false);");
436
437 gTaskCount = 0;
438 }
439
440 /**
441 * Print the loop graph (used for vector code)
442 */
443 void Klass::printLoopGraphVector(int n, ostream& fout)
444 {
445 if (gGroupTaskSwitch) {
446 computeUseCount(fTopLoop);
447 groupSeqLoops(fTopLoop);
448 }
449
450 lgraph G;
451 sortGraph(fTopLoop, G);
452
453 #if 1
454 // EXPERIMENTAL
455 if (gVectorSwitch && gDeepFirstSwitch) {
456 set<Loop*> visited;
457 printLoopDeepFirst(n, fout, fTopLoop, visited);
458 return;
459 }
460 #endif
461
462 // normal mode
463 for (int l=G.size()-1; l>=0; l--) {
464 if (gVectorSwitch) { tab(n, fout); fout << "// SECTION : " << G.size() - l; }
465 for (lset::const_iterator p =G[l].begin(); p!=G[l].end(); p++) {
466 (*p)->println(n, fout);
467 }
468 }
469 }
470
471 /**
472 * Print the loop graph as a serie of parallel loops
473 */
474 void Klass::printLoopGraphOpenMP(int n, ostream& fout)
475 {
476 if (gGroupTaskSwitch) {
477 computeUseCount(fTopLoop);
478 groupSeqLoops(fTopLoop);
479 }
480
481 lgraph G;
482 sortGraph(fTopLoop, G);
483
484 // OpenMP mode : add OpenMP directives
485 for (int l=G.size()-1; l>=0; l--) {
486 tab(n, fout); fout << "// SECTION : " << G.size() - l;
487 printLoopLevelOpenMP(n, G.size() - l, G[l], fout);
488 }
489 }
490
491 /**
492 * Print the loop graph as a serie of parallel loops
493 */
494 void Klass::printLoopGraphScheduler(int n, ostream& fout)
495 {
496 if (gGroupTaskSwitch) {
497 computeUseCount(fTopLoop);
498 groupSeqLoops(fTopLoop);
499 }
500
501 lgraph G;
502 sortGraph(fTopLoop, G);
503
504 // OpenMP mode : add OpenMP directives
505 for (int l=G.size()-1; l>0; l--) {
506 tab(n, fout); fout << "// SECTION : " << G.size() - l;
507 printLoopLevelScheduler(n, G.size() - l, G[l], fout);
508 }
509
510 printLastLoopLevelScheduler(n, G.size(), G[0], fout);
511 }
512
513
514 /**
515 * Print the loop graph in dot format
516 */
517 void Klass::printGraphDotFormat(ostream& fout)
518 {
519 lgraph G;
520 sortGraph(fTopLoop, G);
521
522 fout << "strict digraph loopgraph {" << endl;
523 fout << '\t' << "rankdir=LR;" << endl;
524 fout << '\t' << "node[color=blue, fillcolor=lightblue, style=filled, fontsize=9];" << endl;
525
526 int lnum = 0; // used for loop numbers
527 // for each level of the graph
528 for (int l=G.size()-1; l>=0; l--) {
529 // for each task in the level
530 for (lset::const_iterator t =G[l].begin(); t!=G[l].end(); t++) {
531 // print task label "Lxxx : 0xffffff"
532 fout << '\t' << 'L'<<(*t)<<"[label=<<font face=\"verdana,bold\">L"<<lnum++<<"</font> : "<<(*t)<<">];"<<endl;
533 // for each source of the task
534 for (lset::const_iterator src = (*t)->fBackwardLoopDependencies.begin(); src!=(*t)->fBackwardLoopDependencies.end(); src++) {
535 // print the connection Lxxx -> Lyyy;
536 fout << '\t' << 'L'<<(*src)<<"->"<<'L'<<(*t)<<';'<<endl;
537 }
538 }
539 }
540 fout << "}" << endl;
541 }
542
543 /**
544 * Print the loop graph (used for internals classes)
545 */
546 void Klass::printLoopGraphInternal(int n, ostream& fout)
547 {
548 lgraph G;
549 sortGraph(fTopLoop, G);
550
551 // normal mode
552 for (int l=G.size()-1; l>=0; l--) {
553 if (gVectorSwitch) { tab(n, fout); fout << "// SECTION : " << G.size() - l; }
554 for (lset::const_iterator p =G[l].begin(); p!=G[l].end(); p++) {
555 (*p)->printoneln(n, fout);
556 }
557 }
558 }
559
560 /**
561 * Print the loop graph (scalar mode)
562 */
563 void Klass::printLoopGraphScalar(int n, ostream& fout)
564 {
565 fTopLoop->printoneln(n, fout);
566 }
567
568 /**
569 * returns true if all the loops are non recursive
570 */
571 static bool nonRecursiveLevel(const lset& L)
572 {
573 for (lset::const_iterator p =L.begin(); p!=L.end(); p++) {
574 if ((*p)->fIsRecursive) return false;
575 }
576 return true;
577 }
578
579 /**
580 * Print the 'level' of the loop graph as a set of
581 * parallel loops
582 */
583 void Klass::printLoopLevelOpenMP(int n, int lnum, const lset& L, ostream& fout)
584 {
585 if (nonRecursiveLevel(L) && L.size()==1) {
586 for (lset::const_iterator p =L.begin(); p!=L.end(); p++) {
587 if ((*p)->isEmpty() == false) {
588 if (gOpenMPLoop) {
589 (*p)->printParLoopln(n, fout);
590 } else {
591 tab(n, fout); fout << "#pragma omp single ";
592 tab(n, fout); fout << "{ ";
593 (*p)->println(n+1, fout);
594 tab(n, fout); fout << "} ";
595 }
596 }
597 }
598
599 } else if (L.size() > 1) {
600 tab(n, fout); fout << "#pragma omp sections ";
601 tab(n, fout); fout << "{ ";
602 for (lset::const_iterator p =L.begin(); p!=L.end(); p++) {
603 tab(n+1, fout); fout << "#pragma omp section ";
604 tab(n+1, fout); fout << "{";
605 (*p)->println(n+2, fout);
606 tab(n+1, fout); fout << "} ";
607 }
608 tab(n, fout); fout << "} ";
609 } else if (L.size() == 1 && !(*L.begin())->isEmpty()) {
610 tab(n, fout); fout << "#pragma omp single ";
611 tab(n, fout); fout << "{ ";
612 for (lset::const_iterator p =L.begin(); p!=L.end(); p++) {
613 (*p)->println(n+1, fout);
614 }
615 tab(n, fout); fout << "} ";
616 }
617 }
618
619 /**
620 * Print the 'level' of the loop graph as a set of
621 * parallel loops
622 */
623 void Klass::printLastLoopLevelScheduler(int n, int lnum, const lset& L, ostream& fout)
624 {
625 if (nonRecursiveLevel(L) && L.size() == 1 && !(*L.begin())->isEmpty()) {
626
627 lset::const_iterator p =L.begin();
628 tab(n, fout); fout << "case " << gTaskCount++ << ": { ";
629 (*p)->println(n+1, fout);
630 tab(n+1, fout); fout << "tasknum = LAST_TASK_INDEX;";
631 tab(n+1, fout); fout << "break;";
632 tab(n, fout); fout << "} ";
633
634 } else if (L.size() > 1) {
635
636 for (lset::const_iterator p =L.begin(); p!=L.end(); p++) {
637 tab(n, fout); fout << "case " << gTaskCount++ << ": { ";
638 (*p)->println(n+1, fout);
639 tab(n+1, fout); fout << "fGraph.ActivateOneOutputTask(taskqueue, LAST_TASK_INDEX, tasknum);";
640 tab(n+1, fout); fout << "break;";
641 tab(n, fout); fout << "} ";
642 }
643
644 } else if (L.size() == 1 && !(*L.begin())->isEmpty()) {
645
646 lset::const_iterator p =L.begin();
647 tab(n, fout); fout << "case " << gTaskCount++ << ": { ";
648 (*p)->println(n+1, fout);
649 tab(n+1, fout); fout << "tasknum = LAST_TASK_INDEX;";
650 tab(n+1, fout); fout << "break;";
651 tab(n, fout); fout << "} ";
652
653 }
654 }
655
656 void Klass::printOneLoopScheduler(lset::const_iterator p, int n, ostream& fout)
657 {
658 tab(n, fout); fout << "case " << gTaskCount++ << ": { ";
659 (*p)->println(n+1, fout);
660
661 // One output only
662 if ((*p)->fForwardLoopDependencies.size() == 1) {
663
664 lset::const_iterator p1 = (*p)->fForwardLoopDependencies.begin();
665 if ((*p1)->fBackwardLoopDependencies.size () == 1) {
666 tab(n+1, fout); fout << subst("tasknum = $0;", T((*p1)->fIndex));
667 } else {
668 tab(n+1, fout); fout << subst("fGraph.ActivateOneOutputTask(taskqueue, $0, tasknum);", T((*p1)->fIndex));
669 }
670
671 } else {
672
673 Loop* keep = NULL;
674 // Find one output with only one backward dependencies
675 for (lset::const_iterator p1 = (*p)->fForwardLoopDependencies.begin(); p1!=(*p)->fForwardLoopDependencies.end(); p1++) {
676 if ((*p1)->fBackwardLoopDependencies.size () == 1) {
677 keep = *p1;
678 break;
679 }
680 }
681
682 if (keep == NULL) {
683 tab(n+1, fout); fout << "tasknum = WORK_STEALING_INDEX;";
684 }
685
686 for (lset::const_iterator p1 = (*p)->fForwardLoopDependencies.begin(); p1!=(*p)->fForwardLoopDependencies.end(); p1++) {
687 if ((*p1)->fBackwardLoopDependencies.size () == 1) { // Task is the only input
688 if (*p1 != keep) {
689 tab(n+1, fout); fout << subst("taskqueue.PushHead($0);", T((*p1)->fIndex));
690 }
691 } else {
692 if (keep == NULL) {
693 tab(n+1, fout); fout << subst("fGraph.ActivateOutputTask(taskqueue, $0, tasknum);", T((*p1)->fIndex));
694 } else {
695 tab(n+1, fout); fout << subst("fGraph.ActivateOutputTask(taskqueue, $0);", T((*p1)->fIndex));
696 }
697 }
698 }
699
700 if (keep != NULL) {
701 tab(n+1, fout); fout << subst("tasknum = $0;", T(keep->fIndex)); // Last one
702 } else {
703 tab(n+1, fout); fout << "fGraph.GetReadyTask(taskqueue, tasknum);"; // Last one
704 }
705 }
706
707 tab(n+1, fout); fout << "break;";
708 tab(n, fout); fout << "} ";
709 }
710
711 /**
712 * Print the 'level' of the loop graph as a set of
713 * parallel loops
714 */
715
716 void Klass::printLoopLevelScheduler(int n, int lnum, const lset& L, ostream& fout)
717 {
718 if (nonRecursiveLevel(L) && L.size() == 1 && !(*L.begin())->isEmpty()) {
719 printOneLoopScheduler(L.begin(), n, fout);
720 } else if (L.size() > 1) {
721 for (lset::const_iterator p = L.begin(); p != L.end(); p++) {
722 printOneLoopScheduler(p, n, fout);
723 }
724 } else if (L.size() == 1 && !(*L.begin())->isEmpty()) {
725 printOneLoopScheduler(L.begin(), n, fout);
726 }
727 }
728
729 /**
730 * Print a full C++ class corresponding to a Faust dsp
731 */
732 void Klass::println(int n, ostream& fout)
733 {
734 list<Klass* >::iterator k;
735
736 tab(n,fout); fout << "#define FAUSTCLASS "<< fKlassName << endl;
737
738 if (gSchedulerSwitch) {
739 tab(n,fout); fout << "class " << fKlassName << " : public " << fSuperKlassName << ", public Runnable {";
740 } else {
741 tab(n,fout); fout << "class " << fKlassName << " : public " << fSuperKlassName << " {";
742 }
743
744 if (gUIMacroSwitch) {
745 tab(n,fout); fout << " public:";
746 } else {
747 tab(n,fout); fout << " private:";
748 }
749
750 for (k = fSubClassList.begin(); k != fSubClassList.end(); k++) (*k)->println(n+1, fout);
751
752 printlines(n+1, fDeclCode, fout);
753
754 tab(n,fout); fout << " public:";
755
756 printMetadata(n+1, gMetaDataSet, fout);
757
758 if (gSchedulerSwitch) {
759 tab(n+1,fout); fout << "virtual ~" << fKlassName << "() \t{ "
760 << "DSPThreadPool::Destroy()"
761 << "; }";
762 }
763
764 tab(n+1,fout); fout << "virtual int getNumInputs() \t{ "
765 << "return " << fNumInputs
766 << "; }";
767 tab(n+1,fout); fout << "virtual int getNumOutputs() \t{ "
768 << "return " << fNumOutputs
769 << "; }";
770
771 tab(n+1,fout); fout << "static void classInit(int samplingFreq) {";
772 printlines (n+2, fStaticInitCode, fout);
773 tab(n+1,fout); fout << "}";
774
775 tab(n+1,fout); fout << "virtual void instanceInit(int samplingFreq) {";
776 tab(n+2,fout); fout << "fSamplingFreq = samplingFreq;";
777 printlines (n+2, fInitCode, fout);
778 tab(n+1,fout); fout << "}";
779
780 tab(n+1,fout); fout << "virtual void init(int samplingFreq) {";
781 tab(n+2,fout); fout << "classInit(samplingFreq);";
782 tab(n+2,fout); fout << "instanceInit(samplingFreq);";
783 tab(n+1,fout); fout << "}";
784
785
786 tab(n+1,fout); fout << "virtual void buildUserInterface(UI* interface) {";
787 printlines (n+2, fUICode, fout);
788 tab(n+1,fout); fout << "}";
789
790 printComputeMethod(n, fout);
791
792 tab(n,fout); fout << "};\n" << endl;
793
794 printlines(n, fStaticFields, fout);
795
796 // generate user interface macros if needed
797 if (gUIMacroSwitch) {
798 tab(n, fout); fout << "#ifdef FAUST_UIMACROS";
799 tab(n+1,fout); fout << "#define FAUST_INPUTS " << fNumInputs;
800 tab(n+1,fout); fout << "#define FAUST_OUTPUTS " << fNumOutputs;
801 tab(n+1,fout); fout << "#define FAUST_ACTIVES " << fNumActives;
802 tab(n+1,fout); fout << "#define FAUST_PASSIVES " << fNumPassives;
803 printlines(n+1, fUIMacro, fout);
804 tab(n, fout); fout << "#endif";
805 }
806
807 fout << endl;
808 }
809
810 /**
811 * Print Compute() method according to the various switch
812 */
813 void Klass::printComputeMethod(int n, ostream& fout)
814 {
815 if (gSchedulerSwitch) {
816 printComputeMethodScheduler (n, fout);
817 } else if (gOpenMPSwitch) {
818 printComputeMethodOpenMP (n, fout);
819 } else if (gVectorSwitch) {
820 switch (gVectorLoopVariant) {
821 case 0 : printComputeMethodVectorFaster(n, fout); break;
822 case 1 : printComputeMethodVectorSimple(n, fout); break;
823 default : cerr << "unknown loop variant " << gVectorLoopVariant << endl; exit(1);
824 }
825 } else {
826 printComputeMethodScalar(n, fout);
827 }
828 }
829
830 void Klass::printComputeMethodScalar(int n, ostream& fout)
831 {
832 tab(n+1,fout); fout << subst("virtual void compute (int count, $0** input, $0** output) {", xfloat());
833 printlines (n+2, fZone1Code, fout);
834 printlines (n+2, fZone2Code, fout);
835 printlines (n+2, fZone2bCode, fout);
836 printlines (n+2, fZone3Code, fout);
837 printLoopGraphScalar (n+2,fout);
838 tab(n+1,fout); fout << "}";
839 }
840
841 /**
842 * Uses loops of constant gVecSize boundary in order to provide the
843 * C compiler with more optimisation opportunities. Improves performances
844 * in general, but not always
845 */
846 void Klass::printComputeMethodVectorFaster(int n, ostream& fout)
847 {
848 // in vector mode we need to split loops in smaller pieces not larger
849 // than gVecSize
850 tab(n+1,fout); fout << subst("virtual void compute (int fullcount, $0** input, $0** output) {", xfloat());
851 printlines(n+2, fZone1Code, fout);
852 printlines(n+2, fZone2Code, fout);
853 printlines(n+2, fZone2bCode, fout);
854
855 tab(n+2,fout); fout << "int index;";
856 tab(n+2,fout); fout << "for (index = 0; index <= fullcount - " << gVecSize << "; index += " << gVecSize << ") {";
857 tab(n+3,fout); fout << "// compute by blocks of " << gVecSize << " samples";
858 tab(n+3,fout); fout << "const int count = " << gVecSize << ";";
859 printlines (n+3, fZone3Code, fout);
860 printLoopGraphVector(n+3,fout);
861 tab(n+2,fout); fout << "}";
862
863 tab(n+2,fout); fout << "if (index < fullcount) {";
864 tab(n+3,fout); fout << "// compute the remaining samples if any";
865 tab(n+3,fout); fout << "int count = fullcount-index;";
866 printlines (n+3, fZone3Code, fout);
867 printLoopGraphVector(n+3,fout);
868 tab(n+2,fout); fout << "}";
869 tab(n+1,fout); fout << "}";
870 }
871
872 /**
873 * Simple loop layout, generally less efficient than printComputeMethodVectorFaster
874 */
875 void Klass::printComputeMethodVectorSimple(int n, ostream& fout)
876 {
877 // in vector mode we need to split loops in smaller pieces not larger
878 // than gVecSize
879 tab(n+1,fout); fout << subst("virtual void compute (int fullcount, $0** input, $0** output) {", xfloat());
880 printlines(n+2, fZone1Code, fout);
881 printlines(n+2, fZone2Code, fout);
882 printlines(n+2, fZone2bCode, fout);
883 tab(n+2,fout); fout << "for (int index = 0; index < fullcount; index += " << gVecSize << ") {";
884 tab(n+3,fout); fout << "int count = min("<< gVecSize << ", fullcount-index);";
885 printlines (n+3, fZone3Code, fout);
886 printLoopGraphVector(n+3,fout);
887 tab(n+2,fout); fout << "}";
888 tab(n+1,fout); fout << "}";
889 }
890
891 /*
892 void Klass::printComputeMethodVectorFix0 (int n, ostream& fout)
893 {
894 // in vector mode we need to split loops in smaller pieces not larger
895 // than gVecSize
896 tab(n+1,fout); fout << "virtual void compute (int fullcount, float** input, float** output) {";
897 printlines(n+2, fZone1Code, fout);
898 printlines(n+2, fZone2Code, fout);
899 printlines(n+2, fZone2bCode, fout);
900 tab(n+2,fout); fout << "for (int index = 0; index < fullcount; index += " << gVecSize << ") {";
901 tab(n+3,fout); fout << "if (fullcount >= index + " << gVecSize << ") {";
902 tab(n+4,fout); fout << "// compute by blocks of " << gVecSize << " samples";
903 tab(n+4,fout); fout << "const int count = " << gVecSize << ";"; // temporaire
904 printlines(n+4, fZone3Code, fout);
905 printLoopGraph (n+4,fout);
906 tab(n+3,fout); fout << "} else if (fullcount > index) {";
907 //tab(n+3,fout); fout << "int count = min ("<< gVecSize << ", fullcount-index);";
908 tab(n+4,fout); fout << "// compute the remaining samples";
909 tab(n+4,fout); fout << "int count = fullcount-index;" ;
910 printlines(n+4, fZone3Code, fout);
911 printLoopGraph (n+4,fout);
912 tab(n+3,fout); fout << "}";
913 tab(n+2,fout); fout << "}";
914 tab(n+1,fout); fout << "}";
915 }
916
917 void Klass::printComputeMethodVectorFix1 (int n, ostream& fout)
918 {
919 // in vector mode we need to split loops in smaller pieces not larger
920 // than gVecSize
921 tab(n+1,fout); fout << "virtual void compute (int fullcount, float** input, float** output) {";
922 printlines(n+2, fZone1Code, fout);
923 printlines(n+2, fZone2Code, fout);
924 printlines(n+2, fZone2bCode, fout);
925
926 tab(n+2,fout); fout << "int \tblock;";
927 tab(n+2,fout); fout << "for (block = 0; block < fullcount/" << gVecSize << "; block++) {";
928 tab(n+3,fout); fout << "// compute by blocks of " << gVecSize << " samples";
929 tab(n+3,fout); fout << "const int index = block*" << gVecSize << ";";
930 tab(n+3,fout); fout << "const int count = " << gVecSize << ";"; // temporaire
931 printlines(n+3, fZone3Code, fout);
932 printLoopGraph (n+3,fout);
933 tab(n+2,fout); fout << "}";
934
935 tab(n+2,fout); fout << "if (fullcount%" << gVecSize << " != 0) {";
936 //tab(n+3,fout); fout << "int count = min ("<< gVecSize << ", fullcount-index);";
937 tab(n+3,fout); fout << "// compute the remaining samples";
938 tab(n+3,fout); fout << "const int index = block*" << gVecSize << ";";
939 tab(n+3,fout); fout << "int count = fullcount%" << gVecSize << ";" ;
940 printlines(n+3, fZone3Code, fout);
941 printLoopGraph (n+3,fout);
942 tab(n+2,fout); fout << "}";
943 tab(n+1,fout); fout << "}";
944 }*/
945
946 void Klass::printComputeMethodOpenMP(int n, ostream& fout)
947 {
948 // in openMP mode we need to split loops in smaller pieces not larger
949 // than gVecSize and add OpenMP pragmas
950 tab(n+1,fout); fout << subst("virtual void compute (int fullcount, $0** input, $0** output) {", xfloat());
951 printlines(n+2, fZone1Code, fout);
952 printlines(n+2, fZone2Code, fout);
953 tab(n+2,fout); fout << "#pragma omp parallel";
954 printdecllist(n+3, "firstprivate", fFirstPrivateDecl, fout);
955
956 tab(n+2,fout); fout << "{";
957 if (!fZone2bCode.empty()) {
958 tab(n+3,fout); fout << "#pragma omp single";
959 tab(n+3,fout); fout << "{";
960 printlines(n+4, fZone2bCode, fout);
961 tab(n+3,fout); fout << "}";
962 }
963
964 tab(n+3,fout); fout << "for (int index = 0; index < fullcount; index += " << gVecSize << ") {";
965 tab(n+4,fout); fout << "int count = min ("<< gVecSize << ", fullcount-index);";
966
967 printlines (n+4, fZone3Code, fout);
968 printLoopGraphOpenMP (n+4,fout);
969
970 tab(n+3,fout); fout << "}";
971
972 tab(n+2,fout); fout << "}";
973 tab(n+1,fout); fout << "}";
974 }
975
976 /*
977 void Klass::printComputeMethodScheduler (int n, ostream& fout)
978 {
979 tab(n+1,fout); fout << subst("virtual void compute (int fullcount, $0** input, $0** output) {", xfloat());
980 printlines (n+2, fZone1Code, fout);
981 printlines (n+2, fZone2Code, fout);
982
983 // Init input and output
984 tab(n+2,fout); fout << "// Init input and output";
985 printlines (n+2, fZone3aCode, fout);
986 printlines (n+2, fZone3bCode, fout);
987
988 tab(n+2,fout); fout << "// Init graph state";
989 tab(n+2,fout); fout << "initState(fTasksList);";
990 tab(n+2,fout); fout << "bool is_finished = false;";
991 tab(n+2,fout); fout << "unsigned int index_in = 0;";
992 tab(n+2,fout); fout << "unsigned int index_out = 0;";
993 tab(n+2,fout); fout << "int count = min ("<< gVecSize << ", fullcount);";
994
995 tab(n+2,fout); fout << "InitSchedulingMap();";
996 tab(n+2,fout); fout << "#pragma omp parallel";
997 printdecllist(n+3, "firstprivate", fFirstPrivateDecl, fout);
998
999 tab(n+2,fout); fout << "{";
1000 tab(n+3,fout); fout << "while (!is_finished) {";
1001 tab(n+4,fout); fout << "Task* task = searchTaskToAcquire(fTasksList);";
1002 tab(n+4,fout); fout << "if (task != NULL) {";
1003 tab(n+5,fout); fout << "bool last_cycle_for_thread = false;";
1004 tab(n+5,fout); fout << "do {";
1005 tab(n+6,fout); fout << "AddTaskToScheduling(task);";
1006 tab(n+6,fout); fout << "switch (task->fNum) {";
1007
1008 // DSP tasks
1009 printLoopGraph (n+7,fout);
1010
1011 // Input task
1012 tab(n+7, fout); fout << "case " << gTaskCount++ << ": { ";
1013 printlines (n+8, fZone6Code, fout);
1014 tab(n+8, fout); fout << "index_in += count;";
1015 tab(n+8, fout); fout << "last_cycle_for_thread = (index_in > fullcount);";
1016 tab(n+8, fout); fout << "break;";
1017 tab(n+7, fout); fout << "} ";
1018
1019 // Output task
1020 tab(n+7, fout); fout << "case " << gTaskCount++ << ": { ";
1021 printlines (n+8, fZone7Code, fout);
1022 tab(n+8, fout); fout << "index_out += count;";
1023 tab(n+8, fout); fout << "last_cycle_for_thread = (index_out > fullcount);";
1024 tab(n+8, fout); fout << "break;";
1025 tab(n+7, fout); fout << "} ";
1026
1027 // End task
1028 tab(n+7, fout); fout << "case " << gTaskCount++ << ": { ";
1029 tab(n+8, fout); fout << "is_finished = ((index_in >= fullcount) && (index_out >= fullcount));";
1030 tab(n+8, fout); fout << "break;";
1031 tab(n+7, fout); fout << "} ";
1032
1033 tab(n+6,fout); fout << "}";
1034 tab(n+6,fout); fout << "if (last_cycle_for_thread) break;";
1035
1036 tab(n+5,fout); fout << "} while ((task = task->concludeAndTryToAcquireNext()) != NULL);";
1037 tab(n+4,fout); fout << "}";
1038 tab(n+3,fout); fout << "}";
1039 tab(n+2,fout); fout << "}";
1040 tab(n+2,fout); fout << "PrintSchedulingMap();";
1041 tab(n+1,fout); fout << "}";
1042 }
1043 */
1044
1045 void Klass::printComputeMethodScheduler (int n, ostream& fout)
1046 {
1047 tab(n+1,fout); fout << "void display() {";
1048 tab(n+2,fout); fout << "fGraph.Display();";
1049 tab(n+1,fout); fout << "}";
1050
1051 tab(n+1,fout); fout << subst("virtual void compute (int fullcount, $0** input, $0** output) {", xfloat());
1052
1053 tab(n+2,fout); fout << "GetRealTime();";
1054
1055 tab(n+2,fout); fout << "this->input = input;";
1056 tab(n+2,fout); fout << "this->output = output;";
1057
1058 tab(n+2,fout); fout << "StartMeasure();";
1059
1060 tab(n+2,fout); fout << "for (fIndex = 0; fIndex < fullcount; fIndex += " << gVecSize << ") {";
1061
1062 tab(n+3,fout); fout << "fFullCount = min ("<< gVecSize << ", fullcount-fIndex);";
1063 tab(n+3,fout); fout << "TaskQueue::Init();";
1064 printlines (n+3, fZone2cCode, fout);
1065
1066 tab(n+3,fout); fout << "fIsFinished = false;";
1067 tab(n+3,fout); fout << "fThreadPool->SignalAll(fDynamicNumThreads - 1, this);";
1068 tab(n+3,fout); fout << "computeThread(0);";
1069 tab(n+3,fout); fout << "while (!fThreadPool->IsFinished()) {}";
1070
1071 tab(n+2,fout); fout << "}";
1072
1073 tab(n+2,fout); fout << "StopMeasure(fStaticNumThreads, fDynamicNumThreads);";
1074
1075 tab(n+1,fout); fout << "}";
1076
1077 tab(n+1,fout); fout << "void computeThread(int cur_thread) {";
1078 printlines (n+2, fZone1Code, fout);
1079 printlines (n+2, fZone2Code, fout);
1080
1081 tab(n+2,fout); fout << "// Init graph state";
1082
1083 tab(n+2,fout); fout << "{";
1084 tab(n+3,fout); fout << "TaskQueue taskqueue(cur_thread);";
1085 tab(n+3,fout); fout << "int tasknum = -1;";
1086 tab(n+3,fout); fout << "int count = fFullCount;";
1087
1088 // Init input and output
1089 tab(n+3,fout); fout << "// Init input and output";
1090 printlines (n+3, fZone3Code, fout);
1091
1092 tab(n+3,fout); fout << "while (!fIsFinished) {";
1093 tab(n+4,fout); fout << "switch (tasknum) {";
1094
1095 // Work stealing task
1096 tab(n+5, fout); fout << "case WORK_STEALING_INDEX: { ";
1097 tab(n+6, fout); fout << "tasknum = TaskQueue::GetNextTask(cur_thread, fDynamicNumThreads);";
1098 tab(n+6, fout); fout << "break;";
1099 tab(n+5, fout); fout << "} ";
1100
1101 // End task
1102 tab(n+5, fout); fout << "case LAST_TASK_INDEX: { ";
1103 tab(n+6, fout); fout << "fIsFinished = true;";
1104 tab(n+6, fout); fout << "break;";
1105 tab(n+5, fout); fout << "} ";
1106
1107 gTaskCount = START_TASK_INDEX;
1108
1109 // DSP tasks
1110 printLoopGraphScheduler (n+5,fout);
1111
1112 tab(n+4,fout); fout << "}";
1113 tab(n+3,fout); fout << "}";
1114 tab(n+2,fout); fout << "}";
1115 tab(n+1,fout); fout << "}";
1116 }
1117
1118 /**
1119 * Print an auxillary C++ class corresponding to an integer init signal
1120 */
1121 void SigIntGenKlass::println(int n, ostream& fout)
1122 {
1123 list<Klass* >::iterator k;
1124
1125 tab(n,fout); fout << "class " << fKlassName << " {";
1126
1127 tab(n,fout); fout << " private:";
1128 tab(n+1,fout); fout << "int \tfSamplingFreq;";
1129
1130 for (k = fSubClassList.begin(); k != fSubClassList.end(); k++) (*k)->println(n+1, fout);
1131
1132 printlines(n+1, fDeclCode, fout);
1133
1134 tab(n,fout); fout << " public:";
1135
1136 tab(n+1,fout); fout << "int getNumInputs() \t{ "
1137 << "return " << fNumInputs << "; }";
1138 tab(n+1,fout); fout << "int getNumOutputs() \t{ "
1139 << "return " << fNumOutputs << "; }";
1140
1141 tab(n+1,fout); fout << "void init(int samplingFreq) {";
1142 tab(n+2,fout); fout << "fSamplingFreq = samplingFreq;";
1143 printlines(n+2, fInitCode, fout);
1144 tab(n+1,fout); fout << "}";
1145
1146 tab(n+1,fout); fout << "void fill (int count, int output[]) {";
1147 printlines (n+2, fZone1Code, fout);
1148 printlines (n+2, fZone2Code, fout);
1149 printlines (n+2, fZone2bCode, fout);
1150 printlines (n+2, fZone3Code, fout);
1151 printLoopGraphInternal (n+2,fout);
1152 tab(n+1,fout); fout << "}";
1153
1154 tab(n,fout); fout << "};\n" << endl;
1155 }
1156
1157 /**
1158 * Print an auxillary C++ class corresponding to an float init signal
1159 */
1160 void SigFloatGenKlass::println(int n, ostream& fout)
1161 {
1162 list<Klass* >::iterator k;
1163
1164 tab(n,fout); fout << "class " << fKlassName << " {";
1165
1166 tab(n,fout); fout << " private:";
1167 tab(n+1,fout); fout << "int \tfSamplingFreq;";
1168
1169 for (k = fSubClassList.begin(); k != fSubClassList.end(); k++) (*k)->println(n+1, fout);
1170
1171 printlines(n+1, fDeclCode, fout);
1172
1173 tab(n,fout); fout << " public:";
1174
1175 tab(n+1,fout); fout << "int getNumInputs() \t{ "
1176 << "return " << fNumInputs << "; }";
1177 tab(n+1,fout); fout << "int getNumOutputs() \t{ "
1178 << "return " << fNumOutputs << "; }";
1179
1180 tab(n+1,fout); fout << "void init(int samplingFreq) {";
1181 tab(n+2,fout); fout << "fSamplingFreq = samplingFreq;";
1182 printlines(n+2, fInitCode, fout);
1183 tab(n+1,fout); fout << "}";
1184
1185 tab(n+1,fout); fout << subst("void fill (int count, $0 output[]) {", ifloat());
1186 printlines (n+2, fZone1Code, fout);
1187 printlines (n+2, fZone2Code, fout);
1188 printlines (n+2, fZone2bCode, fout);
1189 printlines (n+2, fZone3Code, fout);
1190 printLoopGraphInternal(n+2,fout);
1191 tab(n+1,fout); fout << "}";
1192
1193 tab(n,fout); fout << "};\n" << endl;
1194 }
1195
1196 static void merge (set<string>& dst, set<string>& src)
1197 {
1198 set<string>::iterator i;
1199 for (i = src.begin(); i != src.end(); i++) dst.insert(*i);
1200 }
1201
1202 void Klass::collectIncludeFile(set<string>& S)
1203 {
1204 list<Klass* >::iterator k;
1205
1206 for (k = fSubClassList.begin(); k != fSubClassList.end(); k++) (*k)->collectIncludeFile(S);
1207 merge(S, fIncludeFileSet);
1208 }
1209
1210 void Klass::collectLibrary(set<string>& S)
1211 {
1212 list<Klass* >::iterator k;
1213
1214 for (k = fSubClassList.begin(); k != fSubClassList.end(); k++) (*k)->collectLibrary(S);
1215 merge(S, fLibrarySet);
1216 }