1 /************************************************************************
2 ************************************************************************
4 Copyright (C) 2003-2004 GRAME, Centre National de Creation Musicale
5 ---------------------------------------------------------------------
6 This program is free software; you can redistribute it and/or modify
7 it under the terms of the GNU General Public License as published by
8 the Free Software Foundation; either version 2 of the License, or
9 (at your option) any later version.
11 This program is distributed in the hope that it will be useful,
12 but WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 GNU General Public License for more details.
16 You should have received a copy of the GNU General Public License
17 along with this program; if not, write to the Free Software
18 Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
19 ************************************************************************
20 ************************************************************************/
24 /**********************************************************************
25 - klass.cpp : class C++ a remplir (projet FAUST) -
30 17-10-2001 : implementation initiale (yo)
31 18-10-2001 : Ajout de getFreshID (yo)
32 02-11-2001 : Ajout de sous classes (yo)
33 06-11-2001 : modif impression des classes (yo)
35 ***********************************************************************/
45 #include "smartpointer.hh"
51 #include "recursivness.hh"
// Global compiler options, defined in another translation unit. The notes
// describe only how the code visible in this file consults each of them.
extern bool gVectorSwitch;       // selects the vector compute() variants in printComputeMethod()
extern bool gDeepFirstSwitch;    // together with gVectorSwitch: loop graph is printed deep first
extern bool gOpenMPSwitch;       // selects printComputeMethodOpenMP()
extern bool gOpenMPLoop;         // NOTE(review): not referenced by the code visible in this listing
extern bool gSchedulerSwitch;    // selects printComputeMethodScheduler(); class also derives from Runnable
extern bool gUIMacroSwitch;      // enables the FAUST_UIMACROS section printed after the class
extern int gVectorLoopVariant;   // 0 -> printComputeMethodVectorFaster, 1 -> printComputeMethodVectorSimple
extern bool gGroupTaskSwitch;    // when set, computeUseCount()/groupSeqLoops() run before the graph is sorted

// Metadata declarations; iterated by Klass::printMetadata().
extern map<Tree, set<Tree> > gMetaDataSet;

// Running task number used while emitting scheduler code: post-incremented for
// every generated `case` label / InitTask call, and reset to START_TASK_INDEX
// by Klass::printComputeMethodScheduler().
static int gTaskCount = 0;
67 void tab (int n
, ostream
& fout
)
70 while (n
--) fout
<< '\t';
// Definition of the static member Klass::fNeedPowerDef, initially false.
// NOTE(review): presumably raised when the generated code needs the
// faustpower helpers printed by printAdditionalCode() -- confirm at the writer.
bool Klass::fNeedPowerDef = false;
76 * Store the loop used to compute a signal
78 void Klass::setLoopProperty(Tree sig
, Loop
* l
)
80 fLoopProperty
.set(sig
,l
);
84 * Returns the loop used to compute a signal
86 bool Klass::getLoopProperty(Tree sig
, Loop
*& l
)
88 return fLoopProperty
.get(sig
, l
);
92 * Open a non-recursive loop on top of the stack of open loops.
93 * @param size the number of iterations of the loop
95 void Klass::openLoop(const string
& size
)
97 fTopLoop
= new Loop(fTopLoop
, size
);
98 //cerr << "\nOPEN SHARED LOOP(" << size << ") ----> " << fTopLoop << endl;
102 * Open a recursive loop on top of the stack of open loops.
103 * @param recsymbol the recursive symbol defined in this loop
104 * @param size the number of iterations of the loop
106 void Klass::openLoop(Tree recsymbol
, const string
& size
)
108 fTopLoop
= new Loop(recsymbol
, fTopLoop
, size
);
109 //cerr << "\nOPEN REC LOOP(" << *recsymbol << ", " << size << ") ----> " << fTopLoop << endl;
113 * Close the top loop and either keep it
114 * or absorb it within its enclosing loop.
116 void Klass::closeLoop(Tree sig
)
120 fTopLoop
= l
->fEnclosingLoop
;
123 //l->println(4, cerr);
126 Tree S
= symlist(sig
);
127 //cerr << "CLOSE LOOP :" << l << " with symbols " << *S << endl;
128 if (l
->isEmpty() || fTopLoop
->hasRecDependencyIn(S
)) {
129 //cout << " will absorb" << endl;
130 // empty or dependent loop -> absorbed by enclosing one
131 //cerr << "absorbed by : " << fTopLoop << endl;
135 // cout << " will NOT absorb" << endl;
136 // we have an independent loop
137 setLoopProperty(sig
,l
); // associate the signal
138 fTopLoop
->fBackwardLoopDependencies
.insert(l
);
139 // we need to indicate that all recursive symbols defined
140 // in this loop are defined in this loop
141 for (Tree lsym
=l
->fRecSymbolSet
; !isNil(lsym
); lsym
=tl(lsym
)) {
142 this->setLoopProperty(hd(lsym
), l
);
143 //cerr << "loop " << l << " defines " << *hd(lsym) << endl;
146 //cerr << "\n" << endl;
150 * Print a list of lines.
152 void printlines(int n
, list
<string
>& lines
, ostream
& fout
)
154 list
<string
>::iterator s
;
155 for (s
= lines
.begin(); s
!= lines
.end(); s
++) {
156 tab(n
, fout
); fout
<< *s
;
161 * Print a list of elements (e1, e2,...)
163 void printdecllist(int n
, const string
& decl
, list
<string
>& content
, ostream
& fout
)
165 if (!content
.empty()) {
166 list
<string
>::iterator s
;
168 tab(n
, fout
); fout
<< decl
;
170 for (s
= content
.begin(); s
!= content
.end(); s
++) {
179 * Print the required C++ libraries as comments in source code
181 void Klass::printLibrary(ostream
& fout
)
184 set
<string
>::iterator f
;
188 fout
<< "/* link with ";
189 for (f
= S
.begin(), sep
=": "; f
!= S
.end(); f
++, sep
= ", ") {
196 * Print the required include files
198 void Klass::printIncludeFile(ostream
& fout
)
201 set
<string
>::iterator f
;
204 fout
<< "#include <omp.h>" << "\n";
207 collectIncludeFile(S
);
208 for (f
= S
.begin(); f
!= S
.end(); f
++) {
209 fout
<< "#include " << *f
<< "\n";
214 * Print additional functions required by the generated code
216 void Klass::printAdditionalCode(ostream
& fout
)
219 // Add faustpower definition to C++ code
220 fout
<< "#include <cmath>" << endl
;
221 fout
<< "template <int N> inline float faustpower(float x) { return powf(x,N); } " << endl
;
222 fout
<< "template <int N> inline double faustpower(double x) { return pow(x,N); }" << endl
;
223 fout
<< "template <int N> inline int faustpower(int x) { return faustpower<N/2>(x) * faustpower<N-N/2>(x); } " << endl
;
224 fout
<< "template <> inline int faustpower<0>(int x) { return 1; }" << endl
;
225 fout
<< "template <> inline int faustpower<1>(int x) { return x; }" << endl
;
231 * Print metadata declaration
233 void Klass::printMetadata(int n
, const map
<Tree
, set
<Tree
> >& S
, ostream
& fout
)
235 tab(n
,fout
); fout
<< "static void metadata(Meta* m) \t{ ";
237 for (map
<Tree
, set
<Tree
> >::iterator i
= gMetaDataSet
.begin(); i
!= gMetaDataSet
.end(); i
++) {
238 if (i
->first
!= tree("author")) {
239 tab(n
+1,fout
); fout
<< "m->declare(\"" << *(i
->first
) << "\", " << **(i
->second
.begin()) << ");";
241 for (set
<Tree
>::iterator j
= i
->second
.begin(); j
!= i
->second
.end(); j
++) {
242 if (j
== i
->second
.begin()) {
243 tab(n
+1,fout
); fout
<< "m->declare(\"" << *(i
->first
) << "\", " << **j
<< ");" ;
245 tab(n
+1,fout
); fout
<< "m->declare(\"" << "contributor" << "\", " << **j
<< ");";
251 tab(n
,fout
); fout
<< "}" << endl
;
254 inline bool isElement(const set
<Loop
*>& S
, Loop
* l
)
256 return S
.find(l
)!= S
.end();
260 * Print a loop graph deep first
262 void Klass::printLoopDeepFirst(int n
, ostream
& fout
, Loop
* l
, set
<Loop
*>& visited
)
264 // avoid printing already printed loops
265 if (isElement(visited
, l
)) return;
267 // remember we have printed this loop
270 // print the dependencies loops (that need to be computed before this one)
271 for (lset::const_iterator p
=l
->fBackwardLoopDependencies
.begin(); p
!=l
->fBackwardLoopDependencies
.end(); p
++) {
272 printLoopDeepFirst(n
, fout
, *p
, visited
);
274 // then print the loop itself
276 tab(n
, fout
); fout
<< "// LOOP " << l
<< ", ORDER " << l
->fOrder
<< endl
;
277 l
->println(n
+1, fout
);
281 * Compute how many times each loop is used in a DAG
283 static void computeUseCount(Loop
* l
)
286 if (l
->fUseCount
== 1) {
287 for (lset::iterator p
=l
->fBackwardLoopDependencies
.begin(); p
!=l
->fBackwardLoopDependencies
.end(); p
++) {
294 * Group together sequences of loops
296 static void groupSeqLoops(Loop
* l
)
298 int n
= l
->fBackwardLoopDependencies
.size();
302 Loop
* f
= *(l
->fBackwardLoopDependencies
.begin());
303 if (f
->fUseCount
== 1) {
311 for (lset::iterator p
=l
->fBackwardLoopDependencies
.begin(); p
!=l
->fBackwardLoopDependencies
.end(); p
++) {
// Task numbers used by the scheduler code generator.
// WORK_STEALING_INDEX and LAST_TASK_INDEX are also written by name into the
// generated C++ (see printComputeMethodScheduler), so the generated code's
// runtime must define them too -- NOTE(review): confirm both sides agree.
#define WORK_STEALING_INDEX 0                       // case that fetches the next task from the queue
#define LAST_TASK_INDEX     1                       // case that marks the computation as finished
// Parenthesized so the macro stays a single operand in any expression
// (START_TASK_INDEX * k previously expanded to LAST_TASK_INDEX + 1 * k).
#define START_TASK_INDEX    (LAST_TASK_INDEX + 1)   // first number given to a DSP task

#define START_TASK_MAX      2                       // ready-task count threshold used by buildTasksList()
323 void Klass::buildTasksList()
327 if (gGroupTaskSwitch
) {
328 computeUseCount(fTopLoop
);
329 groupSeqLoops(fTopLoop
);
332 sortGraph(fTopLoop
, G
);
333 int index_task
= START_TASK_INDEX
;
335 addDeclCode("TaskGraph fGraph;");
336 addDeclCode("FAUSTFLOAT** input;");
337 addDeclCode("FAUSTFLOAT** output;");
338 addDeclCode("volatile bool fIsFinished;");
339 addDeclCode("int fFullCount;");
340 addDeclCode("int fIndex;");
341 addDeclCode("DSPThreadPool* fThreadPool;");
342 addDeclCode("int fStaticNumThreads;");
343 addDeclCode("int fDynamicNumThreads;");
345 // Compute forward dependencies
346 for (int l
=G
.size()-1; l
>=0; l
--) {
347 for (lset::const_iterator p
=G
[l
].begin(); p
!=G
[l
].end(); p
++) {
348 for (lset::const_iterator p1
= (*p
)->fBackwardLoopDependencies
.begin(); p1
!=(*p
)->fBackwardLoopDependencies
.end(); p1
++) {
349 (*p1
)->fForwardLoopDependencies
.insert((*p
));
351 (*p
)->fIndex
= index_task
;
356 // Compute ready tasks list
357 vector
<int> task_num
;
358 for (int l
=G
.size()-1; l
>=0; l
--) {
359 lset::const_iterator next
;
360 for (lset::const_iterator p
=G
[l
].begin(); p
!=G
[l
].end(); p
++) {
361 if ((*p
)->fBackwardLoopDependencies
.size() == 0) {
362 task_num
.push_back((*p
)->fIndex
);
367 if (task_num
.size() < START_TASK_MAX
) {
369 // Push ready tasks thread 0, execute one task directly
371 addZone3("if (cur_thread == 0) {");
374 for (int l
=G
.size()-1; l
>=0; l
--) {
375 lset::const_iterator next
;
376 for (lset::const_iterator p
=G
[l
].begin(); p
!=G
[l
].end(); p
++) {
377 if ((*p
)->fBackwardLoopDependencies
.size() == 0) {
381 addZone3(subst(" taskqueue.PushHead($0);", T((*p
)->fIndex
)));
388 addZone3(subst(" tasknum = $0;", T(keep
->fIndex
)));
391 addZone3("} else {");
392 addZone3(" tasknum = TaskQueue::GetNextTask(cur_thread, fDynamicNumThreads);");
397 // Cut ready tasks list and have each thread (dynamically) use a subpart
398 addZone3(subst("int task_list_size = $0;", T((int)task_num
.size())));
400 buf
<< "int task_list[" << task_num
.size() << "] = {";
401 for(size_t i
= 0; i
< task_num
.size(); i
++) {
403 if (i
!= (task_num
.size() - 1))
409 addZone3("taskqueue.InitTaskList(task_list_size, task_list, fDynamicNumThreads, cur_thread, tasknum);");
412 // Last stage connected to end task
413 if (G
[0].size() > 1) {
414 addZone2c("// Initialize end task, if more than one input");
415 addZone2c(subst("fGraph.InitTask($0,$1);", T(LAST_TASK_INDEX
), T((int)G
[0].size())));
417 addZone2c("// End task has only one input, so will be directly activated");
420 // Compute init section
421 addZone2c("// Only initialize taks with more than one input");
422 for (int l
=G
.size()-1; l
>=0; l
--) {
423 for (lset::const_iterator p
=G
[l
].begin(); p
!=G
[l
].end(); p
++) {
424 if ((*p
)->fBackwardLoopDependencies
.size() > 1) { // Only initialize taks with more than 1 input, since taks with one input are "directly" activated.
425 addZone2c(subst("fGraph.InitTask($0,$1);", T(START_TASK_INDEX
+ gTaskCount
++), T((int)(*p
)->fBackwardLoopDependencies
.size())));
432 addInitCode("fStaticNumThreads = get_max_cpu();");
433 addInitCode("fDynamicNumThreads = getenv(\"OMP_NUM_THREADS\") ? atoi(getenv(\"OMP_NUM_THREADS\")) : fStaticNumThreads;");
434 addInitCode("fThreadPool = DSPThreadPool::Init();");
435 addInitCode("fThreadPool->StartAll(fStaticNumThreads - 1, false);");
441 * Print the loop graph (used for vector code)
443 void Klass::printLoopGraphVector(int n
, ostream
& fout
)
445 if (gGroupTaskSwitch
) {
446 computeUseCount(fTopLoop
);
447 groupSeqLoops(fTopLoop
);
451 sortGraph(fTopLoop
, G
);
455 if (gVectorSwitch
&& gDeepFirstSwitch
) {
457 printLoopDeepFirst(n
, fout
, fTopLoop
, visited
);
463 for (int l
=G
.size()-1; l
>=0; l
--) {
464 if (gVectorSwitch
) { tab(n
, fout
); fout
<< "// SECTION : " << G
.size() - l
; }
465 for (lset::const_iterator p
=G
[l
].begin(); p
!=G
[l
].end(); p
++) {
466 (*p
)->println(n
, fout
);
472 * Print the loop graph as a series of parallel loops
474 void Klass::printLoopGraphOpenMP(int n
, ostream
& fout
)
476 if (gGroupTaskSwitch
) {
477 computeUseCount(fTopLoop
);
478 groupSeqLoops(fTopLoop
);
482 sortGraph(fTopLoop
, G
);
484 // OpenMP mode : add OpenMP directives
485 for (int l
=G
.size()-1; l
>=0; l
--) {
486 tab(n
, fout
); fout
<< "// SECTION : " << G
.size() - l
;
487 printLoopLevelOpenMP(n
, G
.size() - l
, G
[l
], fout
);
492 * Print the loop graph as a series of scheduler tasks
494 void Klass::printLoopGraphScheduler(int n
, ostream
& fout
)
496 if (gGroupTaskSwitch
) {
497 computeUseCount(fTopLoop
);
498 groupSeqLoops(fTopLoop
);
502 sortGraph(fTopLoop
, G
);
504 // OpenMP mode : add OpenMP directives
505 for (int l
=G
.size()-1; l
>0; l
--) {
506 tab(n
, fout
); fout
<< "// SECTION : " << G
.size() - l
;
507 printLoopLevelScheduler(n
, G
.size() - l
, G
[l
], fout
);
510 printLastLoopLevelScheduler(n
, G
.size(), G
[0], fout
);
515 * Print the loop graph in dot format
517 void Klass::printGraphDotFormat(ostream
& fout
)
520 sortGraph(fTopLoop
, G
);
522 fout
<< "strict digraph loopgraph {" << endl
;
523 fout
<< '\t' << "rankdir=LR;" << endl
;
524 fout
<< '\t' << "node[color=blue, fillcolor=lightblue, style=filled, fontsize=9];" << endl
;
526 int lnum
= 0; // used for loop numbers
527 // for each level of the graph
528 for (int l
=G
.size()-1; l
>=0; l
--) {
529 // for each task in the level
530 for (lset::const_iterator t
=G
[l
].begin(); t
!=G
[l
].end(); t
++) {
531 // print task label "Lxxx : 0xffffff"
532 fout
<< '\t' << 'L'<<(*t
)<<"[label=<<font face=\"verdana,bold\">L"<<lnum
++<<"</font> : "<<(*t
)<<">];"<<endl
;
533 // for each source of the task
534 for (lset::const_iterator src
= (*t
)->fBackwardLoopDependencies
.begin(); src
!=(*t
)->fBackwardLoopDependencies
.end(); src
++) {
535 // print the connection Lxxx -> Lyyy;
536 fout
<< '\t' << 'L'<<(*src
)<<"->"<<'L'<<(*t
)<<';'<<endl
;
544 * Print the loop graph (used for internals classes)
546 void Klass::printLoopGraphInternal(int n
, ostream
& fout
)
549 sortGraph(fTopLoop
, G
);
552 for (int l
=G
.size()-1; l
>=0; l
--) {
553 if (gVectorSwitch
) { tab(n
, fout
); fout
<< "// SECTION : " << G
.size() - l
; }
554 for (lset::const_iterator p
=G
[l
].begin(); p
!=G
[l
].end(); p
++) {
555 (*p
)->printoneln(n
, fout
);
561 * Print the loop graph (scalar mode)
563 void Klass::printLoopGraphScalar(int n
, ostream
& fout
)
565 fTopLoop
->printoneln(n
, fout
);
569 * returns true if all the loops are non recursive
571 static bool nonRecursiveLevel(const lset
& L
)
573 for (lset::const_iterator p
=L
.begin(); p
!=L
.end(); p
++) {
574 if ((*p
)->fIsRecursive
) return false;
580 * Print the 'level' of the loop graph as a set of
583 void Klass::printLoopLevelOpenMP(int n
, int lnum
, const lset
& L
, ostream
& fout
)
585 if (nonRecursiveLevel(L
) && L
.size()==1) {
586 for (lset::const_iterator p
=L
.begin(); p
!=L
.end(); p
++) {
587 if ((*p
)->isEmpty() == false) {
589 (*p
)->printParLoopln(n
, fout
);
591 tab(n
, fout
); fout
<< "#pragma omp single ";
592 tab(n
, fout
); fout
<< "{ ";
593 (*p
)->println(n
+1, fout
);
594 tab(n
, fout
); fout
<< "} ";
599 } else if (L
.size() > 1) {
600 tab(n
, fout
); fout
<< "#pragma omp sections ";
601 tab(n
, fout
); fout
<< "{ ";
602 for (lset::const_iterator p
=L
.begin(); p
!=L
.end(); p
++) {
603 tab(n
+1, fout
); fout
<< "#pragma omp section ";
604 tab(n
+1, fout
); fout
<< "{";
605 (*p
)->println(n
+2, fout
);
606 tab(n
+1, fout
); fout
<< "} ";
608 tab(n
, fout
); fout
<< "} ";
609 } else if (L
.size() == 1 && !(*L
.begin())->isEmpty()) {
610 tab(n
, fout
); fout
<< "#pragma omp single ";
611 tab(n
, fout
); fout
<< "{ ";
612 for (lset::const_iterator p
=L
.begin(); p
!=L
.end(); p
++) {
613 (*p
)->println(n
+1, fout
);
615 tab(n
, fout
); fout
<< "} ";
620 * Print the 'level' of the loop graph as a set of
623 void Klass::printLastLoopLevelScheduler(int n
, int lnum
, const lset
& L
, ostream
& fout
)
625 if (nonRecursiveLevel(L
) && L
.size() == 1 && !(*L
.begin())->isEmpty()) {
627 lset::const_iterator p
=L
.begin();
628 tab(n
, fout
); fout
<< "case " << gTaskCount
++ << ": { ";
629 (*p
)->println(n
+1, fout
);
630 tab(n
+1, fout
); fout
<< "tasknum = LAST_TASK_INDEX;";
631 tab(n
+1, fout
); fout
<< "break;";
632 tab(n
, fout
); fout
<< "} ";
634 } else if (L
.size() > 1) {
636 for (lset::const_iterator p
=L
.begin(); p
!=L
.end(); p
++) {
637 tab(n
, fout
); fout
<< "case " << gTaskCount
++ << ": { ";
638 (*p
)->println(n
+1, fout
);
639 tab(n
+1, fout
); fout
<< "fGraph.ActivateOneOutputTask(taskqueue, LAST_TASK_INDEX, tasknum);";
640 tab(n
+1, fout
); fout
<< "break;";
641 tab(n
, fout
); fout
<< "} ";
644 } else if (L
.size() == 1 && !(*L
.begin())->isEmpty()) {
646 lset::const_iterator p
=L
.begin();
647 tab(n
, fout
); fout
<< "case " << gTaskCount
++ << ": { ";
648 (*p
)->println(n
+1, fout
);
649 tab(n
+1, fout
); fout
<< "tasknum = LAST_TASK_INDEX;";
650 tab(n
+1, fout
); fout
<< "break;";
651 tab(n
, fout
); fout
<< "} ";
656 void Klass::printOneLoopScheduler(lset::const_iterator p
, int n
, ostream
& fout
)
658 tab(n
, fout
); fout
<< "case " << gTaskCount
++ << ": { ";
659 (*p
)->println(n
+1, fout
);
662 if ((*p
)->fForwardLoopDependencies
.size() == 1) {
664 lset::const_iterator p1
= (*p
)->fForwardLoopDependencies
.begin();
665 if ((*p1
)->fBackwardLoopDependencies
.size () == 1) {
666 tab(n
+1, fout
); fout
<< subst("tasknum = $0;", T((*p1
)->fIndex
));
668 tab(n
+1, fout
); fout
<< subst("fGraph.ActivateOneOutputTask(taskqueue, $0, tasknum);", T((*p1
)->fIndex
));
674 // Find one output with only one backward dependencies
675 for (lset::const_iterator p1
= (*p
)->fForwardLoopDependencies
.begin(); p1
!=(*p
)->fForwardLoopDependencies
.end(); p1
++) {
676 if ((*p1
)->fBackwardLoopDependencies
.size () == 1) {
683 tab(n
+1, fout
); fout
<< "tasknum = WORK_STEALING_INDEX;";
686 for (lset::const_iterator p1
= (*p
)->fForwardLoopDependencies
.begin(); p1
!=(*p
)->fForwardLoopDependencies
.end(); p1
++) {
687 if ((*p1
)->fBackwardLoopDependencies
.size () == 1) { // Task is the only input
689 tab(n
+1, fout
); fout
<< subst("taskqueue.PushHead($0);", T((*p1
)->fIndex
));
693 tab(n
+1, fout
); fout
<< subst("fGraph.ActivateOutputTask(taskqueue, $0, tasknum);", T((*p1
)->fIndex
));
695 tab(n
+1, fout
); fout
<< subst("fGraph.ActivateOutputTask(taskqueue, $0);", T((*p1
)->fIndex
));
701 tab(n
+1, fout
); fout
<< subst("tasknum = $0;", T(keep
->fIndex
)); // Last one
703 tab(n
+1, fout
); fout
<< "fGraph.GetReadyTask(taskqueue, tasknum);"; // Last one
707 tab(n
+1, fout
); fout
<< "break;";
708 tab(n
, fout
); fout
<< "} ";
712 * Print the 'level' of the loop graph as a set of
716 void Klass::printLoopLevelScheduler(int n
, int lnum
, const lset
& L
, ostream
& fout
)
718 if (nonRecursiveLevel(L
) && L
.size() == 1 && !(*L
.begin())->isEmpty()) {
719 printOneLoopScheduler(L
.begin(), n
, fout
);
720 } else if (L
.size() > 1) {
721 for (lset::const_iterator p
= L
.begin(); p
!= L
.end(); p
++) {
722 printOneLoopScheduler(p
, n
, fout
);
724 } else if (L
.size() == 1 && !(*L
.begin())->isEmpty()) {
725 printOneLoopScheduler(L
.begin(), n
, fout
);
730 * Print a full C++ class corresponding to a Faust dsp
732 void Klass::println(int n
, ostream
& fout
)
734 list
<Klass
* >::iterator k
;
736 tab(n
,fout
); fout
<< "#define FAUSTCLASS "<< fKlassName
<< endl
;
738 if (gSchedulerSwitch
) {
739 tab(n
,fout
); fout
<< "class " << fKlassName
<< " : public " << fSuperKlassName
<< ", public Runnable {";
741 tab(n
,fout
); fout
<< "class " << fKlassName
<< " : public " << fSuperKlassName
<< " {";
744 if (gUIMacroSwitch
) {
745 tab(n
,fout
); fout
<< " public:";
747 tab(n
,fout
); fout
<< " private:";
750 for (k
= fSubClassList
.begin(); k
!= fSubClassList
.end(); k
++) (*k
)->println(n
+1, fout
);
752 printlines(n
+1, fDeclCode
, fout
);
754 tab(n
,fout
); fout
<< " public:";
756 printMetadata(n
+1, gMetaDataSet
, fout
);
758 if (gSchedulerSwitch
) {
759 tab(n
+1,fout
); fout
<< "virtual ~" << fKlassName
<< "() \t{ "
760 << "DSPThreadPool::Destroy()"
764 tab(n
+1,fout
); fout
<< "virtual int getNumInputs() \t{ "
765 << "return " << fNumInputs
767 tab(n
+1,fout
); fout
<< "virtual int getNumOutputs() \t{ "
768 << "return " << fNumOutputs
771 tab(n
+1,fout
); fout
<< "static void classInit(int samplingFreq) {";
772 printlines (n
+2, fStaticInitCode
, fout
);
773 tab(n
+1,fout
); fout
<< "}";
775 tab(n
+1,fout
); fout
<< "virtual void instanceInit(int samplingFreq) {";
776 tab(n
+2,fout
); fout
<< "fSamplingFreq = samplingFreq;";
777 printlines (n
+2, fInitCode
, fout
);
778 tab(n
+1,fout
); fout
<< "}";
780 tab(n
+1,fout
); fout
<< "virtual void init(int samplingFreq) {";
781 tab(n
+2,fout
); fout
<< "classInit(samplingFreq);";
782 tab(n
+2,fout
); fout
<< "instanceInit(samplingFreq);";
783 tab(n
+1,fout
); fout
<< "}";
786 tab(n
+1,fout
); fout
<< "virtual void buildUserInterface(UI* interface) {";
787 printlines (n
+2, fUICode
, fout
);
788 tab(n
+1,fout
); fout
<< "}";
790 printComputeMethod(n
, fout
);
792 tab(n
,fout
); fout
<< "};\n" << endl
;
794 printlines(n
, fStaticFields
, fout
);
796 // generate user interface macros if needed
797 if (gUIMacroSwitch
) {
798 tab(n
, fout
); fout
<< "#ifdef FAUST_UIMACROS";
799 tab(n
+1,fout
); fout
<< "#define FAUST_INPUTS " << fNumInputs
;
800 tab(n
+1,fout
); fout
<< "#define FAUST_OUTPUTS " << fNumOutputs
;
801 tab(n
+1,fout
); fout
<< "#define FAUST_ACTIVES " << fNumActives
;
802 tab(n
+1,fout
); fout
<< "#define FAUST_PASSIVES " << fNumPassives
;
803 printlines(n
+1, fUIMacro
, fout
);
804 tab(n
, fout
); fout
<< "#endif";
811 * Print Compute() method according to the various switch
813 void Klass::printComputeMethod(int n
, ostream
& fout
)
815 if (gSchedulerSwitch
) {
816 printComputeMethodScheduler (n
, fout
);
817 } else if (gOpenMPSwitch
) {
818 printComputeMethodOpenMP (n
, fout
);
819 } else if (gVectorSwitch
) {
820 switch (gVectorLoopVariant
) {
821 case 0 : printComputeMethodVectorFaster(n
, fout
); break;
822 case 1 : printComputeMethodVectorSimple(n
, fout
); break;
823 default : cerr
<< "unknown loop variant " << gVectorLoopVariant
<< endl
; exit(1);
826 printComputeMethodScalar(n
, fout
);
830 void Klass::printComputeMethodScalar(int n
, ostream
& fout
)
832 tab(n
+1,fout
); fout
<< subst("virtual void compute (int count, $0** input, $0** output) {", xfloat());
833 printlines (n
+2, fZone1Code
, fout
);
834 printlines (n
+2, fZone2Code
, fout
);
835 printlines (n
+2, fZone2bCode
, fout
);
836 printlines (n
+2, fZone3Code
, fout
);
837 printLoopGraphScalar (n
+2,fout
);
838 tab(n
+1,fout
); fout
<< "}";
842 * Uses loops of constant gVecSize boundary in order to provide the
843 * C compiler with more optimisation opportunities. Improves performances
844 * in general, but not always
846 void Klass::printComputeMethodVectorFaster(int n
, ostream
& fout
)
848 // in vector mode we need to split loops in smaller pieces not larger
850 tab(n
+1,fout
); fout
<< subst("virtual void compute (int fullcount, $0** input, $0** output) {", xfloat());
851 printlines(n
+2, fZone1Code
, fout
);
852 printlines(n
+2, fZone2Code
, fout
);
853 printlines(n
+2, fZone2bCode
, fout
);
855 tab(n
+2,fout
); fout
<< "int index;";
856 tab(n
+2,fout
); fout
<< "for (index = 0; index <= fullcount - " << gVecSize
<< "; index += " << gVecSize
<< ") {";
857 tab(n
+3,fout
); fout
<< "// compute by blocks of " << gVecSize
<< " samples";
858 tab(n
+3,fout
); fout
<< "const int count = " << gVecSize
<< ";";
859 printlines (n
+3, fZone3Code
, fout
);
860 printLoopGraphVector(n
+3,fout
);
861 tab(n
+2,fout
); fout
<< "}";
863 tab(n
+2,fout
); fout
<< "if (index < fullcount) {";
864 tab(n
+3,fout
); fout
<< "// compute the remaining samples if any";
865 tab(n
+3,fout
); fout
<< "int count = fullcount-index;";
866 printlines (n
+3, fZone3Code
, fout
);
867 printLoopGraphVector(n
+3,fout
);
868 tab(n
+2,fout
); fout
<< "}";
869 tab(n
+1,fout
); fout
<< "}";
873 * Simple loop layout, generally less efficient than printComputeMethodVectorFaster
875 void Klass::printComputeMethodVectorSimple(int n
, ostream
& fout
)
877 // in vector mode we need to split loops in smaller pieces not larger
879 tab(n
+1,fout
); fout
<< subst("virtual void compute (int fullcount, $0** input, $0** output) {", xfloat());
880 printlines(n
+2, fZone1Code
, fout
);
881 printlines(n
+2, fZone2Code
, fout
);
882 printlines(n
+2, fZone2bCode
, fout
);
883 tab(n
+2,fout
); fout
<< "for (int index = 0; index < fullcount; index += " << gVecSize
<< ") {";
884 tab(n
+3,fout
); fout
<< "int count = min("<< gVecSize
<< ", fullcount-index);";
885 printlines (n
+3, fZone3Code
, fout
);
886 printLoopGraphVector(n
+3,fout
);
887 tab(n
+2,fout
); fout
<< "}";
888 tab(n
+1,fout
); fout
<< "}";
892 void Klass::printComputeMethodVectorFix0 (int n, ostream& fout)
894 // in vector mode we need to split loops in smaller pieces not larger
896 tab(n+1,fout); fout << "virtual void compute (int fullcount, float** input, float** output) {";
897 printlines(n+2, fZone1Code, fout);
898 printlines(n+2, fZone2Code, fout);
899 printlines(n+2, fZone2bCode, fout);
900 tab(n+2,fout); fout << "for (int index = 0; index < fullcount; index += " << gVecSize << ") {";
901 tab(n+3,fout); fout << "if (fullcount >= index + " << gVecSize << ") {";
902 tab(n+4,fout); fout << "// compute by blocks of " << gVecSize << " samples";
903 tab(n+4,fout); fout << "const int count = " << gVecSize << ";"; // temporaire
904 printlines(n+4, fZone3Code, fout);
905 printLoopGraph (n+4,fout);
906 tab(n+3,fout); fout << "} else if (fullcount > index) {";
907 //tab(n+3,fout); fout << "int count = min ("<< gVecSize << ", fullcount-index);";
908 tab(n+4,fout); fout << "// compute the remaining samples";
909 tab(n+4,fout); fout << "int count = fullcount-index;" ;
910 printlines(n+4, fZone3Code, fout);
911 printLoopGraph (n+4,fout);
912 tab(n+3,fout); fout << "}";
913 tab(n+2,fout); fout << "}";
914 tab(n+1,fout); fout << "}";
917 void Klass::printComputeMethodVectorFix1 (int n, ostream& fout)
919 // in vector mode we need to split loops in smaller pieces not larger
921 tab(n+1,fout); fout << "virtual void compute (int fullcount, float** input, float** output) {";
922 printlines(n+2, fZone1Code, fout);
923 printlines(n+2, fZone2Code, fout);
924 printlines(n+2, fZone2bCode, fout);
926 tab(n+2,fout); fout << "int \tblock;";
927 tab(n+2,fout); fout << "for (block = 0; block < fullcount/" << gVecSize << "; block++) {";
928 tab(n+3,fout); fout << "// compute by blocks of " << gVecSize << " samples";
929 tab(n+3,fout); fout << "const int index = block*" << gVecSize << ";";
930 tab(n+3,fout); fout << "const int count = " << gVecSize << ";"; // temporaire
931 printlines(n+3, fZone3Code, fout);
932 printLoopGraph (n+3,fout);
933 tab(n+2,fout); fout << "}";
935 tab(n+2,fout); fout << "if (fullcount%" << gVecSize << " != 0) {";
936 //tab(n+3,fout); fout << "int count = min ("<< gVecSize << ", fullcount-index);";
937 tab(n+3,fout); fout << "// compute the remaining samples";
938 tab(n+3,fout); fout << "const int index = block*" << gVecSize << ";";
939 tab(n+3,fout); fout << "int count = fullcount%" << gVecSize << ";" ;
940 printlines(n+3, fZone3Code, fout);
941 printLoopGraph (n+3,fout);
942 tab(n+2,fout); fout << "}";
943 tab(n+1,fout); fout << "}";
946 void Klass::printComputeMethodOpenMP(int n
, ostream
& fout
)
948 // in openMP mode we need to split loops in smaller pieces not larger
949 // than gVecSize and add OpenMP pragmas
950 tab(n
+1,fout
); fout
<< subst("virtual void compute (int fullcount, $0** input, $0** output) {", xfloat());
951 printlines(n
+2, fZone1Code
, fout
);
952 printlines(n
+2, fZone2Code
, fout
);
953 tab(n
+2,fout
); fout
<< "#pragma omp parallel";
954 printdecllist(n
+3, "firstprivate", fFirstPrivateDecl
, fout
);
956 tab(n
+2,fout
); fout
<< "{";
957 if (!fZone2bCode
.empty()) {
958 tab(n
+3,fout
); fout
<< "#pragma omp single";
959 tab(n
+3,fout
); fout
<< "{";
960 printlines(n
+4, fZone2bCode
, fout
);
961 tab(n
+3,fout
); fout
<< "}";
964 tab(n
+3,fout
); fout
<< "for (int index = 0; index < fullcount; index += " << gVecSize
<< ") {";
965 tab(n
+4,fout
); fout
<< "int count = min ("<< gVecSize
<< ", fullcount-index);";
967 printlines (n
+4, fZone3Code
, fout
);
968 printLoopGraphOpenMP (n
+4,fout
);
970 tab(n
+3,fout
); fout
<< "}";
972 tab(n
+2,fout
); fout
<< "}";
973 tab(n
+1,fout
); fout
<< "}";
977 void Klass::printComputeMethodScheduler (int n, ostream& fout)
979 tab(n+1,fout); fout << subst("virtual void compute (int fullcount, $0** input, $0** output) {", xfloat());
980 printlines (n+2, fZone1Code, fout);
981 printlines (n+2, fZone2Code, fout);
983 // Init input and output
984 tab(n+2,fout); fout << "// Init input and output";
985 printlines (n+2, fZone3aCode, fout);
986 printlines (n+2, fZone3bCode, fout);
988 tab(n+2,fout); fout << "// Init graph state";
989 tab(n+2,fout); fout << "initState(fTasksList);";
990 tab(n+2,fout); fout << "bool is_finished = false;";
991 tab(n+2,fout); fout << "unsigned int index_in = 0;";
992 tab(n+2,fout); fout << "unsigned int index_out = 0;";
993 tab(n+2,fout); fout << "int count = min ("<< gVecSize << ", fullcount);";
995 tab(n+2,fout); fout << "InitSchedulingMap();";
996 tab(n+2,fout); fout << "#pragma omp parallel";
997 printdecllist(n+3, "firstprivate", fFirstPrivateDecl, fout);
999 tab(n+2,fout); fout << "{";
1000 tab(n+3,fout); fout << "while (!is_finished) {";
1001 tab(n+4,fout); fout << "Task* task = searchTaskToAcquire(fTasksList);";
1002 tab(n+4,fout); fout << "if (task != NULL) {";
1003 tab(n+5,fout); fout << "bool last_cycle_for_thread = false;";
1004 tab(n+5,fout); fout << "do {";
1005 tab(n+6,fout); fout << "AddTaskToScheduling(task);";
1006 tab(n+6,fout); fout << "switch (task->fNum) {";
1009 printLoopGraph (n+7,fout);
1012 tab(n+7, fout); fout << "case " << gTaskCount++ << ": { ";
1013 printlines (n+8, fZone6Code, fout);
1014 tab(n+8, fout); fout << "index_in += count;";
1015 tab(n+8, fout); fout << "last_cycle_for_thread = (index_in > fullcount);";
1016 tab(n+8, fout); fout << "break;";
1017 tab(n+7, fout); fout << "} ";
1020 tab(n+7, fout); fout << "case " << gTaskCount++ << ": { ";
1021 printlines (n+8, fZone7Code, fout);
1022 tab(n+8, fout); fout << "index_out += count;";
1023 tab(n+8, fout); fout << "last_cycle_for_thread = (index_out > fullcount);";
1024 tab(n+8, fout); fout << "break;";
1025 tab(n+7, fout); fout << "} ";
1028 tab(n+7, fout); fout << "case " << gTaskCount++ << ": { ";
1029 tab(n+8, fout); fout << "is_finished = ((index_in >= fullcount) && (index_out >= fullcount));";
1030 tab(n+8, fout); fout << "break;";
1031 tab(n+7, fout); fout << "} ";
1033 tab(n+6,fout); fout << "}";
1034 tab(n+6,fout); fout << "if (last_cycle_for_thread) break;";
1036 tab(n+5,fout); fout << "} while ((task = task->concludeAndTryToAcquireNext()) != NULL);";
1037 tab(n+4,fout); fout << "}";
1038 tab(n+3,fout); fout << "}";
1039 tab(n+2,fout); fout << "}";
1040 tab(n+2,fout); fout << "PrintSchedulingMap();";
1041 tab(n+1,fout); fout << "}";
1045 void Klass::printComputeMethodScheduler (int n
, ostream
& fout
)
1047 tab(n
+1,fout
); fout
<< "void display() {";
1048 tab(n
+2,fout
); fout
<< "fGraph.Display();";
1049 tab(n
+1,fout
); fout
<< "}";
1051 tab(n
+1,fout
); fout
<< subst("virtual void compute (int fullcount, $0** input, $0** output) {", xfloat());
1053 tab(n
+2,fout
); fout
<< "GetRealTime();";
1055 tab(n
+2,fout
); fout
<< "this->input = input;";
1056 tab(n
+2,fout
); fout
<< "this->output = output;";
1058 tab(n
+2,fout
); fout
<< "StartMeasure();";
1060 tab(n
+2,fout
); fout
<< "for (fIndex = 0; fIndex < fullcount; fIndex += " << gVecSize
<< ") {";
1062 tab(n
+3,fout
); fout
<< "fFullCount = min ("<< gVecSize
<< ", fullcount-fIndex);";
1063 tab(n
+3,fout
); fout
<< "TaskQueue::Init();";
1064 printlines (n
+3, fZone2cCode
, fout
);
1066 tab(n
+3,fout
); fout
<< "fIsFinished = false;";
1067 tab(n
+3,fout
); fout
<< "fThreadPool->SignalAll(fDynamicNumThreads - 1, this);";
1068 tab(n
+3,fout
); fout
<< "computeThread(0);";
1069 tab(n
+3,fout
); fout
<< "while (!fThreadPool->IsFinished()) {}";
1071 tab(n
+2,fout
); fout
<< "}";
1073 tab(n
+2,fout
); fout
<< "StopMeasure(fStaticNumThreads, fDynamicNumThreads);";
1075 tab(n
+1,fout
); fout
<< "}";
1077 tab(n
+1,fout
); fout
<< "void computeThread(int cur_thread) {";
1078 printlines (n
+2, fZone1Code
, fout
);
1079 printlines (n
+2, fZone2Code
, fout
);
1081 tab(n
+2,fout
); fout
<< "// Init graph state";
1083 tab(n
+2,fout
); fout
<< "{";
1084 tab(n
+3,fout
); fout
<< "TaskQueue taskqueue(cur_thread);";
1085 tab(n
+3,fout
); fout
<< "int tasknum = -1;";
1086 tab(n
+3,fout
); fout
<< "int count = fFullCount;";
1088 // Init input and output
1089 tab(n
+3,fout
); fout
<< "// Init input and output";
1090 printlines (n
+3, fZone3Code
, fout
);
1092 tab(n
+3,fout
); fout
<< "while (!fIsFinished) {";
1093 tab(n
+4,fout
); fout
<< "switch (tasknum) {";
1095 // Work stealing task
1096 tab(n
+5, fout
); fout
<< "case WORK_STEALING_INDEX: { ";
1097 tab(n
+6, fout
); fout
<< "tasknum = TaskQueue::GetNextTask(cur_thread, fDynamicNumThreads);";
1098 tab(n
+6, fout
); fout
<< "break;";
1099 tab(n
+5, fout
); fout
<< "} ";
1102 tab(n
+5, fout
); fout
<< "case LAST_TASK_INDEX: { ";
1103 tab(n
+6, fout
); fout
<< "fIsFinished = true;";
1104 tab(n
+6, fout
); fout
<< "break;";
1105 tab(n
+5, fout
); fout
<< "} ";
1107 gTaskCount
= START_TASK_INDEX
;
1110 printLoopGraphScheduler (n
+5,fout
);
1112 tab(n
+4,fout
); fout
<< "}";
1113 tab(n
+3,fout
); fout
<< "}";
1114 tab(n
+2,fout
); fout
<< "}";
1115 tab(n
+1,fout
); fout
<< "}";
/**
 * Print an auxiliary C++ class corresponding to an integer init signal
 */
1121 void SigIntGenKlass::println(int n
, ostream
& fout
)
1123 list
<Klass
* >::iterator k
;
1125 tab(n
,fout
); fout
<< "class " << fKlassName
<< " {";
1127 tab(n
,fout
); fout
<< " private:";
1128 tab(n
+1,fout
); fout
<< "int \tfSamplingFreq;";
1130 for (k
= fSubClassList
.begin(); k
!= fSubClassList
.end(); k
++) (*k
)->println(n
+1, fout
);
1132 printlines(n
+1, fDeclCode
, fout
);
1134 tab(n
,fout
); fout
<< " public:";
1136 tab(n
+1,fout
); fout
<< "int getNumInputs() \t{ "
1137 << "return " << fNumInputs
<< "; }";
1138 tab(n
+1,fout
); fout
<< "int getNumOutputs() \t{ "
1139 << "return " << fNumOutputs
<< "; }";
1141 tab(n
+1,fout
); fout
<< "void init(int samplingFreq) {";
1142 tab(n
+2,fout
); fout
<< "fSamplingFreq = samplingFreq;";
1143 printlines(n
+2, fInitCode
, fout
);
1144 tab(n
+1,fout
); fout
<< "}";
1146 tab(n
+1,fout
); fout
<< "void fill (int count, int output[]) {";
1147 printlines (n
+2, fZone1Code
, fout
);
1148 printlines (n
+2, fZone2Code
, fout
);
1149 printlines (n
+2, fZone2bCode
, fout
);
1150 printlines (n
+2, fZone3Code
, fout
);
1151 printLoopGraphInternal (n
+2,fout
);
1152 tab(n
+1,fout
); fout
<< "}";
1154 tab(n
,fout
); fout
<< "};\n" << endl
;
/**
 * Print an auxiliary C++ class corresponding to a float init signal
 */
1160 void SigFloatGenKlass::println(int n
, ostream
& fout
)
1162 list
<Klass
* >::iterator k
;
1164 tab(n
,fout
); fout
<< "class " << fKlassName
<< " {";
1166 tab(n
,fout
); fout
<< " private:";
1167 tab(n
+1,fout
); fout
<< "int \tfSamplingFreq;";
1169 for (k
= fSubClassList
.begin(); k
!= fSubClassList
.end(); k
++) (*k
)->println(n
+1, fout
);
1171 printlines(n
+1, fDeclCode
, fout
);
1173 tab(n
,fout
); fout
<< " public:";
1175 tab(n
+1,fout
); fout
<< "int getNumInputs() \t{ "
1176 << "return " << fNumInputs
<< "; }";
1177 tab(n
+1,fout
); fout
<< "int getNumOutputs() \t{ "
1178 << "return " << fNumOutputs
<< "; }";
1180 tab(n
+1,fout
); fout
<< "void init(int samplingFreq) {";
1181 tab(n
+2,fout
); fout
<< "fSamplingFreq = samplingFreq;";
1182 printlines(n
+2, fInitCode
, fout
);
1183 tab(n
+1,fout
); fout
<< "}";
1185 tab(n
+1,fout
); fout
<< subst("void fill (int count, $0 output[]) {", ifloat());
1186 printlines (n
+2, fZone1Code
, fout
);
1187 printlines (n
+2, fZone2Code
, fout
);
1188 printlines (n
+2, fZone2bCode
, fout
);
1189 printlines (n
+2, fZone3Code
, fout
);
1190 printLoopGraphInternal(n
+2,fout
);
1191 tab(n
+1,fout
); fout
<< "}";
1193 tab(n
,fout
); fout
<< "};\n" << endl
;
/**
 * Add every string of src to dst (set union, in place).
 *
 * src is only read, so it is taken by const reference; existing callers
 * passing non-const sets keep working unchanged.
 *
 * @param dst  set that receives the elements
 * @param src  set whose elements are copied into dst
 */
static void merge (std::set<std::string>& dst, const std::set<std::string>& src)
{
    // Range-inserting a container into itself is not allowed by the
    // standard, and would be a no-op anyway.
    if (&dst == &src) return;

    dst.insert(src.begin(), src.end());
}
1202 void Klass::collectIncludeFile(set
<string
>& S
)
1204 list
<Klass
* >::iterator k
;
1206 for (k
= fSubClassList
.begin(); k
!= fSubClassList
.end(); k
++) (*k
)->collectIncludeFile(S
);
1207 merge(S
, fIncludeFileSet
);
1210 void Klass::collectLibrary(set
<string
>& S
)
1212 list
<Klass
* >::iterator k
;
1214 for (k
= fSubClassList
.begin(); k
!= fSubClassList
.end(); k
++) (*k
)->collectLibrary(S
);
1215 merge(S
, fLibrarySet
);