torch3-3.1.orig/0000755000175000017500000000000010106445243013673 5ustar kalfakalfa00000000000000torch3-3.1.orig/core/0000755000175000017500000000000010106445234014623 5ustar kalfakalfa00000000000000torch3-3.1.orig/core/OutputMeasurer.h0000644000175000017500000000447110106445234020006 0ustar kalfakalfa00000000000000// Copyright (C) 2003--2004 Ronan Collobert (collober@idiap.ch) // // This file is part of Torch 3.1. // // All rights reserved. // // Redistribution and use in source and binary forms, with or without // modification, are permitted provided that the following conditions // are met: // 1. Redistributions of source code must retain the above copyright // notice, this list of conditions and the following disclaimer. // 2. Redistributions in binary form must reproduce the above copyright // notice, this list of conditions and the following disclaimer in the // documentation and/or other materials provided with the distribution. // 3. The name of the author may not be used to endorse or promote products // derived from this software without specific prior written permission. // // THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR // IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES // OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. // IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, // INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT // NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF // THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
#ifndef OUTPUT_MEASURER_INC #define OUTPUT_MEASURER_INC #include "Measurer.h" namespace Torch { /** Compute the outputification error (in %) of the #inputs# with respect to the #targets# of #data#. The format of the output is given with a #OutputFormat#. It can print the confusion matrix if specified. @author Ronan Collobert (collober@idiap.ch) */ class OutputMeasurer : public Measurer { public: /// Sequences to output. SequenceList *sequences; /// Sum of frame size of all sequences int total_frame_size; //----- /// OutputMeasurer(DataSet *data_, XFile *file_); /// Adds a sequence which will be outputed. void addSequence(Sequence *sequence); //----- virtual void measureExample(); virtual ~OutputMeasurer(); }; } #endif torch3-3.1.orig/core/Allocator.cc0000644000175000017500000001312110106445234017050 0ustar kalfakalfa00000000000000// Copyright (C) 2003--2004 Ronan Collobert (collober@idiap.ch) // // This file is part of Torch 3.1. // // All rights reserved. // // Redistribution and use in source and binary forms, with or without // modification, are permitted provided that the following conditions // are met: // 1. Redistributions of source code must retain the above copyright // notice, this list of conditions and the following disclaimer. // 2. Redistributions in binary form must reproduce the above copyright // notice, this list of conditions and the following disclaimer in the // documentation and/or other materials provided with the distribution. // 3. The name of the author may not be used to endorse or promote products // derived from this software without specific prior written permission. // // THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR // IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES // OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
// IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, // INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT // NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF // THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #include "Allocator.h" #include "Object.h" namespace Torch { Allocator::Allocator() { ptrs = NULL; } void *Allocator::alloc(size_t size, int object_style) { // Are you stupid ? if(size <= 0) return(NULL); // Allocate what you need void *ptr = sysAlloc(size); if(!ptr) error("Allocator: not enough memory. Buy new ram."); // Save the pointer retain(ptr, object_style); return(ptr); } void *Allocator::realloc(void *ptr, size_t size) { // Free it ? if(size <= 0) { Allocator::free(ptr); return NULL; } // Find the node bool is_mine = false; AllocatorNode *ptrs_ = ptrs; if(ptr == NULL) is_mine = true; else { while(ptrs_) { if(ptrs_->ptr == ptr) { is_mine = true; break; } ptrs_ = ptrs_->next; } } if(!is_mine) error("Allocator: cannot realloc a pointer which is not mine."); if(ptr) { // Reallocate (Here, size > 0 for sure) void *ptrx = sysRealloc(ptr, size); if(!ptrx) error("Allocator: not enough memory. Buy new ram."); // Save the new pointer ptrs_->ptr = ptrx; return(ptrx); } else { // Forcement pas un objet return(this->alloc(size, 0)); } } void Allocator::free(void *ptr) { // Gni? 
if(!ptr) return; // Release the pointer int object_style = release(ptr); // Free it // message("Allocator [%p] free %p mode %d", this, ptr, object_style); if(object_style == 0) ::free(ptr); else { if(object_style == 1) delete (Object *)ptr; else ((Object *)ptr)->~Object(); } } void Allocator::retain(void *ptr, int object_style) { #ifdef DEBUG AllocatorNode *dbg_ptr = isMine(ptr); if(dbg_ptr) { if(object_style != 2) error("Allocator [debug mode]: try to retain a previously retained pointer! You'll destruct an inexistant object."); if(object_style == dbg_ptr->object_style) error("Allocator [debug mode]: try to retain a previously retained pointer with same mode [%d]!", object_style); } #endif // Create a new node to be placed *before* the root AllocatorNode *ptrs_ = (AllocatorNode *)sysAlloc(sizeof(AllocatorNode)); if(!ptrs_) error("Allocator: not enough memory. Buy new ram."); ptrs_->prev = NULL; ptrs_->next = ptrs; if(ptrs) ptrs->prev = ptrs_; // Save the root ptrs = ptrs_; // Save the pointer ptrs->ptr = ptr; ptrs->object_style = object_style; } int Allocator::release(void *ptr) { // Find the node (Note: Start with the beginning... recent is faster!) bool is_mine = false; AllocatorNode *ptrs_ = ptrs; while(ptrs_) { if(ptrs_->ptr == ptr) { is_mine = true; break; } ptrs_ = ptrs_->next; } if(!is_mine) error("Allocator: cannot release a pointer which is not mine."); // Check the links if(ptrs_->next) ptrs_->next->prev = ptrs_->prev; if(ptrs_->prev) ptrs_->prev->next = ptrs_->next; else // Viens-t-on de scrapper le root ? 
ptrs = ptrs_->next; // Free the node and return if object or not int object_style = ptrs_->object_style; ::free(ptrs_); return(object_style); } void Allocator::steal(void *ptr, Allocator *allocator) { int object_style = allocator->release(ptr); retain(ptr, object_style); } void Allocator::steal(Allocator *allocator) { while(allocator->ptrs) steal(allocator->ptrs->ptr, allocator); } AllocatorNode *Allocator::isMine(void *ptr) { AllocatorNode *ptrs_ = ptrs; while(ptrs_) { if(ptrs_->ptr == ptr) return ptrs_; ptrs_ = ptrs_->next; } return NULL; } void Allocator::freeAll() { while(ptrs) this->free(ptrs->ptr); } void *Allocator::sysAlloc(int size) { if(size <= 0) return(NULL); void *ptr = malloc(size); if(!ptr) error("Allocator: not enough memory. Buy new ram"); return(ptr); } void *Allocator::sysRealloc(void *ptr, int size) { void *ptr_ = NULL; if(size <= 0) ::free(ptr); else { ptr_ = ::realloc(ptr, size); if(!ptr_) error("Allocator: not enough memory. Buy new ram"); } return(ptr_); } Allocator::~Allocator() { freeAll(); } } torch3-3.1.orig/core/Stack.h0000644000175000017500000000430110106445234016037 0ustar kalfakalfa00000000000000// Copyright (C) 2003--2004 Ronan Collobert (collober@idiap.ch) // // This file is part of Torch 3.1. // // All rights reserved. // // Redistribution and use in source and binary forms, with or without // modification, are permitted provided that the following conditions // are met: // 1. Redistributions of source code must retain the above copyright // notice, this list of conditions and the following disclaimer. // 2. Redistributions in binary form must reproduce the above copyright // notice, this list of conditions and the following disclaimer in the // documentation and/or other materials provided with the distribution. // 3. The name of the author may not be used to endorse or promote products // derived from this software without specific prior written permission. 
// // THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR // IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES // OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. // IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, // INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT // NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF // THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #ifndef STACK_INC #define STACK_INC #include "Object.h" namespace Torch { struct StackNode { void *address; void *ptr; int size; }; /** This is an implementation of a "stack". You can "push" an array in memory, which will be restored when calling "pop", in a stack manner. @author Ronan Collobert (collober@idiap.ch) */ class Stack : public Object { private: int stack_size; public: StackNode *stack; int n_stacked_objects; /// Create a stack. Stack(); /// Save #size# bytes of memory given by #ptr#. void push(void *ptr, int size); /// Restore the last pushed memory. void pop(); virtual ~Stack(); }; } #endif torch3-3.1.orig/core/Allocator.h0000644000175000017500000001042310106445234016714 0ustar kalfakalfa00000000000000// Copyright (C) 2003--2004 Ronan Collobert (collober@idiap.ch) // // This file is part of Torch 3.1. // // All rights reserved. // // Redistribution and use in source and binary forms, with or without // modification, are permitted provided that the following conditions // are met: // 1. Redistributions of source code must retain the above copyright // notice, this list of conditions and the following disclaimer. // 2. 
Redistributions in binary form must reproduce the above copyright // notice, this list of conditions and the following disclaimer in the // documentation and/or other materials provided with the distribution. // 3. The name of the author may not be used to endorse or promote products // derived from this software without specific prior written permission. // // THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR // IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES // OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. // IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, // INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT // NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF // THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #ifndef ALLOCATOR_INC #define ALLOCATOR_INC #include "general.h" namespace Torch { struct AllocatorNode { void *ptr; int object_style; AllocatorNode *prev; AllocatorNode *next; }; /** Class do easily allocate/deallocate memory in Torch. The memory allocated by an allocator will be destroyed when the allocator will be destroyed. @see Object @author Ronan Collobert (collober@idiap.ch) */ class Allocator { public: AllocatorNode *ptrs; /// Create a new allocator. Allocator(); /** Returns #size# bytes of allocated memory. \begin{itemize} \item If #object_style# is 0, the allocated memory is considered as if it wasn't for an object. At the destruction of the allocator the memory will be freed, and that's all. \item If 1, the #Object# destructor will be called at the destruction and the memory will be freed. \item If 2, the destructor will be called, but the memory won't be freed. 
\end{itemize} */ void *alloc(size_t size, int object_style=0); /** Reallocate a part of the memory which has been already allocated with alloc (and #object_style#=0). Same behaviour as the #realloc()# system function. */ void *realloc(void *ptr, size_t size); /** Force given pointer to memory to be freed now. It considers the #object_style# given by #alloc()# and calls the Object destructor, if needed. */ void free(void *ptr); /** Tells to the allocator that it should handle the memory given by #ptr#. Take in account the #object_style#. */ void retain(void *ptr, int object_style=0); /** Tells to the allocator to stop taking in account the memory given by #ptr#. The memory will not be released. */ int release(void *ptr); /** Handles the memory given by #ptr# which was previouly handled by #allocator#. #allocator# looses the control of this memory */ void steal(void *ptr, Allocator *allocator); /// Steals all pointers contained in #allocator#. void steal(Allocator *allocator); /// Returns true iff ptr is handled by the allocator. AllocatorNode *isMine(void *ptr); /** Force all pointers contained in the allocator to be freed now. It considers the #object_style# given by #alloc()# and calls the Object destructor, if needed. */ void freeAll(); /** System allocation. As system malloc function, but do an error if there is no more memory. */ static void *sysAlloc(int size); /** System reallocation. As system realloc function, but do an error if there is no more memory. */ static void *sysRealloc(void *ptr, int size); ~Allocator(); }; } #endif torch3-3.1.orig/core/Stack.cc0000644000175000017500000000454210106445234016204 0ustar kalfakalfa00000000000000// Copyright (C) 2003--2004 Ronan Collobert (collober@idiap.ch) // // This file is part of Torch 3.1. // // All rights reserved. // // Redistribution and use in source and binary forms, with or without // modification, are permitted provided that the following conditions // are met: // 1. 
Redistributions of source code must retain the above copyright // notice, this list of conditions and the following disclaimer. // 2. Redistributions in binary form must reproduce the above copyright // notice, this list of conditions and the following disclaimer in the // documentation and/or other materials provided with the distribution. // 3. The name of the author may not be used to endorse or promote products // derived from this software without specific prior written permission. // // THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR // IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES // OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. // IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, // INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT // NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF // THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
#include "Stack.h"

namespace Torch {

// Starts empty: slots are allocated on demand by push().
Stack::Stack()
{
  stack_size = 0;
  stack = NULL;
  n_stacked_objects = 0;
}

// Saves a private copy of the #size# bytes found at #ptr#.
// A NULL #ptr# or a non-positive #size# pushes an entry that restores
// nothing, so that push()/pop() calls always stay paired.
void Stack::push(void *ptr, int size)
{
  // Grow the slot array by one when it is full.
  if(n_stacked_objects >= stack_size)
  {
    stack_size++;
    stack = (StackNode *)allocator->realloc(stack, sizeof(StackNode)*stack_size);
  }

  StackNode *node = stack+n_stacked_objects;
  node->address = ptr;
  node->size = size;

  // alloc() returns NULL for an empty request: never memcpy() into that.
  if(ptr && (size > 0))
  {
    node->ptr = allocator->alloc(size);
    memcpy(node->ptr, ptr, size);
  }
  else
    node->ptr = NULL;

  n_stacked_objects++;
}

// Writes the most recently pushed bytes back to where they came from and
// drops the private copy.
void Stack::pop()
{
  // Popping an empty stack would index stack[-1].
  if(n_stacked_objects <= 0)
  {
    error("Stack: cannot pop an empty stack.");
    return;
  }

  n_stacked_objects--;
  StackNode *node = stack+n_stacked_objects;
  if(node->ptr)
  {
    memcpy(node->address, node->ptr, node->size);
    allocator->free(node->ptr);
    node->ptr = NULL;
  }
}

Stack::~Stack()
{
}

}
// [archive member header] torch3-3.1.orig/core/Bagging.cc0000644000175000017500000000727010106445234016476 0ustar kalfakalfa00000000000000
// Copyright (C) 2003--2004 Samy Bengio (bengio@idiap.ch)
//
// This file is part of Torch 3.1.
//
// All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions
// are met:
// 1. Redistributions of source code must retain the above copyright
//    notice, this list of conditions and the following disclaimer.
// 2. Redistributions in binary form must reproduce the above copyright
//    notice, this list of conditions and the following disclaimer in the
//    documentation and/or other materials provided with the distribution.
// 3. The name of the author may not be used to endorse or promote products
//    derived from this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
// IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
// OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
// IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, // INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT // NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF // THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #include "Bagging.h" #include "Random.h" namespace Torch { Bagging::Bagging(WeightedSumMachine* w_machine_) : Trainer(w_machine_) { w_machine = w_machine_; n_trainers = w_machine->n_trainers; n_unselected_examples = (int *)allocator->alloc(sizeof(int)*n_trainers); unselected_examples = (int **)allocator->alloc(sizeof(int*)*n_trainers); selected_examples = (int **)allocator->alloc(sizeof(int*)*n_trainers); is_selected_examples = NULL; } void Bagging::bootstrapData(int* selected, int* is_selected, int n_examples) { for (int j=0;jn_examples; is_selected_examples = (int *)allocator->realloc(is_selected_examples, sizeof(int)*n); for (int i = 0; i < n_trainers; i++) { unselected_examples[i] = (int *)allocator->realloc(unselected_examples[i], sizeof(int)*n); selected_examples[i] = (int *)allocator->realloc(selected_examples[i], sizeof(int)*n); } for(int i = 0; i < n_trainers; i++) w_machine->weights[i] = 1./((real)n_trainers); message("Bagging: training"); w_machine->n_trainers_trained = 0; for (int i=0;ipushSubset(selected_examples[i],n); // keep in mind examples not used by trainers[i] int k=0; for (int j=0;jtrainers[i]->machine->reset(); w_machine->trainers[i]->train(data, w_machine->trainers_measurers ? 
w_machine->trainers_measurers[i] : NULL); // put back the selected_examples data->popSubset(); w_machine->n_trainers_trained = i+1; // if measurers is given, call the test method by fooling it // with the number of trainers if (measurers) test(measurers); } } Bagging::~Bagging() { } } torch3-3.1.orig/core/CmdOption.h0000644000175000017500000001350210106445234016671 0ustar kalfakalfa00000000000000// Copyright (C) 2003--2004 Ronan Collobert (collober@idiap.ch) // // This file is part of Torch 3.1. // // All rights reserved. // // Redistribution and use in source and binary forms, with or without // modification, are permitted provided that the following conditions // are met: // 1. Redistributions of source code must retain the above copyright // notice, this list of conditions and the following disclaimer. // 2. Redistributions in binary form must reproduce the above copyright // notice, this list of conditions and the following disclaimer in the // documentation and/or other materials provided with the distribution. // 3. The name of the author may not be used to endorse or promote products // derived from this software without specific prior written permission. // // THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR // IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES // OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. // IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, // INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT // NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF // THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
#ifndef CMD_OPTION_INC #define CMD_OPTION_INC #include "Object.h" namespace Torch { /** This class defines an option for the command line. If you need special command line arguments/options, you have to create a new children of this class. @author Ronan Collobert (collober@idiap.ch) @see CmdLine */ class CmdOption : public Object { private: // Special flags. bool is_option; bool is_argument; bool is_text; bool is_master_switch; public: /// Name of the option. char *name; /// Type name of the option. char *type_name; /** An help string. Cannot be NULL. */ char *help; /** True is the option has to be saved when saving the command line. */ bool save; /** True is the option has been setted after reading the command-line. */ bool is_setted; ////////////////////// /// CmdOption(const char *name_, const char *type_name_, const char *help_="", bool save_=false); /// Initialize the value of the option. virtual void initValue(); /// If #is_setted# is true, print the current value, else the init value. virtual void printValue(XFile *file_); /** Read the option on the command line. argv_ and argc_ have to point of the next option after that. */ virtual void read(int *argc_, char ***argv_); /* Return true if the option is on the command line. Decrements argc_ and increment argv_ if true. */ bool isCurrent(int *argc_, char ***argv_); /** Returns true if it's an optional argument. If #set_# is true, set it to an optional argument. */ bool isOption(bool set_=false); /** Returns true if it's a required argument. If #set_# is true, set it to a required argument. */ bool isArgument(bool set_=false); /** Returns true if it's just text to be displayed in the command line. If #set_# is true, set it to text mode. */ bool isText(bool set_=false); /** Returns true if it's a master switch. If #set_# is true, set it to a master switch. */ bool isMasterSwitch(bool set_=false); ~CmdOption(); }; /** This class defines a integer command-line option. 
@author Ronan Collobert (collober@idiap.ch) @see CmdLine */ class IntCmdOption : public CmdOption { public: int *ptr; int init_value; /// IntCmdOption(const char *name_, int *ptr_, int init_value_, const char *help_="", bool save_=false); virtual void initValue(); virtual void printValue(XFile *file_); virtual void read(int *argc_, char ***argv_); virtual void loadXFile(XFile *file); virtual void saveXFile(XFile *file); ~IntCmdOption(); }; /** This class defines a real command-line option. @author Ronan Collobert (collober@idiap.ch) @see CmdLine */ class RealCmdOption : public CmdOption { public: real *ptr; real init_value; /// RealCmdOption(const char *name_, real *ptr_, real init_value_, const char *help_="", bool save_=false); virtual void initValue(); virtual void printValue(XFile *file_); virtual void read(int *argc_, char ***argv_); virtual void loadXFile(XFile *file); virtual void saveXFile(XFile *file); ~RealCmdOption(); }; /** This class defines a bool command-line option. @author Ronan Collobert (collober@idiap.ch) @see CmdLine */ class BoolCmdOption : public CmdOption { public: bool *ptr; bool init_value; /// BoolCmdOption(const char *name_, bool *ptr_, bool init_value_, const char *help_="", bool save_=false); virtual void initValue(); virtual void read(int *argc_, char ***argv_); virtual void loadXFile(XFile *file); virtual void saveXFile(XFile *file); ~BoolCmdOption(); }; /** This class defines a string command-line option. 
@author Ronan Collobert (collober@idiap.ch) @see CmdLine */ class StringCmdOption : public CmdOption { public: char **ptr; char *init_value; /// StringCmdOption(const char *name_, char **ptr_, const char *init_value_, const char *help_="", bool save_=false); virtual void initValue(); virtual void printValue(XFile *file_); virtual void read(int *argc_, char ***argv_); virtual void loadXFile(XFile *file); virtual void saveXFile(XFile *file); ~StringCmdOption(); }; } #endif torch3-3.1.orig/core/Bagging.h0000644000175000017500000000604310106445234016335 0ustar kalfakalfa00000000000000// Copyright (C) 2003--2004 Samy Bengio (bengio@idiap.ch) // // This file is part of Torch 3.1. // // All rights reserved. // // Redistribution and use in source and binary forms, with or without // modification, are permitted provided that the following conditions // are met: // 1. Redistributions of source code must retain the above copyright // notice, this list of conditions and the following disclaimer. // 2. Redistributions in binary form must reproduce the above copyright // notice, this list of conditions and the following disclaimer in the // documentation and/or other materials provided with the distribution. // 3. The name of the author may not be used to endorse or promote products // derived from this software without specific prior written permission. // // THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR // IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES // OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
// IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, // INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT // NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF // THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #ifndef BAGGING_INC #define BAGGING_INC #include "Trainer.h" #include "Measurer.h" #include "DataSet.h" #include "WeightedSumMachine.h" namespace Torch { /** This class represents a #Trainer# that implements the well-known Bagging algorithm (Breiman, 1996). A "bagger" contains a series of trainers, each trained on a bootstrap of the original dataset. The output of the bagging is then the average of the output of each trainer. It is implemented using a #WeightedSumMachine# that performs the combination. @author Samy Bengio (bengio@idiap.ch) @see WeightedSumMachine */ class Bagging : public Trainer { public: /// This machine performs the combination. It contains many trainers. WeightedSumMachine* w_machine; /// The number of trainers in the bagging. 
int n_trainers; /// for each trainer, keep the indices of examples not used during training int** unselected_examples; /// for each trainer, keep the indices of examples used during training int** selected_examples; /// for each trainer, keep the number of examples not used during training int* n_unselected_examples; /// for each trainer, keep the number of examples used during training int* is_selected_examples; /// Bagging(WeightedSumMachine *w_machine); /// create a boostrap of the data and put in in selected virtual void bootstrapData(int* selected, int* is_selected, int n_examples); virtual void train(DataSet *data, MeasurerList* measurers); virtual ~Bagging(); }; } #endif torch3-3.1.orig/core/Boosting.cc0000644000175000017500000001303110106445234016714 0ustar kalfakalfa00000000000000// Copyright (C) 2003--2004 Ronan Collobert (collober@idiap.ch) // // This file is part of Torch 3.1. // // All rights reserved. // // Redistribution and use in source and binary forms, with or without // modification, are permitted provided that the following conditions // are met: // 1. Redistributions of source code must retain the above copyright // notice, this list of conditions and the following disclaimer. // 2. Redistributions in binary form must reproduce the above copyright // notice, this list of conditions and the following disclaimer in the // documentation and/or other materials provided with the distribution. // 3. The name of the author may not be used to endorse or promote products // derived from this software without specific prior written permission. // // THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR // IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES // OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
// IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, // INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT // NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF // THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #include "Boosting.h" #include "BoostingMeasurer.h" #include "Random.h" #include "NullXFile.h" namespace Torch { static void randw(int *selected_examples, real *ex_weights, int n_examples) { real *repartition = (real *)Allocator::sysAlloc(sizeof(real)*(n_examples+1)); repartition[0] = 0; for(int i = 0; i < n_examples; i++) repartition[i+1] = repartition[i]+ex_weights[i]; for(int i = 0; i < n_examples; i++) { real z = Random::uniform(); int gauche = 0; int droite = n_examples; while(gauche+1 != droite) { int centre = (gauche+droite)/2; if(repartition[centre] < z) gauche = centre; else droite = centre; } selected_examples[i] = gauche; // printf("%g < %g < %g\n", repartition[gauche], z, repartition[gauche+1]); } free(repartition); } Boosting::Boosting(WeightedSumMachine* w_machine_, ClassFormat *class_format_) : Trainer(w_machine_) { w_machine = w_machine_; class_format = class_format_; n_trainers = w_machine->n_trainers; weights = w_machine->weights; } void Boosting::train(DataSet *data, MeasurerList* measurers) { int n_examples = data->n_examples; int *selected_examples = (int *)Allocator::sysAlloc(n_examples*sizeof(int)); real *ex_weights = (real *)Allocator::sysAlloc(n_examples*sizeof(real)); for(int t = 0; t < n_examples; t++) ex_weights[t] = 1./((real)n_examples); NullXFile null_xfile; BoostingMeasurer *measurer = new BoostingMeasurer(class_format, &null_xfile); measurer->setDataSet(data); measurer->setWeights(ex_weights); message("Boosting: training..."); 
w_machine->n_trainers_trained = 0; MeasurerList the_boost_meas; the_boost_meas.addNode(measurer); // Initialise le boxon. for(int i = 0; i < n_trainers; i++) weights[i] = 0; for(int i = 0; i < n_trainers; i++) { randw(selected_examples, ex_weights, n_examples); data->pushSubset(selected_examples, n_examples); w_machine->trainers[i]->machine->reset(); w_machine->trainers[i]->train(data, w_machine->trainers_measurers ? w_machine->trainers_measurers[i] : NULL); data->popSubset(); // Calcule le nouveau 'beta'... measurer->setInputs(w_machine->trainers[i]->machine->outputs); w_machine->trainers[i]->test(&the_boost_meas); // Ben on vient d'entrainer un truc tu sais... w_machine->n_trainers_trained = i+1; // Check if all is classified [cas limite 1] if(measurer->beta == 0) { for(int j = 0; j < n_trainers; j++) weights[j] = 0; weights[i] = 1; warning("Boosting: train stopped. All examples are well classified."); // On teste quand meme... if(measurers) test(measurers); break; } // Regarde si c'est la misere [cas limite 2] if(measurer->beta >= 1) { w_machine->n_trainers_trained = i; warning("Boosting: train stopped. Model %d too weak.", i); break; } // Si tout va bien... //////////////////////// // Compute new weights int *ptr_status = measurer->status; real mul_pos = exp( 0.5*log(measurer->beta)); real mul_neg = exp(-0.5*log(measurer->beta)); for(int t = 0; t < n_examples; t++) { if(ptr_status[t] > 0) ex_weights[t] *= mul_pos; else ex_weights[t] *= mul_neg; } // Normalize les poids des exemples real z = 0; for(int t = 0; t < n_examples; t++) z += ex_weights[t]; for(int t = 0; t < n_examples; t++) ex_weights[t] /= z; // Refourgue le bon poids a l'autre naze... weights[i] = -0.5*log(measurer->beta); // Teste pour voir ce que ca donne... if(measurers) test(measurers); // Fin de si tout va bien... //////////////// } // Jarte la misere free(selected_examples); free(ex_weights); delete measurer; // Vraiment faible mon pote! 
if(w_machine->n_trainers_trained == 0) return; // Normalize trainers weights... real z = 0; for(int i = 0; i < n_trainers; i++) z += weights[i]; for(int i = 0; i < n_trainers; i++) weights[i] /= z; } Boosting::~Boosting() { } } torch3-3.1.orig/core/CmdOption.cc0000644000175000017500000001604610106445234017035 0ustar kalfakalfa00000000000000// Copyright (C) 2003--2004 Ronan Collobert (collober@idiap.ch) // // This file is part of Torch 3.1. // // All rights reserved. // // Redistribution and use in source and binary forms, with or without // modification, are permitted provided that the following conditions // are met: // 1. Redistributions of source code must retain the above copyright // notice, this list of conditions and the following disclaimer. // 2. Redistributions in binary form must reproduce the above copyright // notice, this list of conditions and the following disclaimer in the // documentation and/or other materials provided with the distribution. // 3. The name of the author may not be used to endorse or promote products // derived from this software without specific prior written permission. // // THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR // IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES // OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. // IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, // INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT // NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF // THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
#include "CmdOption.h"
#include "XFile.h"

namespace Torch {

// Generic command-line option.
//
// name_      : token that identifies the option on the command line.
// type_name_ : label describing the expected value (e.g. "<int>").
// help_      : help text.
// save_      : stored in #save#.
//
// The three strings are copied into buffers obtained from this object's
// allocator. A freshly built option is flagged as a plain "option" (not an
// argument, a text or a master switch) and as not set yet.
CmdOption::CmdOption(const char *name_, const char *type_name_, const char *help_, bool save_)
{
  name = (char *)allocator->alloc(strlen(name_)+1);
  strcpy(name, name_);

  type_name = (char *)allocator->alloc(strlen(type_name_)+1);
  strcpy(type_name, type_name_);

  help = (char *)allocator->alloc(strlen(help_)+1);
  strcpy(help, help_);

  save = save_;
  is_setted = false;

  is_option = true;
  is_argument = false;
  is_text = false;
  is_master_switch = false;
}

// The four functions below share one contract: when #set_# is true the
// object switches to that role (the three other role flags are cleared);
// in all cases the current value of the corresponding flag is returned.

bool CmdOption::isOption(bool set_)
{
  if(set_)
  {
    is_option = true;
    is_argument = false;
    is_text = false;
    is_master_switch = false;
  }
  return is_option;
}

bool CmdOption::isArgument(bool set_)
{
  if(set_)
  {
    is_option = false;
    is_argument = true;
    is_text = false;
    is_master_switch = false;
  }
  return is_argument;
}

bool CmdOption::isText(bool set_)
{
  if(set_)
  {
    is_option = false;
    is_argument = false;
    is_text = true;
    is_master_switch = false;
  }
  return is_text;
}

bool CmdOption::isMasterSwitch(bool set_)
{
  if(set_)
  {
    is_option = false;
    is_argument = false;
    is_text = false;
    is_master_switch = true;
  }
  return is_master_switch;
}

// Default hooks: the base class holds no value, so these do nothing.
void CmdOption::initValue()
{
}

void CmdOption::printValue(XFile *file_)
{
}

void CmdOption::read(int *argc_, char ***argv_)
{
}

// Returns true when the next command-line token is exactly #name#, and in
// that case consumes it (one less argument, argv advanced by one).
// Only plain options and master switches are matched by name.
bool CmdOption::isCurrent(int *argc_, char ***argv_)
{
  if(!is_option && !is_master_switch)
    return false;

  // No token left: nothing to compare against (avoids reading argv[0]
  // past the end of the argument list).
  if(*argc_ <= 0)
    return false;

  if(strcmp((*argv_)[0], name))
    return false;

  (*argc_)--;
  (*argv_)++;
  return true;
}

CmdOption::~CmdOption()
{
}

//-------------------------- int

// Integer option writing into #*ptr_#, with default #init_value_#.
// NOTE(review): the type label was an empty string in this copy of the
// file, which looks like an extraction artefact (angle-bracketed text is
// missing elsewhere in the archive too). "<int>" is restored here --
// confirm against the upstream sources.
IntCmdOption::IntCmdOption(const char *name_, int *ptr_, int init_value_, const char *help_, bool save_)
  : CmdOption(name_, "<int>", help_, save_)
{
  ptr = ptr_;
  init_value = init_value_;
}

void IntCmdOption::initValue()
{
  *ptr = init_value;
}

// Prints the user value once the option is flagged as set, the default
// value otherwise.
void IntCmdOption::printValue(XFile *file_)
{
  if(is_setted)
    file_->printf("[%d]", *ptr);
  else
    file_->printf("[%d]", init_value);
}

// Parses the next token as a base-10 integer and consumes it.
// error() is raised when there is no token, when the token holds no digits
// at all (e.g. an empty string, which strtol() would silently turn into 0)
// or when it has trailing garbage.
// NOTE(review): error() is assumed not to return -- confirm.
void IntCmdOption::read(int *argc_, char ***argv_)
{
  char **argv = *argv_;
  char *end_of_number;

  if(*argc_ == 0)
    error("IntCmdOption: cannot correctly set <%s>", name);

  *ptr = strtol(argv[0], &end_of_number, 10);
  if( (end_of_number == argv[0]) || (*end_of_number != '\0') )
    error("IntCmdOption: <%s> requires an integer", name);

  (*argc_)--;
  (*argv_)++;
}

void IntCmdOption::loadXFile(XFile *file)
{
  file->taggedRead(ptr, sizeof(int), 1, name);
}

void IntCmdOption::saveXFile(XFile *file)
{
  file->taggedWrite(ptr, sizeof(int), 1, name);
}

IntCmdOption::~IntCmdOption()
{
}

//-------------------------- real

// Real-valued option writing into #*ptr_#, with default #init_value_#.
// NOTE(review): "<real>" restored for the same reason as "<int>" above --
// confirm against the upstream sources.
RealCmdOption::RealCmdOption(const char *name_, real *ptr_, real init_value_, const char *help_, bool save_)
  : CmdOption(name_, "<real>", help_, save_)
{
  ptr = ptr_;
  init_value = init_value_;
}

void RealCmdOption::initValue()
{
  *ptr = init_value;
}

// Prints the user value once the option is flagged as set, the default
// value otherwise.
void RealCmdOption::printValue(XFile *file_)
{
  if(is_setted)
    file_->printf("[%g]", *ptr);
  else
    file_->printf("[%g]", init_value);
}

// Parses the next token as a real number and consumes it.
// Same rejection rules as IntCmdOption::read(): missing token, token
// without any number in it, or trailing garbage.
void RealCmdOption::read(int *argc_, char ***argv_)
{
  char **argv = *argv_;
  char *end_of_number;

  if(*argc_ == 0)
    error("RealCmdOption: cannot correctly set <%s>", name);

  *ptr = strtod(argv[0], &end_of_number);
  if( (end_of_number == argv[0]) || (*end_of_number != '\0') )
    error("RealCmdOption: <%s> requires a real", name);

  (*argc_)--;
  (*argv_)++;
}

void RealCmdOption::loadXFile(XFile *file)
{
  file->taggedRead(ptr, sizeof(real), 1, name);
}

void RealCmdOption::saveXFile(XFile *file)
{
  file->taggedWrite(ptr, sizeof(real), 1, name);
}

RealCmdOption::~RealCmdOption()
{
}

//-------------------------- switch

// Boolean switch writing into #*ptr_#. It takes no value on the command
// line, hence the empty type label.
BoolCmdOption::BoolCmdOption(const char *name_, bool *ptr_, bool init_value_, const char *help_, bool save_)
  : CmdOption(name_, "", help_, save_)
{
  ptr = ptr_;
  init_value = init_value_;
}

void BoolCmdOption::initValue()
{
  *ptr = init_value;
}

// Meeting the switch on the command line flips the current value; no
// token is consumed here.
void BoolCmdOption::read(int *argc_, char ***argv_)
{
  *ptr = !(*ptr);
}

// The flag is stored on disk as an int (non-zero meaning true).
void BoolCmdOption::loadXFile(XFile *file)
{
  int stored_flag;
  file->taggedRead(&stored_flag, sizeof(int), 1, name);
  *ptr = (stored_flag ? 1 : 0);
}

void BoolCmdOption::saveXFile(XFile *file)
{
  int stored_flag = (*ptr ? 1 : 0);
  file->taggedWrite(&stored_flag, sizeof(int), 1, name);
}

BoolCmdOption::~BoolCmdOption()
{
}

//-------------------------- string

// String option writing into #*ptr_#. The default value is copied into a
// buffer owned by this object's allocator.
// NOTE(review): "<string>" restored for the same reason as "<int>" above
// -- confirm against the upstream sources.
StringCmdOption::StringCmdOption(const char *name_, char **ptr_, const char *init_value_, const char *help_, bool save_)
  : CmdOption(name_, "<string>", help_, save_)
{
  ptr = ptr_;
  init_value = (char *)allocator->alloc(strlen(init_value_)+1);
  strcpy(init_value, init_value_);
}

// Makes #*ptr# point to a fresh, allocator-owned copy of the default.
void StringCmdOption::initValue()
{
  *ptr = (char *)allocator->alloc(strlen(init_value)+1);
  strcpy(*ptr, init_value);
}

// Prints the user value once the option is flagged as set, the default
// value otherwise.
void StringCmdOption::printValue(XFile *file_)
{
  if(is_setted)
    file_->printf("[%s]", *ptr);
  else
    file_->printf("[%s]", init_value);
}

// Replaces the current string by a copy of the next token and consumes it.
// The previous buffer is handed back to the allocator, so #*ptr# is
// expected to come from it (presumably through initValue() -- confirm
// against the callers).
void StringCmdOption::read(int *argc_, char ***argv_)
{
  char **argv = *argv_;

  if(*argc_ == 0)
    error("StringCmdOption: cannot correctly set <%s>", name);

  allocator->free(*ptr);
  *ptr = (char *)allocator->alloc(strlen(argv[0])+1);
  strcpy(*ptr, argv[0]);

  (*argc_)--;
  (*argv_)++;
}

// Reads back what saveXFile() wrote: the byte count (terminator included)
// under the "SIZE" tag, then the characters under the option name.
// The size comes from the file, so it is checked before being used as an
// allocation length, and the buffer is forcibly terminated in case the
// stored bytes do not end with '\0'.
// NOTE(review): unlike read(), the previous buffer is not given back to
// the allocator here; kept as is because SOURCE does not show whether
// #*ptr# is always allocator-owned when loading.
void StringCmdOption::loadXFile(XFile *file)
{
  int stored_size;
  file->taggedRead(&stored_size, sizeof(int), 1, "SIZE");
  if(stored_size <= 0)
    error("StringCmdOption: invalid saved size for <%s>", name);

  *ptr = (char *)allocator->alloc(stored_size);
  file->taggedRead(*ptr, 1, stored_size, name);
  (*ptr)[stored_size-1] = '\0';
}

// Writes the byte count (terminator included) followed by the characters.
void StringCmdOption::saveXFile(XFile *file)
{
  int stored_size = strlen(*ptr)+1;
  file->taggedWrite(&stored_size, sizeof(int), 1, "SIZE");
  file->taggedWrite(*ptr, 1, stored_size, name);
}

StringCmdOption::~StringCmdOption()
{
}

}
torch3-3.1.orig/core/Boosting.h0000644000175000017500000000521710106445234016565 0ustar kalfakalfa00000000000000// Copyright (C) 2003--2004 Ronan Collobert (collober@idiap.ch) // // This file is part of Torch 3.1. // // All rights reserved. // // Redistribution and use in source and binary forms, with or without // modification, are permitted provided that the following conditions // are met: // 1. Redistributions of source code must retain the above copyright // notice, this list of conditions and the following disclaimer. // 2. 
Redistributions in binary form must reproduce the above copyright // notice, this list of conditions and the following disclaimer in the // documentation and/or other materials provided with the distribution. // 3. The name of the author may not be used to endorse or promote products // derived from this software without specific prior written permission. // // THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR // IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES // OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. // IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, // INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT // NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF // THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#ifndef BOOSTING_INC
#define BOOSTING_INC

#include "Trainer.h"
#include "WeightedSumMachine.h"
#include "ClassFormat.h"

namespace Torch {

/** Boosting implementation.

    As the idea of boosting in regression hasn't been really
    well tested, this is boosting for *classification* only.

    This trainer "boosts" the machine given by the
    #WeightedSumMachine#, on the given #DataSet#.
    You have to provide a #ClassFormat# so that the trainer
    knows how the classes are encoded.

    @author Ronan Collobert (collober@idiap.ch)
    @see WeightedSumMachine
*/
class Boosting : public Trainer
{
  public:
    /// This machine performs the combination. It contains many trainers.
    WeightedSumMachine *w_machine;

    /// #ClassFormat# to know how the classes are encoded.
    ClassFormat *class_format;

    /// The number of trainers in the boosting.
    int n_trainers;

    /** The weights of each machine in the boosting (one per trainer).
        #train()# fills them and, unless no trainer could be kept,
        rescales them by their sum at the end.
    */
    real *weights;

    /** Build a boosting trainer on top of #w_machine_#, with classes
        encoded as described by #class_format_#.
    */
    Boosting(WeightedSumMachine *w_machine_, ClassFormat *class_format_);

    //-----

    /** Train the trainers of #w_machine# one after the other on #data#.
        Each trainer is trained on a subset of #data# drawn according to
        the current example weights and then tested to obtain a "beta"
        value. Training stops early when beta is 0 (every example is
        well classified) or when beta is greater than or equal to 1
        (the model is too weak). If #measurers# is not NULL, it is used
        to test the combined machine after each round.
    */
    virtual void train(DataSet *data, MeasurerList *measurers);

    virtual ~Boosting();
};


}

#endif
torch3-3.1.orig/core/BoostingMeasurer.cc0000644000175000017500000000560610106445234020431 0ustar kalfakalfa00000000000000// Copyright (C) 2003--2004 Ronan Collobert (collober@idiap.ch) // and Samy Bengio (bengio@idiap.ch) // // This file is part of Torch 3.1. // // All rights reserved. // // Redistribution and use in source and binary forms, with or without // modification, are permitted provided that the following conditions // are met: // 1. Redistributions of source code must retain the above copyright // notice, this list of conditions and the following disclaimer. // 2. Redistributions in binary form must reproduce the above copyright // notice, this list of conditions and the following disclaimer in the // documentation and/or other materials provided with the distribution. // 3. The name of the author may not be used to endorse or promote products // derived from this software without specific prior written permission. // // THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR // IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES // OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. // IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, // INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT // NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF // THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
#include "BoostingMeasurer.h"

namespace Torch {

// Builds a measurer that is not attached to any dataset yet (the base
// class receives a NULL dataset). The dataset, the example weights and
// the inputs have to be provided afterwards with setDataSet(),
// setWeights() and setInputs().
BoostingMeasurer::BoostingMeasurer(ClassFormat *class_format_, XFile *file_) : Measurer(NULL, file_)
{
  weights = NULL;
  inputs = NULL;
  status = NULL;
  class_format = class_format_;

  // Give #beta# a defined value until the first measureIteration().
  beta = 0;
  internal_error = 0;
  current_example = 0;
}

// Attaches the measurer to #data_# and resizes #status# so that it holds
// one entry per example of that dataset.
void BoostingMeasurer::setDataSet(DataSet *data_)
{
  data = data_;
  status = (int *)allocator->realloc(status, sizeof(int)*data->n_examples);
}

// Stores the pointer only: the weights are not copied.
void BoostingMeasurer::setWeights(real *weights_)
{
  weights = weights_;
}

// Stores the pointer only: the sequence is not copied.
void BoostingMeasurer::setInputs(Sequence *inputs_)
{
  inputs = inputs_;
}

// Compares the class decoded from the first frame of #inputs# with the
// class decoded from the first frame of the targets. On a mismatch the
// weight of the current example is added to the error and the example is
// marked -1 in #status#; otherwise it is marked 1. The example counter
// advances in both cases.
void BoostingMeasurer::measureExample()
{
  int c_obs = class_format->getClass(inputs->frames[0]);
  int c_des = class_format->getClass(data->targets->frames[0]);

  if(c_obs != c_des)
  {
    internal_error += weights[current_example];
    status[current_example++] = -1;
  }
  else
    status[current_example++] = 1;
}

// Turns the accumulated weighted error into beta = error/(1 - error),
// writes both values to the file and resets the accumulators.
void BoostingMeasurer::measureIteration()
{
  // The error is a sum of example weights. Boosting::train() rescales
  // those weights by their sum, so when every example is misclassified
  // the sum may land slightly above 1 because of rounding. The quotient
  // below would then be negative, which passes both the "beta == 0" and
  // the "beta >= 1" stop tests of Boosting::train() and ends up in log().
  // Clamping makes that case behave exactly like an error of 1
  // (division by zero, i.e. +infinity under IEEE arithmetic, as before).
  // NOTE(review): assumes #weights# is the normalized array of
  // Boosting::train() -- the call to setWeights() is not visible here.
  if(internal_error > 1.)
    internal_error = 1.;

  beta = internal_error/(1. - internal_error);

  if(binary_mode)
  {
    file->write(&internal_error, sizeof(real), 1);
    file->write(&beta, sizeof(real), 1);
  }
  else
    file->printf("%g ==> %g for beta\n", internal_error, beta);
  file->flush();

  reset();
}

// Clears the accumulated error and restarts the example counter.
// #beta# and #status# are left untouched.
void BoostingMeasurer::reset()
{
  internal_error = 0;
  current_example = 0;
}

BoostingMeasurer::~BoostingMeasurer()
{
}

}
torch3-3.1.orig/core/BoostingMeasurer.h0000644000175000017500000000555710106445234020300 0ustar kalfakalfa00000000000000// Copyright (C) 2003--2004 Ronan Collobert (collober@idiap.ch) // and Samy Bengio (bengio@idiap.ch) // // This file is part of Torch 3.1. // // All rights reserved. // // Redistribution and use in source and binary forms, with or without // modification, are permitted provided that the following conditions // are met: // 1. Redistributions of source code must retain the above copyright // notice, this list of conditions and the following disclaimer. // 2. 
Redistributions in binary form must reproduce the above copyright // notice, this list of conditions and the following disclaimer in the // documentation and/or other materials provided with the distribution. // 3. The name of the author may not be used to endorse or promote products // derived from this software without specific prior written permission. // // THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR // IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES // OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. // IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, // INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT // NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF // THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#ifndef BOOSTING_MEASURER_INC
#define BOOSTING_MEASURER_INC

#include "Measurer.h"
#include "DataSet.h"
#include "ClassFormat.h"

namespace Torch {

/** Compute the weighted classification error of the #inputs#
    with respect to the #targets# of #data#, for #Boosting#.

    The error is the sum of the weights of the misclassified
    examples (it is not converted to a percentage).
    The weights are given by #setWeights()#.

    Needed by #Boosting#. (Designed just for it).

    The format of the class is given with a #ClassFormat#.

    @author Ronan Collobert (collober@idiap.ch)
    @author Samy Bengio (bengio@idiap.ch)
    @see Boosting
*/
class BoostingMeasurer : public Measurer
{
  public:
    // General

    /// Sequence whose first frame is decoded as the observed class.
    Sequence *inputs;

    /// Weights of the examples (not owned, see #setWeights()#).
    real *weights;

    /// Set by #measureIteration()# to error/(1 - error).
    real beta;

    /// One entry per measured example: 1 if well classified, -1 otherwise.
    int *status;

    /// Sum of the weights of the misclassified examples measured so far.
    real internal_error;

    /// Index of the next example to measure in #weights# and #status#.
    int current_example;

    /// Tells how the classes are encoded.
    ClassFormat *class_format;

    /** Build a measurer writing to #file_#. No dataset is attached at
        construction time: see #setDataSet()#.
    */
    BoostingMeasurer(ClassFormat *class_format_, XFile *file_);

    /// Set the current working dataset.
    virtual void setDataSet(DataSet *data_);

    /// Set the current weights of training examples.
    virtual void setWeights(real *weights_);

    /// Gives the pointer where the measure will be done.
    virtual void setInputs(Sequence *inputs_);

    //-----

    // NOTE(review): no definition of init_() appears next to the other
    // member functions in BoostingMeasurer.cc -- confirm it is unused.
    void init_();

    /// Clear the accumulated error and restart the example counter.
    virtual void reset();

    /// Measure the current example and record its status.
    virtual void measureExample();

    /// Compute #beta#, write the error and #beta# to the file, then reset.
    virtual void measureIteration();

    virtual ~BoostingMeasurer();
};


}

#endif
torch3-3.1.orig/core/ClassFormat.cc0000644000175000017500000000324610106445234017355 0ustar kalfakalfa00000000000000// Copyright (C) 2003--2004 Ronan Collobert (collober@idiap.ch) // // This file is part of Torch 3.1. // // All rights reserved. // // Redistribution and use in source and binary forms, with or without // modification, are permitted provided that the following conditions // are met: // 1. Redistributions of source code must retain the above copyright // notice, this list of conditions and the following disclaimer. // 2. Redistributions in binary form must reproduce the above copyright // notice, this list of conditions and the following disclaimer in the // documentation and/or other materials provided with the distribution. // 3. The name of the author may not be used to endorse or promote products // derived from this software without specific prior written permission. // // THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR // IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES // OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. // IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, // INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT // NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF // THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
#include "ClassFormat.h"

namespace Torch {

// The base class handles no class at all: zero classes and no label
// table. Filling them in is left to the derived classes.
ClassFormat::ClassFormat()
{
  n_classes = 0;
  class_labels = NULL;
}

ClassFormat::~ClassFormat()
{
}

}
torch3-3.1.orig/core/ClassFormat.h0000644000175000017500000000454010106445234017215 0ustar kalfakalfa00000000000000// Copyright (C) 2003--2004 Ronan Collobert (collober@idiap.ch) // // This file is part of Torch 3.1. // // All rights reserved. // // Redistribution and use in source and binary forms, with or without // modification, are permitted provided that the following conditions // are met: // 1. Redistributions of source code must retain the above copyright // notice, this list of conditions and the following disclaimer. // 2. Redistributions in binary form must reproduce the above copyright // notice, this list of conditions and the following disclaimer in the // documentation and/or other materials provided with the distribution. // 3. The name of the author may not be used to endorse or promote products // derived from this software without specific prior written permission. // // THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR // IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES // OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. // IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, // INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT // NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF // THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#ifndef CLASS_FORMAT_INC
#define CLASS_FORMAT_INC

#include "Object.h"

namespace Torch {

/** Used to define a class code, that is the way a class is
    represented as a vector of reals.

    This is an abstract interface: every conversion function
    is pure virtual.

    @author Ronan Collobert (collober@idiap.ch)
*/
class ClassFormat : public Object
{
  public:
    /// Number of classes that the object currently handles
    int n_classes;

    /** The label of each class.
        #ClassMeasurer# prints #class_labels[i][j]# for each class #i#
        and for #j# from 0 to #getOutputSize()#-1.
    */
    real **class_labels;

    /// Start with no class and no label table.
    ClassFormat();

    /// Returns the output size.
    virtual int getOutputSize() = 0;

    /// Transforms the output from a OneHot representation.
    virtual void fromOneHot(real *outputs, real *one_hot_outputs) = 0;

    /// Transforms the output to a OneHot representation.
    virtual void toOneHot(real *outputs, real *one_hot_outputs) = 0;

    /// Returns the class of #vector#.
    virtual int getClass(real *vector) = 0;

    virtual ~ClassFormat();
};

}

#endif
torch3-3.1.orig/core/ClassMeasurer.cc0000644000175000017500000000743710106445234017716 0ustar kalfakalfa00000000000000// Copyright (C) 2003--2004 Ronan Collobert (collober@idiap.ch) // // This file is part of Torch 3.1. // // All rights reserved. // // Redistribution and use in source and binary forms, with or without // modification, are permitted provided that the following conditions // are met: // 1. Redistributions of source code must retain the above copyright // notice, this list of conditions and the following disclaimer. // 2. Redistributions in binary form must reproduce the above copyright // notice, this list of conditions and the following disclaimer in the // documentation and/or other materials provided with the distribution. // 3. The name of the author may not be used to endorse or promote products // derived from this software without specific prior written permission. // // THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR // IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES // OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
// IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, // INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT // NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF // THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #include "ClassMeasurer.h" namespace Torch { ClassMeasurer::ClassMeasurer(Sequence *inputs_, DataSet *data_, ClassFormat *class_format_, XFile *file_, bool calc_confusion_at_each_iter_) : Measurer(data_, file_) { inputs = inputs_; class_format = class_format_; calc_confusion_at_each_iter = calc_confusion_at_each_iter_; confusion = NULL; n_classes = class_format->n_classes; if(calc_confusion_at_each_iter) { confusion = (int **)allocator->alloc(sizeof(int*)*n_classes); for(int i = 0; i < n_classes; i++) confusion[i] = (int *)allocator->alloc(sizeof(int)*n_classes); } reset_(); } void ClassMeasurer::measureExample() { for(int i = 0; i < inputs->n_frames; i++) { int c_obs = class_format->getClass(inputs->frames[i]); int c_des = class_format->getClass(data->targets->frames[i]); if(c_obs != c_des) internal_error += 1.; if(calc_confusion_at_each_iter) confusion[c_obs][c_des]++; n_examples++; } } void ClassMeasurer::measureIteration() { internal_error /= n_examples; if(binary_mode) file->write(&internal_error, sizeof(real), 1); else file->printf("%g\n", internal_error); file->flush(); if(calc_confusion_at_each_iter) printConfusionMatrix(); reset(); } void ClassMeasurer::printConfusionMatrix() { if(binary_mode) { for(int i = 0; i < n_classes; i++) file->write(confusion[i], sizeof(real), n_classes); } else { file->printf("# Labels of classes:\n"); for(int i = 0; i < n_classes; i++) { for(int j = 0; j < class_format->getOutputSize(); j++) file->printf("%g ", 
class_format->class_labels[i][j]); file->printf("\n"); } file->printf("# Confusion matrix [rows: observed, colums: desired]:\n"); for(int i = 0; i < n_classes; i++) { for(int j = 0; j < n_classes; j++) file->printf("%d ", confusion[i][j]); file->printf("\n"); } } file->flush(); } void ClassMeasurer::reset_() { n_examples = 0; internal_error = 0; if(calc_confusion_at_each_iter) { for(int i = 0; i < n_classes; i++) { for(int j = 0; j < n_classes; j++) confusion[i][j] = 0; } } } void ClassMeasurer::reset() { reset_(); } ClassMeasurer::~ClassMeasurer() { } } torch3-3.1.orig/core/ClassMeasurer.h0000644000175000017500000000477110106445234017556 0ustar kalfakalfa00000000000000// Copyright (C) 2003--2004 Ronan Collobert (collober@idiap.ch) // // This file is part of Torch 3.1. // // All rights reserved. // // Redistribution and use in source and binary forms, with or without // modification, are permitted provided that the following conditions // are met: // 1. Redistributions of source code must retain the above copyright // notice, this list of conditions and the following disclaimer. // 2. Redistributions in binary form must reproduce the above copyright // notice, this list of conditions and the following disclaimer in the // documentation and/or other materials provided with the distribution. // 3. The name of the author may not be used to endorse or promote products // derived from this software without specific prior written permission. // // THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR // IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES // OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
// IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, // INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT // NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF // THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #ifndef CLASS_MEASURER_INC #define CLASS_MEASURER_INC #include "Measurer.h" #include "ClassFormat.h" namespace Torch { /** Compute the classification error (in %) of the #inputs# with respect to the #targets# of #data#. The format of the class is given with a #ClassFormat#. It can print the confusion matrix if specified. @author Ronan Collobert (collober@idiap.ch) */ class ClassMeasurer : public Measurer { public: real internal_error; int **confusion; Sequence *inputs; ClassFormat *class_format; bool calc_confusion_at_each_iter; int n_classes; int n_examples; //----- /// ClassMeasurer(Sequence *inputs_, DataSet *data_, ClassFormat *class_format_, XFile *file_, bool calc_confusion_at_each_iter_=false); //----- void printConfusionMatrix(); virtual void reset(); virtual void measureExample(); virtual void measureIteration(); void reset_(); virtual ~ClassMeasurer(); }; } #endif torch3-3.1.orig/core/ClassNLLMeasurer.cc0000644000175000017500000000514710106445234020260 0ustar kalfakalfa00000000000000// Copyright (C) 2003--2004 Ronan Collobert (collober@idiap.ch) // // This file is part of Torch 3.1. // // All rights reserved. // // Redistribution and use in source and binary forms, with or without // modification, are permitted provided that the following conditions // are met: // 1. Redistributions of source code must retain the above copyright // notice, this list of conditions and the following disclaimer. // 2. 
Redistributions in binary form must reproduce the above copyright // notice, this list of conditions and the following disclaimer in the // documentation and/or other materials provided with the distribution. // 3. The name of the author may not be used to endorse or promote products // derived from this software without specific prior written permission. // // THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR // IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES // OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. // IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, // INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT // NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF // THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
#include "ClassNLLMeasurer.h"

namespace Torch {

// Negative log-likelihood measurer.
// Keeps the given sequence and class format, starts with a null error and
// registers the two averaging options, both enabled by default.
ClassNLLMeasurer::ClassNLLMeasurer(Sequence *inputs_, DataSet *data_, ClassFormat *class_format_, XFile *file_)
  : Measurer(data_, file_)
{
  internal_error = 0;
  inputs = inputs_;
  class_format = class_format_;

  addBOption("average examples", &average_examples, true, "divided by the number of examples");
  addBOption("average frames", &average_frames, true, "divided by the number of frames");
}

// For each frame, picks in the input frame the value found at the index of
// the target class, and adds these values up. The total (divided by the
// number of frames when "average frames" is on) is subtracted from the
// running error.
void ClassNLLMeasurer::measureExample()
{
  real log_likelihood = 0;

  for(int frame = 0; frame < inputs->n_frames; frame++)
  {
    int target_class = class_format->getClass(data->targets->frames[frame]);
    log_likelihood = log_likelihood + inputs->frames[frame][target_class];
  }

  if(average_frames)
    log_likelihood = log_likelihood / inputs->n_frames;

  internal_error = internal_error - log_likelihood;
}

// Writes the running error (divided by the number of examples of the
// dataset when "average examples" is on), flushes the file and resets.
void ClassNLLMeasurer::measureIteration()
{
  if(average_examples)
    internal_error = internal_error / data->n_examples;

  if(!binary_mode)
    file->printf("%g\n", internal_error);
  else
    file->write(&internal_error, sizeof(real), 1);

  file->flush();
  reset();
}

// Starts a new accumulation.
void ClassNLLMeasurer::reset()
{
  internal_error = 0;
}

ClassNLLMeasurer::~ClassNLLMeasurer()
{
}

}
torch3-3.1.orig/core/ClassNLLMeasurer.h0000644000175000017500000000501410106445234020113 0ustar kalfakalfa00000000000000// Copyright (C) 2003--2004 Ronan Collobert (collober@idiap.ch) // // This file is part of Torch 3.1. // // All rights reserved. // // Redistribution and use in source and binary forms, with or without // modification, are permitted provided that the following conditions // are met: // 1. Redistributions of source code must retain the above copyright // notice, this list of conditions and the following disclaimer. // 2. Redistributions in binary form must reproduce the above copyright // notice, this list of conditions and the following disclaimer in the // documentation and/or other materials provided with the distribution. // 3. The name of the author may not be used to endorse or promote products // derived from this software without specific prior written permission. 
// // THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR // IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES // OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. // IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, // INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT // NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF // THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #ifndef CLASS_NLL_MEASURER_INC #define CLASS_NLL_MEASURER_INC #include "Measurer.h" #include "DataSet.h" #include "ClassFormat.h" namespace Torch { /** This class measures the negative log likelihood. In fact, it supposes that for each input frames, frames[i] is the log-probability for class $i$. The given #class_format# gives the class format of the targets of the dataset. By default, the measurer divides the log-probabilty by the number of examples and the number of frames. @author Ronan Collobert (collober@idiap.ch) */ class ClassNLLMeasurer : public Measurer { public: ClassFormat *class_format; bool average_examples; bool average_frames; real internal_error; Sequence *inputs; //----- /// ClassNLLMeasurer(Sequence *inputs_, DataSet *data_, ClassFormat *class_format_, XFile *file_); //----- virtual void reset(); virtual void measureExample(); virtual void measureIteration(); virtual ~ClassNLLMeasurer(); }; } #endif torch3-3.1.orig/core/CmdLine.cc0000644000175000017500000003444310106445234016455 0ustar kalfakalfa00000000000000// Copyright (C) 2003--2004 Ronan Collobert (collober@idiap.ch) // // This file is part of Torch 3.1. // // All rights reserved. 
// // Redistribution and use in source and binary forms, with or without // modification, are permitted provided that the following conditions // are met: // 1. Redistributions of source code must retain the above copyright // notice, this list of conditions and the following disclaimer. // 2. Redistributions in binary form must reproduce the above copyright // notice, this list of conditions and the following disclaimer in the // documentation and/or other materials provided with the distribution. // 3. The name of the author may not be used to endorse or promote products // derived from this software without specific prior written permission. // // THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR // IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES // OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. // IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, // INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT // NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF // THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #include "CmdLine.h" #include namespace Torch { // Oy. J'ai fait le menage dans c'te classe. // Pleins de features non documentees dans le tutorial! CmdLine::CmdLine() { n_master_switches = 1; // the default! 
n_cmd_options = (int *)allocator->alloc(sizeof(int)); cmd_options = (CmdOption ***)allocator->alloc(sizeof(CmdOption **)); n_cmd_options[0] = 0; cmd_options[0] = NULL; text_info = NULL; working_directory = (char *)allocator->alloc(2); strcpy(working_directory, "."); associated_files = NULL; n_associated_files = 0; master_switch = -1; program_name = (char *)allocator->alloc(1); *program_name = '\0'; addBOption("write log", &write_log, true, "Should I output the cmd.log file ?"); } void CmdLine::info(const char *text) { if(text_info) allocator->free(text_info); text_info = (char *)allocator->alloc(strlen(text)+1); strcpy(text_info, text); } void CmdLine::addCmdOption(CmdOption *option) { if(option->isMasterSwitch()) { n_cmd_options = (int *)allocator->realloc(n_cmd_options, sizeof(int)*(n_master_switches+1)); cmd_options = (CmdOption ***)allocator->realloc(cmd_options, sizeof(CmdOption **)*(n_master_switches+1)); n_cmd_options[n_master_switches] = 0; cmd_options[n_master_switches] = NULL; n_master_switches++; } int n = n_master_switches-1; cmd_options[n] = (CmdOption **)allocator->realloc(cmd_options[n], (n_cmd_options[n]+1)*sizeof(CmdOption *)); cmd_options[n][n_cmd_options[n]] = option; n_cmd_options[n]++; } void CmdLine::addMasterSwitch(const char *text) { CmdOption *option = new(allocator) CmdOption(text, "", "", false); option->isMasterSwitch(true); addCmdOption(option); } void CmdLine::addICmdOption(const char *name, int *ptr, int init_value, const char *help, bool save_it) { IntCmdOption *option = new(allocator) IntCmdOption(name, ptr, init_value, help, save_it); addCmdOption(option); } void CmdLine::addBCmdOption(const char *name, bool *ptr, bool init_value, const char *help, bool save_it) { BoolCmdOption *option = new(allocator) BoolCmdOption(name, ptr, init_value, help, save_it); addCmdOption(option); } void CmdLine::addRCmdOption(const char *name, real *ptr, real init_value, const char *help, bool save_it) { RealCmdOption *option = new(allocator) 
RealCmdOption(name, ptr, init_value, help, save_it); addCmdOption(option); } void CmdLine::addSCmdOption(const char *name, char **ptr, const char *init_value, const char *help, bool save_it) { StringCmdOption *option = new(allocator) StringCmdOption(name, ptr, init_value, help, save_it); addCmdOption(option); } void CmdLine::addICmdArg(const char *name, int *ptr, const char *help, bool save_it) { IntCmdOption *option = new(allocator) IntCmdOption(name, ptr, 0, help, save_it); option->isArgument(true); addCmdOption(option); } void CmdLine::addBCmdArg(const char *name, bool *ptr, const char *help, bool save_it) { BoolCmdOption *option = new(allocator) BoolCmdOption(name, ptr, false, help, save_it); option->isArgument(true); addCmdOption(option); } void CmdLine::addRCmdArg(const char *name, real *ptr, const char *help, bool save_it) { RealCmdOption *option = new(allocator) RealCmdOption(name, ptr, 0., help, save_it); option->isArgument(true); addCmdOption(option); } void CmdLine::addSCmdArg(const char *name, char **ptr, const char *help, bool save_it) { StringCmdOption *option = new(allocator) StringCmdOption(name, ptr, "", help, save_it); option->isArgument(true); addCmdOption(option); } void CmdLine::addText(const char *text) { CmdOption *option = new(allocator) CmdOption(text, "", "", false); option->isText(true); addCmdOption(option); } int CmdLine::read(int argc_, char **argv_) { allocator->free(program_name); program_name = (char *)allocator->alloc(strlen(argv_[0])+1); strcpy(program_name, argv_[0]); argv = argv_+1; argc = argc_-1; // Look for help request and the Master Switch master_switch = 0; if(argc >= 1) { if( ! (strcmp(argv[0], "-h") && strcmp(argv[0], "-help") && strcmp(argv[0], "--help")) ) help(); for(int i = 1; i < n_master_switches; i++) { if(cmd_options[i][0]->isCurrent(&argc, &argv)) { master_switch = i; break; } } } CmdOption **cmd_options_ = cmd_options[master_switch]; int n_cmd_options_ = n_cmd_options[master_switch]; // Initialize the options. 
for(int i = 0; i < n_cmd_options_; i++) cmd_options_[i]->initValue(); while(argc > 0) { // First, check the option. int current_option = -1; for(int i = 0; i < n_cmd_options_; i++) { if(cmd_options_[i]->isCurrent(&argc, &argv)) { current_option = i; break; } } if(current_option >= 0) { if(cmd_options_[current_option]->is_setted) error("CmdLine: option %s is setted twice", cmd_options_[current_option]->name); cmd_options_[current_option]->read(&argc, &argv); cmd_options_[current_option]->is_setted = true; } else { // Check for arguments for(int i = 0; i < n_cmd_options_; i++) { if(cmd_options_[i]->isArgument() && (!cmd_options_[i]->is_setted)) { current_option = i; break; } } if(current_option >= 0) { cmd_options_[current_option]->read(&argc, &argv); cmd_options_[current_option]->is_setted = true; } else error("CmdLine: parse error near <%s>. Too many arguments.", argv[0]); } } // Check for empty arguments for(int i = 0; i < n_cmd_options_; i++) { if(cmd_options_[i]->isArgument() && (!cmd_options_[i]->is_setted)) { message("CmdLine: not enough arguments!\n"); help(); } } if(write_log) { DiskXFile cmd_log("cmd.log", "w"); writeLog(&cmd_log, false); } return master_switch; } // RhhAHha AH AHa hha hahaAH Ha ha ha void CmdLine::help() { if(text_info) print("%s\n", text_info); for(int master_switch_ = 0; master_switch_ < n_master_switches; master_switch_++) { int n_cmd_options_ = n_cmd_options[master_switch_]; CmdOption **cmd_options_ = cmd_options[master_switch_]; int n_real_options = 0; for(int i = 0; i < n_cmd_options_; i++) { if(cmd_options_[i]->isOption()) n_real_options++; } if(master_switch_ == 0) { print("#\n"); print("# usage: %s", program_name); if(n_real_options > 0) print(" [options]"); } else { print("\n#\n"); print("# or: %s %s", program_name, cmd_options_[0]->name); if(n_real_options > 0) print(" [options]"); } for(int i = 0; i < n_cmd_options_; i++) { if(cmd_options_[i]->isArgument()) print(" <%s>", cmd_options_[i]->name); } print("\n#\n"); // Cherche la 
longueur max du param int long_max = 0; for(int i = 0; i < n_cmd_options_; i++) { int laurence = 0; if(cmd_options_[i]->isArgument()) laurence = strlen(cmd_options_[i]->name)+2; if(cmd_options_[i]->isOption()) laurence = strlen(cmd_options_[i]->name)+strlen(cmd_options_[i]->type_name)+1; if(long_max < laurence) long_max = laurence; } for(int i = 0; i < n_cmd_options_; i++) { int z = 0; if(cmd_options_[i]->isText()) { z = -1; print("%s", cmd_options_[i]->name); } if(cmd_options_[i]->isArgument()) { z = strlen(cmd_options_[i]->name)+2; print(" "); print("<%s>", cmd_options_[i]->name); } if(cmd_options_[i]->isOption()) { z = strlen(cmd_options_[i]->name)+strlen(cmd_options_[i]->type_name)+1; print(" "); print("%s", cmd_options_[i]->name); print(" %s", cmd_options_[i]->type_name); } if(z >= 0) { for(int i = 0; i < long_max+1-z; i++) print(" "); } if( cmd_options_[i]->isOption() || cmd_options_[i]->isArgument() ) print("-> %s", cmd_options_[i]->help); if(cmd_options_[i]->isArgument()) print(" (%s)", cmd_options_[i]->type_name); if(cmd_options_[i]->isOption()) { DiskXFile std_out(stdout); print(" "); cmd_options_[i]->printValue(&std_out); } if(!cmd_options_[i]->isMasterSwitch()) print("\n"); } } exit(-1); } void CmdLine::setWorkingDirectory(const char* dirname) { allocator->free(working_directory); working_directory = (char *)allocator->alloc(strlen(dirname)+1); strcpy(working_directory, dirname); } char *CmdLine::getPath(const char *filename) { associated_files = (char **)allocator->realloc(associated_files, sizeof(char *)*(n_associated_files+1)); char *path_ = (char *)allocator->alloc(strlen(working_directory)+strlen(filename)+2); strcpy(path_, working_directory); strcat(path_, "/"); strcat(path_, filename); associated_files[n_associated_files] = (char *)allocator->alloc(strlen(filename)+1); strcpy(associated_files[n_associated_files], filename); n_associated_files++; return path_; } DiskXFile *CmdLine::getXFile(const char *filename) { char *full_file_name = 
this->getPath(filename); DiskXFile *file_ = new(allocator) DiskXFile(full_file_name, "w"); return file_; } void CmdLine::saveXFile(XFile *file) { if(master_switch < 0) error("CmdLine: nothing to save!"); writeLog(file, true); file->taggedWrite(&master_switch, sizeof(int), 1, "MASTER_SWITCH"); CmdOption **cmd_options_ = cmd_options[master_switch]; int n_cmd_options_ = n_cmd_options[master_switch]; for(int i = 0; i < n_cmd_options_; i++) { if(cmd_options_[i]->save) cmd_options_[i]->saveXFile(file); } } void CmdLine::writeLog(XFile *file, bool write_associated_files) { // Header time_t time_ = time(NULL); file->printf("# Date: %s", ctime(&time_)); file->printf("# Program: %s\n", program_name); if(master_switch < 0) file->printf("\n# CmdLine not read\n"); if(master_switch == 0) file->printf("\n# Mode: default\n"); if(master_switch > 0) file->printf("\n# Mode: <%s>\n", cmd_options[master_switch][0]->name); CmdOption **cmd_options_ = cmd_options[master_switch]; int n_cmd_options_ = n_cmd_options[master_switch]; // Cherche la longueur max du param int long_max = 0; for(int i = 0; i < n_cmd_options_; i++) { int z = 0; if(cmd_options_[i]->isArgument()) z = strlen(cmd_options_[i]->name)+2; if(cmd_options_[i]->isOption()) z = strlen(cmd_options_[i]->name)+strlen(cmd_options_[i]->type_name)+1; if(long_max < z) long_max = z; } file->printf("\n# Arguments:\n"); for(int i = 0; i < n_cmd_options_; i++) { if(!cmd_options_[i]->isArgument()) continue; int z = strlen(cmd_options_[i]->name)+2; file->printf(" "); file->printf("%s", cmd_options_[i]->name); if(z >= 0) { for(int i = 0; i < long_max+1-z; i++) file->printf(" "); } cmd_options_[i]->printValue(file); file->printf("\n"); } file->printf("\n# Options:\n"); for(int i = 0; i < n_cmd_options_; i++) { if(!cmd_options_[i]->isOption()) continue; int z = strlen(cmd_options_[i]->name)+2; if(cmd_options_[i]->is_setted) file->printf(" * "); else file->printf(" "); file->printf("%s", cmd_options_[i]->name); if(z >= 0) { for(int i = 0; i < 
long_max+1-z; i++) file->printf(" "); } cmd_options_[i]->printValue(file); file->printf("\n"); } if(write_associated_files) { file->printf("\n# Associated files:\n"); for(int i = 0; i < n_associated_files; i++) file->printf(" %s\n", associated_files[i]); } file->printf("\n<#>\n\n"); } void CmdLine::loadXFile(XFile *file) { // Skip the header int header_end = 0; while( (header_end != 3) && (!file->eof()) ) { char c; file->scanf("%c", &c); if(c == '<') header_end = 1; else { if(c == '#') { if(header_end == 1) header_end = 2; else header_end = 0; } else { if(c == '>') { if(header_end == 2) { header_end = 3; // the return-lines file->scanf("%c", &c); file->scanf("%c", &c); } else header_end = 0; } else header_end = 0; } } } if(header_end != 3) error("CmdLine: cannot find the end of the header!"); ////////////////// int old_master_switch; file->taggedRead(&old_master_switch, sizeof(int), 1, "MASTER_SWITCH"); CmdOption **cmd_options_ = cmd_options[old_master_switch]; int n_cmd_options_ = n_cmd_options[old_master_switch]; for(int i = 0; i < n_cmd_options_; i++) { if(cmd_options_[i]->save) cmd_options_[i]->loadXFile(file); } } CmdLine::~CmdLine() { } } torch3-3.1.orig/core/CmdLine.h0000644000175000017500000001434510106445234016316 0ustar kalfakalfa00000000000000// Copyright (C) 2003--2004 Ronan Collobert (collober@idiap.ch) // // This file is part of Torch 3.1. // // All rights reserved. // // Redistribution and use in source and binary forms, with or without // modification, are permitted provided that the following conditions // are met: // 1. Redistributions of source code must retain the above copyright // notice, this list of conditions and the following disclaimer. // 2. Redistributions in binary form must reproduce the above copyright // notice, this list of conditions and the following disclaimer in the // documentation and/or other materials provided with the distribution. // 3. 
// The name of the author may not be used to endorse or promote products
// derived from this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
// IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
// OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
// IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
// INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
// NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
// THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

#ifndef CMD_LINE_INC
#define CMD_LINE_INC

#include "Object.h"
#include "CmdOption.h"
#include "DiskXFile.h"

namespace Torch {

/** This class provides a useful interface for the user,
    to easily read some arguments/options from the command-line.

    Note that here, we make a difference between:
    \begin{itemize}
      \item {\bf options} which are not required.
      \item {\bf arguments} which are required.
    \end{itemize}

    Options:
    \begin{tabular}{lcll}
      "write log" & bool & Should I output the cmd.log file ? & [true]
    \end{tabular}

    @author Ronan Collobert (collober@idiap.ch)
    @see CmdOption
*/
class CmdLine : public Object
{
  public:
    /// Copy of #argv_[0]# given to #read()# (empty before that).
    char *program_name;

    /// Value of the "write log" option.
    bool write_log;

    /// Number of option tables (the default one included).
    int n_master_switches;

    /// Number of entries in each option table.
    int *n_cmd_options;

    /// One option table per master switch.
    CmdOption ***cmd_options;

    /// Text printed at the top of the help (see #info()#), or NULL.
    char *text_info;

    /// Directory prepended by #getPath()# and #getXFile()#.
    char *working_directory;

    /// File names requested through #getPath()#.
    char **associated_files;
    int n_associated_files;

    /// Master switch selected by #read()# (-1 before #read()#).
    int master_switch;

    /// Remaining part of the command line while parsing.
    char **argv;
    int argc;

    // -----

    ///
    CmdLine();

    /** Read the command-line.
        Call this function {\bf after} adding options/arguments
        that you need, with the help of the following functions.
        Returns the index of the selected master switch.
    */
    int read(int argc_, char **argv_);

    /** Print the help.
        Call this function {\bf after} adding options/arguments
        that you need, with the help of the following functions.
        Note that it terminates the program.
    */
    void help();

    //-----

    /** Functions for adding options.
        The calling order of the following functions will
        define the text order associated when you will call #help()#.

        Add an option (Int, Bool, Real, String).
        \begin{itemize}
          \item #name# the name of the option (must be unique).
          \item #ptr# is the pointer on the optional variable.
          \item #init_value# is the initialization value.
          \item #help# is the help text for this option.
        \end{itemize}

        The option will be set to #value# in the command-line
        by printing "#name# #value#"
    */
    ///
    void addICmdOption(const char *name, int *ptr, int init_value, const char *help="", bool save_it=false);

    ///
    void addBCmdOption(const char *name, bool *ptr, bool init_value, const char *help="", bool save_it=false);

    ///
    void addRCmdOption(const char *name, real *ptr, real init_value, const char *help="", bool save_it=false);

    ///
    void addSCmdOption(const char *name, char **ptr, const char *init_value, const char *help="", bool save_it=false);

    /** Functions for adding an argument.
        The argument will be set to #value# in the command-line
        by writing "#value#" {\bf after} all the options.
        If there are N arguments, you have to write
        "#value1# #value2# #value3# ... #valueN#" to set them in the
        command-line.
    */
    ///
    void addICmdArg(const char *name, int *ptr, const char *help="", bool save_it=false);

    ///
    void addBCmdArg(const char *name, bool *ptr, const char *help="", bool save_it=false);

    ///
    void addRCmdArg(const char *name, real *ptr, const char *help="", bool save_it=false);

    ///
    void addSCmdArg(const char *name, char **ptr, const char *help="", bool save_it=false);

    /// Add a text line in the help message.
    void addText(const char *text);

    /// Add a text at the beginning of the help.
    void info(const char *text);

    /** Add a master switch.
        It creates another type of command line.
        If the #text# is the first argument of the user command line,
        only the options corresponding to this new command line will be
        considered.
    */
    void addMasterSwitch(const char *text);

    /** Set the working directory.
        Use it with #getPath()# and #getXFile()#.
    */
    void setWorkingDirectory(const char* dirname);

    /** Get a full path.
        It adds the #working_directory# before the #filename#.
        This path will be deleted by CmdLine.
    */
    char *getPath(const char *filename);

    /** Get a DiskXFile.
        It adds the #working_directory# before the #filename# and
        opens the file for writing.
        This XFile will be deleted by CmdLine.
    */
    DiskXFile *getXFile(const char *filename);

    /// Load the object from a file pointer (\emph{not the options})
    virtual void loadXFile(XFile *file);

    /// Save the object to a file pointer (\emph{not the options})
    virtual void saveXFile(XFile *file);

    //-----

    /** Add an option to the command line. Use this method
        if the wrappers that are provided are not sufficient.
    */
    void addCmdOption(CmdOption *option);

    /** Write a log in #file#.
        If desired, the associated files can be printed.
    */
    void writeLog(XFile *file, bool write_associated_files);

    virtual ~CmdLine();
};

}

#endif
torch3-3.1.orig/core/DataSet.cc0000644000175000017500000001007310106445234016460 0ustar kalfakalfa00000000000000
// Copyright (C) 2003--2004 Ronan Collobert (collober@idiap.ch)
//
// This file is part of Torch 3.1.
//
// All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions
// are met:
// 1. Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
// 2. Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
// 3.
The name of the author may not be used to endorse or promote products // derived from this software without specific prior written permission. // // THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR // IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES // OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. // IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, // INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT // NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF // THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #include "DataSet.h" namespace Torch { DataSet::DataSet() { n_targets = 0; targets = NULL; n_inputs = 0; inputs = NULL; n_examples = 0; n_real_examples = 0; select_examples = false; selected_examples = NULL; subsets = NULL; n_examples_subsets = NULL; n_subsets = 0; // Indeterminated state... real_current_example_index = -1; // Pushed Examples... 
pushed_examples = new(allocator) Stack; } void DataSet::init(int n_examples_, int n_inputs_, int n_targets_) { n_examples = n_examples_; n_real_examples = n_examples_; n_inputs = n_inputs_; n_targets = n_targets_; select_examples = false; selected_examples = (int *)allocator->alloc(sizeof(int)*n_examples); for(int i = 0; i < n_examples; i++) selected_examples[i] = i; } void DataSet::pushSubset(int *subset_, int n_examples_) { subsets = (int **)allocator->realloc(subsets, sizeof(int *)*(n_subsets+1)); n_examples_subsets = (int *)allocator->realloc(n_examples_subsets, sizeof(int)*(n_subsets+1)); subsets[n_subsets] = subset_; n_examples_subsets[n_subsets] = n_examples_; n_subsets++; selected_examples = (int *)allocator->realloc(selected_examples, sizeof(int)*n_examples_); for(int t = 0; t < n_examples_; t++) { int index = subset_[t]; for(int s = n_subsets-2; s >= 0; s--) index = subsets[s][index]; selected_examples[t] = index; } select_examples = true; n_examples = n_examples_; } void DataSet::popSubset() { // Rq: realloc renvoie NULL si freed subsets = (int **)allocator->realloc(subsets, sizeof(int *)*(n_subsets-1)); n_examples_subsets = (int *)allocator->realloc(n_examples_subsets, sizeof(int)*(n_subsets-1)); n_subsets--; if(n_subsets == 0) { select_examples = false; n_examples = n_real_examples; selected_examples = (int *)allocator->realloc(selected_examples, sizeof(int)*n_examples); for(int i = 0; i < n_examples; i++) selected_examples[i] = i; } else { int n_examples_ = n_examples_subsets[n_subsets-1]; int *subset_ = subsets[n_subsets-1]; selected_examples = (int *)allocator->realloc(selected_examples, sizeof(int)*n_examples_); for(int t = 0; t < n_examples_; t++) { int index = subset_[t]; for(int s = n_subsets-2; s >= 0; s--) index = subsets[s][index]; selected_examples[t] = index; } n_examples = n_examples_; } } void DataSet::setExample(int t, bool set_inputs, bool set_targets) { int t_ = (select_examples ? 
selected_examples[t] : t); setRealExample(t_, set_inputs, set_targets); } DataSet::~DataSet() { } } torch3-3.1.orig/core/DataSet.h0000644000175000017500000001354510106445234016331 0ustar kalfakalfa00000000000000// Copyright (C) 2003--2004 Ronan Collobert (collober@idiap.ch) // // This file is part of Torch 3.1. // // All rights reserved. // // Redistribution and use in source and binary forms, with or without // modification, are permitted provided that the following conditions // are met: // 1. Redistributions of source code must retain the above copyright // notice, this list of conditions and the following disclaimer. // 2. Redistributions in binary form must reproduce the above copyright // notice, this list of conditions and the following disclaimer in the // documentation and/or other materials provided with the distribution. // 3. The name of the author may not be used to endorse or promote products // derived from this software without specific prior written permission. // // THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR // IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES // OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. // IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, // INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT // NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF // THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
#ifndef DATA_SET_INC
#define DATA_SET_INC

#include "Object.h"
#include "Sequence.h"
#include "PreProcessing.h"
#include "Stack.h"

// Bookkeeping for nested frame subsets.
// NOTE(review): declared outside the Torch namespace and not used by
// DataSet itself -- confirm where it is consumed.
struct FrameSubsets
{
    int **subsets;
    int *subsets_sizes;
    int n_subsets;
    int n_selected_frames;
    int *selected_frames;
};

namespace Torch {

/** Provides an interface to manipulate all kinds of data.
    A dataset contains two kinds of things: input sequences
    and target sequences.

    @author Ronan Collobert (collober@idiap.ch)
*/
class DataSet : public Object
{
  public:
    //--- internal ---
    /// Stack of pushed subsets (arrays given to #pushSubset()#).
    int **subsets;
    /// Size of each pushed subset.
    int *n_examples_subsets;
    /// Number of pushed subsets.
    int n_subsets;
    /// Storage used by #pushExample()# / #popExample()#.
    Stack *pushed_examples;
    //----------------

    /// True if a subset of the examples is selected.
    bool select_examples;

    /** The indices of all selected examples.
        When #select_examples# is false, it contains
        the indices of all examples.
    */
    int *selected_examples;

    /// Frame size of #inputs#.
    int n_inputs;

    /// Frame size of #targets#.
    int n_targets;

    /** Index of the current example.
        Warning: it's the \emph{real} index and not the index
        in the #selected_examples# table.
    */
    int real_current_example_index;

    /// Pointer to the inputs of the current example.
    Sequence *inputs;

    /// Pointer to the targets of the current example.
    Sequence *targets;

    /** Number of examples in the dataset.
        If you're using #select_examples#, it's
        the number of selected examples.
    */
    int n_examples;

    /** Real number of examples in the dataset.
        It's the number of examples in memory.
        (= #n_examples# if #select_examples# is false)
    */
    int n_real_examples;

    //-----

    ///
    DataSet();

    /** Method which initializes some fields of the datasets.
        It has to be called only in the constructor of
        your subclasses. Just for developers of new
        datasets.
    */
    void init(int n_examples_, int n_inputs_, int n_targets_);

    /** Set #targets# and #inputs# to the targets and inputs
        of the example with the index #selected_examples[t]#.
        Warning: after a #setExample()# the previous selected
        example is \emph{not} supposed to exist... for that,
        use #pushExample()#.
    */
    void setExample(int t, bool set_inputs=true, bool set_targets=true);

    /** Set #targets# and #inputs# to the targets and inputs
        of the example with the index #t#.
        If you create a new dataset, you \emph{must} update
        #real_current_example_index# inside.
        Warning: after a #setExample()# the previous selected
        example is \emph{not} supposed to exist... for that,
        use #pushExample()#.
    */
    virtual void setRealExample(int t, bool set_inputs=true, bool set_targets=true) = 0;

    /** Set a new subset.
        \begin{itemize}
          \item #subset_# (of size #n_examples_#) is a set of indices which
          define a subset of #data#.
          \item if a #pushSubset()# has been already called, the next
          #pushSubset()# defines a subset of the previous subset, and so on...
          \item this function sets #select_examples# to #true#
          and sets the real indices of the examples in #selected_examples#.
        \end{itemize}
    */
    virtual void pushSubset(int *subset_, int n_examples_);

    /** Remove the last subset.
        \begin{itemize}
          \item recomputes "selected_examples".
          \item if it was the last subset, set #select_examples# to #false#.
        \end{itemize}
    */
    virtual void popSubset();

    /** Tells that the current example must be kept in memory
        after next calls of #setExample()#.
    */
    virtual void pushExample() = 0;

    /** Tells that the last pushed example will be now
        the current example, and therefore, will be forgotten
        after the next call of #setExample()#.
    */
    virtual void popExample() = 0;

    /** Put in #n_input_frames# and #n_target_frames# the number
        of input frames and target frames for example #t#.
        This takes subsets into account.
        If one field is #NULL#, it will not be returned.
    */
    virtual void getNumberOfFrames(int t, int *n_input_frames, int *n_target_frames) = 0;

    /// Perform some pre-processing on data.
    virtual void preProcess(PreProcessing *pre_processing) = 0;

    //-----

    virtual ~DataSet();
};

}

#endif
torch3-3.1.orig/core/DiskDataSet.cc0000644000175000017500000001036210106445234017274 0ustar kalfakalfa00000000000000
// Copyright (C) 2003--2004 Ronan Collobert (collober@idiap.ch)
//
// This file is part of Torch 3.1.
//
// All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions
// are met:
// 1. Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
// 2. Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
// 3. The name of the author may not be used to endorse or promote products
// derived from this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
// IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
// OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
// IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
// INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
// NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
// THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#include "DiskDataSet.h"

namespace Torch {

/// Start without any IO and with an empty list of pre-processings.
DiskDataSet::DiskDataSet()
{
  io_inputs = NULL;
  io_targets = NULL;
  pre_processes = new(allocator) PreProcessingList;
}

/** Attach the IO providing the inputs and/or the targets (either may be
    NULL), size the dataset accordingly and allocate the sequences which
    will hold the current example.
*/
void DiskDataSet::init(IOSequence *io_inputs_, IOSequence *io_targets_)
{
  io_inputs = io_inputs_;
  io_targets = io_targets_;

  // When both IO are present they must agree; the targets give the count.
  int n_sequences_ = 0;
  if(io_inputs)
    n_sequences_ = io_inputs->n_sequences;
  if(io_targets)
    n_sequences_ = io_targets->n_sequences;

  if(io_inputs && io_targets && (io_inputs->n_sequences != io_targets->n_sequences))
    error("DiskDataSet: inputs IO and targets IO don't have the same number of sequences!");

  int input_frame_size_ = 0;
  if(io_inputs)
    input_frame_size_ = io_inputs->frame_size;

  int target_frame_size_ = 0;
  if(io_targets)
    target_frame_size_ = io_targets->frame_size;

  DataSet::init(n_sequences_, input_frame_size_, target_frame_size_);

  if(n_inputs > 0)
    inputs = new(allocator) Sequence(0, n_inputs);
  if(n_targets > 0)
    targets = new(allocator) Sequence(0, n_targets);
}

/** Load the example of real index #t# from the IO into #inputs# and/or
    #targets#, then run every registered pre-processing on what was loaded.
    Nothing is done when #t# is already the current example.
*/
void DiskDataSet::setRealExample(int t, bool set_inputs, bool set_targets)
{
  // Note: this works because a dataset starts in an indeterminate state
  // (the current index does not match any example).
  if(t == real_current_example_index)
    return;

  if(set_inputs && (n_inputs > 0))
  {
    inputs->resize(io_inputs->getNumberOfFrames(t));
    io_inputs->getSequence(t, inputs);

    if(pre_processes)
    {
      for(int k = 0; k < pre_processes->n_nodes; k++)
        pre_processes->nodes[k]->preProcessInputs(inputs);
    }
  }

  if(set_targets && (n_targets > 0))
  {
    targets->resize(io_targets->getNumberOfFrames(t));
    io_targets->getSequence(t, targets);

    if(pre_processes)
    {
      for(int k = 0; k < pre_processes->n_nodes; k++)
        pre_processes->nodes[k]->preProcessTargets(targets);
    }
  }

  real_current_example_index = t;
}

/** Save the current example (sequences and index) on #pushed_examples#
    and allocate fresh, empty sequences for the next one.
*/
void DiskDataSet::pushExample()
{
  pushed_examples->push(&inputs, sizeof(Sequence *));
  pushed_examples->push(&targets, sizeof(Sequence *));
  pushed_examples->push(&real_current_example_index, sizeof(int));

  if(n_inputs > 0)
    inputs = new(allocator) Sequence(0, n_inputs);
  if(n_targets > 0)
    targets = new(allocator) Sequence(0, n_targets);

  // No example is loaded in the fresh sequences.
  real_current_example_index = -1;
}

/** Release the current sequences and pop the three entries saved by
    #pushExample()#.
*/
void DiskDataSet::popExample()
{
  allocator->free(inputs);
  allocator->free(targets);

  // One pop per value pushed by pushExample().
  for(int k = 0; k < 3; k++)
    pushed_examples->pop();
}

/** Ask the IO for the number of frames of the example #t# of the current
    selection. A NULL output pointer is skipped.
*/
void DiskDataSet::getNumberOfFrames(int t, int *n_input_frames, int *n_target_frames)
{
  if(n_input_frames && (n_inputs > 0))
    *n_input_frames = io_inputs->getNumberOfFrames(selected_examples[t]);

  if(n_target_frames && (n_targets > 0))
    *n_target_frames = io_targets->getNumberOfFrames(selected_examples[t]);
}

/// Register a pre-processing applied to every example loaded afterwards.
void DiskDataSet::preProcess(PreProcessing *pre_processing)
{
  pre_processes->addNode(pre_processing);
}

DiskDataSet::~DiskDataSet()
{
}

}
torch3-3.1.orig/core/DiskDataSet.h0000644000175000017500000000564610106445234017147 0ustar kalfakalfa00000000000000
// Copyright (C) 2003--2004 Ronan Collobert (collober@idiap.ch)
//
// This file is part of Torch 3.1.
//
// All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions
// are met:
// 1. Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
// 2. Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
// 3. The name of the author may not be used to endorse or promote products
// derived from this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
// IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
// OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
// IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, // INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT // NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF // THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#ifndef DISK_DATA_SET_INC
#define DISK_DATA_SET_INC

#include "DataSet.h"
#include "IOSequence.h"
#include "List.h"

namespace Torch {

/** Provides an interface to manipulate all kinds of data which are
    kept on disk, and not fully loaded in memory.
    It uses #IOSequence#. Useful for large databases.

    Examples are read from the IOs each time a new example is set,
    and the registered pre-processings are then applied to them.

    DiskMatDataSet is a good example if you plan to code a new
    DiskDataSet.

    @see DiskMatDataSet
    @see IOSequence
    @author Ronan Collobert (collober@idiap.ch)
*/
class DiskDataSet : public DataSet
{
  public:
    /// List of pre-processings to apply to each loaded example.
    PreProcessingList *pre_processes;

    /// IOSequence which provides the inputs (NULL if there are no inputs).
    IOSequence *io_inputs;

    /// IOSequence which provides the targets (NULL if there are no targets).
    IOSequence *io_targets;

    /// Creates a dataset without IOs; #init()# must be called afterwards.
    DiskDataSet();

    /** This function \emph{has to be called} by your sub-classes.
        You give here the IOSequence objects which handle the inputs and
        the targets of your dataset. Either one may be NULL.
        If both are given, they must have the same number of sequences.
        Should be called in the constructor of all your sub-classes.
    */
    void init(IOSequence *io_inputs_, IOSequence *io_targets_);

    /// Number of frames of example #t#; NULL output pointers are ignored.
    virtual void getNumberOfFrames(int t, int *n_input_frames, int *n_target_frames);

    /// Adds #pre_processing# to the list applied to each loaded example.
    virtual void preProcess(PreProcessing *pre_processing);

    /// Loads the example of real index #t# from the IOs (no-op if it is already the current one).
    virtual void setRealExample(int t, bool set_inputs=true, bool set_targets=true);

    /// Saves the current example and allocates new empty working sequences.
    virtual void pushExample();

    /// Frees the working sequences and restores the example saved by #pushExample()#.
    virtual void popExample();

    //-----

    virtual ~DiskDataSet();
};

}

#endif
torch3-3.1.orig/core/DiskMatDataSet.cc0000644000175000017500000001577110106445234017747 0ustar kalfakalfa00000000000000// Copyright (C) 2003--2004 Ronan Collobert (collober@idiap.ch) // // This file is part of Torch 3.1. // // All rights reserved. // // Redistribution and use in source and binary forms, with or without // modification, are permitted provided that the following conditions // are met: // 1. Redistributions of source code must retain the above copyright // notice, this list of conditions and the following disclaimer. // 2. Redistributions in binary form must reproduce the above copyright // notice, this list of conditions and the following disclaimer in the // documentation and/or other materials provided with the distribution. // 3. The name of the author may not be used to endorse or promote products // derived from this software without specific prior written permission. // // THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR // IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES // OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
// IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, // INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT // NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF // THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #include "DiskMatDataSet.h" #include "IOBufferize.h" #include "IOAscii.h" #include "IOMulti.h" #include "IOBin.h" #include "IOSub.h" namespace Torch { DiskMatDataSet::DiskMatDataSet(const char *filename, int n_inputs_, int n_targets_, bool one_file_is_one_sequence, int max_load, bool binary_mode) { if( (n_inputs_ < 0) && (n_targets < 0) ) error("DiskMatDataSet: cannot guess n_inputs n_targets!"); IOSequence *io_file = NULL; if(binary_mode) io_file = new(allocator) IOBin(filename, one_file_is_one_sequence, max_load, false); else io_file = new(allocator) IOAscii(filename, one_file_is_one_sequence, max_load); init_(io_file, n_inputs_, n_targets_); } DiskMatDataSet::DiskMatDataSet(char **filenames, int n_files_, int n_inputs_, int n_targets_, bool one_file_is_one_sequence, int max_load, bool binary_mode) { if(n_files_ <= 0) error("DiskMatDataSet: check the number of files!"); IOSequence **io_files = (IOSequence **)allocator->alloc(sizeof(IOSequence *)*n_files_); if(max_load > 0) { int i = 0; while( (max_load > 0) && (i < n_files_) ) { if(binary_mode) io_files[i] = new(allocator) IOBin(filenames[i], one_file_is_one_sequence, max_load, false); else io_files[i] = new(allocator) IOAscii(filenames[i], one_file_is_one_sequence, max_load); max_load -= io_files[i]->n_sequences; i++; } n_files_ = i; } else { if(binary_mode) { for(int i = 0; i < n_files_; i++) io_files[i] = new(allocator) IOBin(filenames[i], one_file_is_one_sequence, -1, false); } else { for(int i = 0; i < 
n_files_; i++) io_files[i] = new(allocator) IOAscii(filenames[i], one_file_is_one_sequence, -1); } } IOMulti *io_file = new(allocator) IOMulti(io_files, n_files_); init_(io_file, n_inputs_, n_targets_); } DiskMatDataSet::DiskMatDataSet(char **input_filenames, char **target_filenames, int n_files_, int max_load, bool binary_mode) { if(n_files_ <= 0) error("DiskMatDataSet: check the number of files!"); if(input_filenames) { IOSequence **input_io_files = (IOSequence **)allocator->alloc(sizeof(IOSequence *)*n_files_); int max_load_ = max_load; int n_files__ = 0; if(max_load_ > 0) { int i = 0; while( (max_load_ > 0) && (i < n_files_) ) { if(binary_mode) input_io_files[i] = new(allocator) IOBin(input_filenames[i], true, max_load_); else input_io_files[i] = new(allocator) IOAscii(input_filenames[i], true, max_load_); max_load_ -= input_io_files[i]->n_sequences; i++; } n_files__ = i; } else { if(binary_mode) { for(int i = 0; i < n_files_; i++) input_io_files[i] = new(allocator) IOBin(input_filenames[i], true, -1); } else { for(int i = 0; i < n_files_; i++) input_io_files[i] = new(allocator) IOAscii(input_filenames[i], true, -1); } n_files__ = n_files_; } io_inputs = new(allocator) IOMulti(input_io_files, n_files__); } if(target_filenames) { IOSequence **target_io_files = (IOSequence **)allocator->alloc(sizeof(IOSequence *)*n_files_); int max_load_ = max_load; int n_files__ = 0; if(max_load_ > 0) { int i = 0; while( (max_load_ > 0) && (i < n_files_) ) { if(binary_mode) target_io_files[i] = new(allocator) IOBin(target_filenames[i], true, max_load_); else target_io_files[i] = new(allocator) IOAscii(target_filenames[i], true, max_load_); max_load_ -= target_io_files[i]->n_sequences; i++; } n_files__ = i; } else { if(binary_mode) { for(int i = 0; i < n_files_; i++) target_io_files[i] = new(allocator) IOBin(target_filenames[i], true, -1); } else { for(int i = 0; i < n_files_; i++) target_io_files[i] = new(allocator) IOAscii(target_filenames[i], true, -1); } n_files__ = n_files_; 
} io_targets = new(allocator) IOMulti(target_io_files, n_files__); } DiskDataSet::init(io_inputs, io_targets); message("DiskMatDataSet: %d examples scanned [%d inputs and %d targets detected]", n_examples, n_inputs, n_targets); } void DiskMatDataSet::init_(IOSequence *io_file, int n_inputs_, int n_targets_) { if( (n_inputs_ > io_file->frame_size) || (n_targets_ > io_file->frame_size) ) error("DiskMatDataSet: n_inputs (%d) or n_targets (%d) too large (> %d) !", n_inputs_, n_targets_, io_file->frame_size); if(n_inputs_ < 0) n_inputs_ = io_file->frame_size - n_targets_; if(n_targets_ < 0) n_targets_ = io_file->frame_size - n_inputs_; if(io_file->frame_size != (n_inputs_ + n_targets_)) error("DiskMatDataSet: %d columns in the file != %d inputs + %d targets", io_file->frame_size, n_inputs_, n_targets_); IOBufferize *io_buffer = NULL; if( (n_inputs_ > 0) && (n_targets_ > 0) ) io_buffer = new(allocator) IOBufferize(io_file); if(n_inputs_ > 0) { if(n_targets_ > 0) io_inputs = new(allocator) IOSub(io_buffer, 0, n_inputs_); else io_inputs = io_file; } if(n_targets_ > 0) { if(n_inputs_ > 0) io_targets = new(allocator) IOSub(io_buffer, n_inputs_, n_targets_); else io_targets = io_file; } DiskDataSet::init(io_inputs, io_targets); message("DiskMatDataSet: %d examples scanned", n_examples); } DiskMatDataSet::~DiskMatDataSet() { } } torch3-3.1.orig/core/DiskMatDataSet.h0000644000175000017500000000706610106445234017607 0ustar kalfakalfa00000000000000// Copyright (C) 2003--2004 Ronan Collobert (collober@idiap.ch) // // This file is part of Torch 3.1. // // All rights reserved. // // Redistribution and use in source and binary forms, with or without // modification, are permitted provided that the following conditions // are met: // 1. Redistributions of source code must retain the above copyright // notice, this list of conditions and the following disclaimer. // 2. 
Redistributions in binary form must reproduce the above copyright // notice, this list of conditions and the following disclaimer in the // documentation and/or other materials provided with the distribution. // 3. The name of the author may not be used to endorse or promote products // derived from this software without specific prior written permission. // // THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR // IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES // OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. // IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, // INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT // NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF // THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#ifndef DISK_MAT_DATA_SET_INC
#define DISK_MAT_DATA_SET_INC

#include "DiskDataSet.h"

namespace Torch {

/** Matrix DataSet On Disk...
    The standard dataset, with data not fully loaded in memory.
    Useful for large databases.

    @see IOAscii
    @see IOBin
    @author Ronan Collobert (collober@idiap.ch)
*/
class DiskMatDataSet : public DiskDataSet
{
  private:
    /// Splits the frames of #io_file# into inputs and targets and initializes the dataset.
    void init_(IOSequence *io_file, int n_inputs_, int n_targets_);

  public:
    /** Create a new dataset from the file #filename#.
        If the file contains only one sequence, set #one_file_is_one_sequence#
        to true.
        If there are several sequences, and you only want to load the first
        #n# ones, set #max_load# to #n# (else #max_load# should be a
        negative number).
        If #binary_mode# is true, the IOBin format will be used, else
        it will be the IOAscii format.

        Input and target sequences will have the same number of frames.
        For \emph{each} frame given by the dataset, the first #n_inputs_#
        reals are for the inputs and then the next #n_targets_# reals are
        for the targets. (#n_inputs_# is the input frame size and
        #n_targets_# is the target frame size).
    */
    DiskMatDataSet(const char *filename, int n_inputs_, int n_targets_, bool one_file_is_one_sequence=false, int max_load=-1, bool binary_mode=false);

    /** Same as the previous constructor, but for several files.
        If #one_file_is_one_sequence# is true, each file will be
        considered as if it had only one sequence.
    */
    DiskMatDataSet(char **filenames, int n_files_, int n_inputs_, int n_targets_, bool one_file_is_one_sequence=false, int max_load=-1, bool binary_mode=false);

    /** Here the inputs and the targets are in separate files.
        Input and target frame sizes are therefore auto-detected.
        One file must correspond to one sequence.
    */
    DiskMatDataSet(char **input_filenames, char **target_filenames, int n_files_, int max_load=-1, bool binary_mode=false);

    virtual ~DiskMatDataSet();
};

}

#endif
torch3-3.1.orig/core/DiskXFile.cc0000644000175000017500000001252510106445234016761 0ustar kalfakalfa00000000000000// Copyright (C) 2003--2004 Ronan Collobert (collober@idiap.ch) // // This file is part of Torch 3.1. // // All rights reserved. // // Redistribution and use in source and binary forms, with or without // modification, are permitted provided that the following conditions // are met: // 1. Redistributions of source code must retain the above copyright // notice, this list of conditions and the following disclaimer. // 2. Redistributions in binary form must reproduce the above copyright // notice, this list of conditions and the following disclaimer in the // documentation and/or other materials provided with the distribution. // 3. The name of the author may not be used to endorse or promote products // derived from this software without specific prior written permission.
// // THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR // IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES // OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. // IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, // INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT // NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF // THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #include "DiskXFile.h" #include "string_utils.h" #ifdef _MSC_VER #include #endif namespace Torch { bool DiskXFile::is_native_mode = true; DiskXFile::DiskXFile(const char *file_name, const char *open_flags) { #ifdef _MSC_VER _fmode = _O_BINARY; #endif its_a_pipe = false; bool zipped_file = false; if(!strcmp(open_flags, "r")) { if(strlen(file_name) > 3) { if(strcmp(file_name+strlen(file_name)-3, ".gz")) zipped_file = false; else zipped_file = true; } else zipped_file = false; if(zipped_file) { char *cmd_buffer = strConcat(2, "zcat ", file_name); file = fopen(file_name, "r"); if(!file) error("DiskXFile: cannot open the file <%s> for reading", file_name); fclose(file); file = popen(cmd_buffer, open_flags); if(!file) error("DiskXFile: cannot execute the command <%s>", file_name, cmd_buffer); free(cmd_buffer); } } if(!zipped_file) { file = fopen(file_name, open_flags); if(!file) error("DiskXFile: cannot open <%s> in mode <%s>. 
Sorry.", file_name, open_flags); } is_opened = true; // Buffer buffer_block = NULL; buffer_block_size = 0; } DiskXFile::DiskXFile(FILE *file_) { file = file_; is_opened = false; its_a_pipe = false; // Buffer buffer_block = NULL; buffer_block_size = 0; } int DiskXFile::read(void *ptr, int block_size, int n_blocks) { int melanie = fread(ptr, block_size, n_blocks, file); if(!is_native_mode) reverseMemory(ptr, block_size, n_blocks); return(melanie); } int DiskXFile::write(void *ptr, int block_size, int n_blocks) { if(!is_native_mode) reverseMemory(ptr, block_size, n_blocks); int melanie = fwrite(ptr, block_size, n_blocks, file); if(!is_native_mode) reverseMemory(ptr, block_size, n_blocks); return(melanie); } int DiskXFile::eof() { return(feof(file)); } int DiskXFile::flush() { return(fflush(file)); } int DiskXFile::seek(long offset, int whence) { return(fseek(file, offset, whence)); } long DiskXFile::tell() { return(ftell(file)); } void DiskXFile::rewind() { ::rewind(file); } int DiskXFile::printf(const char *format, ...) 
{ va_list args; va_start(args, format); int res = vfprintf(file, format, args); va_end(args); return(res); } int DiskXFile::scanf(const char *format, void *ptr) { int res = fscanf(file, format, ptr); return(res); } char *DiskXFile::gets(char *dest, int size_) { return(fgets(dest, size_, file)); } //----- bool DiskXFile::isLittleEndianProcessor() { int x = 7; char *ptr = (char *)&x; if(ptr[0] == 0) return(false); else return(true); } bool DiskXFile::isBigEndianProcessor() { return(!isLittleEndianProcessor()); } bool DiskXFile::isNativeMode() { return(is_native_mode); } void DiskXFile::setNativeMode() { is_native_mode = true; } void DiskXFile::setLittleEndianMode() { if(isLittleEndianProcessor()) is_native_mode = true; else is_native_mode = false; } void DiskXFile::setBigEndianMode() { if(isBigEndianProcessor()) is_native_mode = true; else is_native_mode = false; } void DiskXFile::reverseMemory(void *ptr_, int block_size, int n_blocks) { if(block_size == 1) return; char *ptr = (char *)ptr_; char *ptrr, *ptrw; if(block_size > buffer_block_size) { allocator->free(buffer_block); buffer_block = (char *)allocator->alloc(block_size); } for(int i = 0; i < n_blocks; i++) { ptrr = ptr + ((i+1)*block_size); ptrw = buffer_block; for(int j = 0; j < block_size; j++) { ptrr--; *ptrw++ = *ptrr; } ptrr = buffer_block; ptrw = ptr + (i*block_size); for(int j = 0; j < block_size; j++) *ptrw++ = *ptrr++; } } //----- DiskXFile::~DiskXFile() { if(is_opened) { if(its_a_pipe) pclose(file); else fclose(file); } } } torch3-3.1.orig/core/DiskXFile.h0000644000175000017500000000701110106445234016615 0ustar kalfakalfa00000000000000// Copyright (C) 2003--2004 Ronan Collobert (collober@idiap.ch) // // This file is part of Torch 3.1. // // All rights reserved. // // Redistribution and use in source and binary forms, with or without // modification, are permitted provided that the following conditions // are met: // 1. 
Redistributions of source code must retain the above copyright // notice, this list of conditions and the following disclaimer. // 2. Redistributions in binary form must reproduce the above copyright // notice, this list of conditions and the following disclaimer in the // documentation and/or other materials provided with the distribution. // 3. The name of the author may not be used to endorse or promote products // derived from this software without specific prior written permission. // // THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR // IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES // OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. // IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, // INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT // NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF // THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#ifndef DISK_X_FILE_INC
#define DISK_X_FILE_INC

#include "XFile.h"

namespace Torch {

/** A file on the disk.

    Reading and writing can be done either in the native byte order of
    the processor, or in a forced little/big endian order; this mode is
    shared by all DiskXFile objects.

    @author Ronan Collobert (collober@idiap.ch)
*/
class DiskXFile : public XFile
{
  private:
    /// True if data are read/written without byte reversal (shared by all instances).
    static bool is_native_mode;

    /// Scratch block used by #reverseMemory()#.
    char *buffer_block;

    /// Size (in bytes) recorded for #buffer_block#.
    int buffer_block_size;

    /// Reverses in place the bytes of each of the #n_blocks# blocks of #block_size# bytes.
    void reverseMemory(void *ptr_, int block_size, int n_blocks);

  public:
    /// The underlying C stream.
    FILE *file;

    /// True if the stream was opened by this object; it is then closed by the destructor.
    bool is_opened;

    /// If true, the destructor closes #file# with pclose() instead of fclose().
    bool its_a_pipe;

    /// Open "file_name" with the flags #open_flags#
    DiskXFile(const char *file_name, const char *open_flags);

    /// Use the given file... (it will not be closed by the destructor)
    DiskXFile(FILE *file_);

    //-----

    /// Returns #true# if the processor uses the little endian coding format.
    static bool isLittleEndianProcessor();

    /// Returns #true# if the processor uses the big endian coding format.
    static bool isBigEndianProcessor();

    /// Returns #true# if we'll load/save using the native mode.
    static bool isNativeMode();

    /** We'll load/save using native mode.
        We use little endian iff the computer uses little endian.
        We use big endian iff the computer uses big endian.
    */
    static void setNativeMode();

    /** We'll load/save using little endian mode.
        It means that if the computer doesn't use Little Endian,
        data will be converted.
    */
    static void setLittleEndianMode();

    /** We'll load/save using big endian mode.
        It means that if the computer doesn't use Big Endian,
        data will be converted.
    */
    static void setBigEndianMode();

    //-----

    virtual int read(void *ptr, int block_size, int n_blocks);
    virtual int write(void *ptr, int block_size, int n_blocks);
    virtual int eof();
    virtual int flush();
    virtual int seek(long offset, int whence);
    virtual long tell();
    virtual void rewind();
    virtual int printf(const char *format, ...);
    virtual int scanf(const char *format, void *ptr) ;
    virtual char *gets(char *dest, int size_);

    //-----

    virtual ~DiskXFile();
};

}

#endif
torch3-3.1.orig/core/FileListCmdOption.cc0000644000175000017500000000771310106445234020472 0ustar kalfakalfa00000000000000// Copyright (C) 2003--2004 Ronan Collobert (collober@idiap.ch) // // This file is part of Torch 3.1. // // All rights reserved. // // Redistribution and use in source and binary forms, with or without // modification, are permitted provided that the following conditions // are met: // 1. Redistributions of source code must retain the above copyright // notice, this list of conditions and the following disclaimer. // 2. Redistributions in binary form must reproduce the above copyright // notice, this list of conditions and the following disclaimer in the // documentation and/or other materials provided with the distribution. // 3. The name of the author may not be used to endorse or promote products // derived from this software without specific prior written permission.
// // THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR // IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES // OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. // IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, // INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT // NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF // THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #include "FileListCmdOption.h" #include "DiskXFile.h" namespace Torch { FileListCmdOption::FileListCmdOption(const char *name_, const char *help_, bool save_) : CmdOption(name_, "<[-one_file] file_name>", help_, save_) { n_files = 0; file_names = NULL; } void FileListCmdOption::read(int *argc_, char ***argv_) { char **argv = *argv_; if(*argc_ == 0) error("FileListCmdOption: cannot correctly set <%s>", name); // Special case... if(!strcmp("-one_file", argv[0])) { (*argc_)--; (*argv_)++; argv = *argv_; if(*argc_ == 0) error("FileListCmdOption: cannot correctly set <%s>", name); n_files = 1; file_names = (char **)allocator->alloc(sizeof(char *)); file_names[0] = (char *)allocator->alloc(strlen(argv[0])+1); strcpy(file_names[0], argv[0]); (*argc_)--; (*argv_)++; return; } /// Read the contents of the file... 
DiskXFile file_(argv[0], "r"); char *melanie = (char *)allocator->alloc(1024); file_.read(melanie, 1, 1024); melanie[1023] = '\0'; file_.rewind(); char* endp_; strtol(melanie, &endp_, 10); if( (*endp_ != '\0') && (*endp_ != '\n') ) { do { file_.gets(melanie, 1024); n_files++; } while (!file_.eof()); n_files--; file_.rewind(); } else file_.scanf("%d", &n_files); message("FileListCmdOption: %d files detected", n_files); file_names = (char **)allocator->alloc(sizeof(char *)*n_files); for(int i = 0; i < n_files; i++) { file_.scanf("%s", melanie); file_names[i] = (char *)allocator->alloc(strlen(melanie)+1); strcpy(file_names[i], melanie); } allocator->free(melanie); //////////////////////////////////// (*argc_)--; (*argv_)++; } void FileListCmdOption::loadXFile(XFile *file) { file->taggedRead(&n_files, sizeof(int), 1, "NFILES"); file_names = (char **)allocator->alloc(sizeof(char *)*n_files); for(int i = 0; i < n_files; i++) { int melanie; file->taggedRead(&melanie, sizeof(int), 1, "SIZE"); file_names[i] = (char *)allocator->alloc(melanie); file->taggedRead(file_names[i], 1, melanie, "FILE"); } } void FileListCmdOption::saveXFile(XFile *file) { file->taggedWrite(&n_files, sizeof(int), 1, "NFILES"); for(int i = 0; i < n_files; i++) { int melanie = strlen(file_names[i])+1; file->taggedWrite(&melanie, sizeof(int), 1, "SIZE"); file->taggedWrite(file_names[i], 1, melanie, "FILE"); } } FileListCmdOption::~FileListCmdOption() { } } torch3-3.1.orig/core/FileListCmdOption.h0000644000175000017500000000472310106445234020332 0ustar kalfakalfa00000000000000// Copyright (C) 2003--2004 Ronan Collobert (collober@idiap.ch) // // This file is part of Torch 3.1. // // All rights reserved. // // Redistribution and use in source and binary forms, with or without // modification, are permitted provided that the following conditions // are met: // 1. Redistributions of source code must retain the above copyright // notice, this list of conditions and the following disclaimer. // 2. 
Redistributions in binary form must reproduce the above copyright // notice, this list of conditions and the following disclaimer in the // documentation and/or other materials provided with the distribution. // 3. The name of the author may not be used to endorse or promote products // derived from this software without specific prior written permission. // // THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR // IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES // OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. // IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, // INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT // NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF // THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#ifndef FILE_LIST_CMD_OPTION_INC
#define FILE_LIST_CMD_OPTION_INC

#include "CmdOption.h"

namespace Torch {

/** This class takes a file name in the command line,
    and reads a list of files contained in this
    file.

    In fact, there is a special case: it first checks
    whether "-one_file" is the current argument on the
    command line. If true, then it reads the next argument,
    which will be the only file in the list.

    @author Ronan Collobert (collober@idiap.ch)
    @see CmdLine
*/
class FileListCmdOption : public CmdOption
{
  public:
    /// Contains the file names after reading the command line.
    char **file_names;

    /// Number of files that have been read.
    int n_files;

    /// Creates the option #name_#, with an empty list of files.
    FileListCmdOption(const char *name_, const char *help_="", bool save_=false);

    /// Reads the list of files from the command line and moves to the next argument.
    virtual void read(int *argc_, char ***argv_);

    /// Loads the list of files from #file#.
    virtual void loadXFile(XFile *file);

    /// Saves the list of files in #file#.
    virtual void saveXFile(XFile *file);

    ~FileListCmdOption();
};

}

#endif
torch3-3.1.orig/core/FrameSelectorDataSet.cc0000644000175000017500000001432610106445234021141 0ustar kalfakalfa00000000000000// Copyright (C) 2003--2004 Ronan Collobert (collober@idiap.ch) // // This file is part of Torch 3.1. // // All rights reserved. // // Redistribution and use in source and binary forms, with or without // modification, are permitted provided that the following conditions // are met: // 1. Redistributions of source code must retain the above copyright // notice, this list of conditions and the following disclaimer. // 2. Redistributions in binary form must reproduce the above copyright // notice, this list of conditions and the following disclaimer in the // documentation and/or other materials provided with the distribution. // 3. The name of the author may not be used to endorse or promote products // derived from this software without specific prior written permission. // // THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR // IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES // OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. // IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, // INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT // NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF // THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#include "FrameSelectorDataSet.h"

namespace Torch {

/// Wraps #data_#: allocates the per-example selection tables (nothing
/// selected at first) and the buffers receiving the selected frames.
FrameSelectorDataSet::FrameSelectorDataSet(DataSet *data_)
{
  data = data_;
  if(data->n_examples == 0)
    error("FrameSelectorDataSet: cannot handle DataSet with no examples");

  const int n_wrapped_examples = data->n_examples;
  n_selected_input_frames = (int *)allocator->alloc(sizeof(int)*n_wrapped_examples);
  n_selected_target_frames = (int *)allocator->alloc(sizeof(int)*n_wrapped_examples);
  input_frames_indices = (int **)allocator->alloc(sizeof(int *)*n_wrapped_examples);
  target_frames_indices = (int **)allocator->alloc(sizeof(int *)*n_wrapped_examples);

  // A NULL table of indices means "no selection for this example".
  int example = 0;
  while(example < data->n_examples)
  {
    n_selected_input_frames[example] = 0;
    input_frames_indices[example] = NULL;
    n_selected_target_frames[example] = 0;
    target_frames_indices[example] = NULL;
    example++;
  }

  DataSet::init(data->n_examples, data->n_inputs, data->n_targets);
  data->setExample(0);

  inputs_buffer = NULL;
  if(n_inputs > 0)
    inputs_buffer = new(allocator) Sequence(0, n_inputs);

  targets_buffer = NULL;
  if(n_targets > 0)
    targets_buffer = new(allocator) Sequence(0, n_targets);
}

/// Replaces the input selection of example #t_# (index in the current
/// selection of examples) by a copy of the given frame indices.
void FrameSelectorDataSet::selectInputFrames(int t_, int *frames_indices_, int n_frames_)
{
  const int real_index = selected_examples[t_];

  // Drop the previous selection, if any.
  if(input_frames_indices[real_index])
    allocator->free(input_frames_indices[real_index]);

  int *copied_indices = (int *)allocator->alloc(sizeof(int)*n_frames_);
  input_frames_indices[real_index] = copied_indices;
  for(int k = 0; k < n_frames_; k++)
    copied_indices[k] = frames_indices_[k];

  n_selected_input_frames[real_index] = n_frames_;
}

/// Same as #selectInputFrames()#, for the targets.
void FrameSelectorDataSet::selectTargetFrames(int t_, int *frames_indices_, int n_frames_)
{
  const int real_index = selected_examples[t_];

  // Drop the previous selection, if any.
  if(target_frames_indices[real_index])
    allocator->free(target_frames_indices[real_index]);

  int *copied_indices = (int *)allocator->alloc(sizeof(int)*n_frames_);
  target_frames_indices[real_index] = copied_indices;
  for(int k = 0; k < n_frames_; k++)
    copied_indices[k] = frames_indices_[k];

  n_selected_target_frames[real_index] = n_frames_;
}

/// Forgets the input selection of example #t_#.
void FrameSelectorDataSet::unselectInputFrames(int t_)
{
  const int real_index = selected_examples[t_];

  if(input_frames_indices[real_index])
    allocator->free(input_frames_indices[real_index]);

  input_frames_indices[real_index] = NULL;
  n_selected_input_frames[real_index] = 0;
}

/// Forgets the target selection of example #t_#.
void FrameSelectorDataSet::unselectTargetFrames(int t_)
{
  const int real_index = selected_examples[t_];

  if(target_frames_indices[real_index])
    allocator->free(target_frames_indices[real_index]);

  target_frames_indices[real_index] = NULL;
  n_selected_target_frames[real_index] = 0;
}

/// Gives the number of selected frames of example #t_#, or asks the
/// wrapped dataset when nothing is selected for this example.
void FrameSelectorDataSet::getNumberOfFrames(int t_, int *n_input_frames_, int *n_target_frames_)
{
  const int real_index = selected_examples[t_];

  if(n_input_frames_ && (n_inputs > 0))
  {
    if(!input_frames_indices[real_index])
      data->getNumberOfFrames(real_index, n_input_frames_, NULL);
    else
      *n_input_frames_ = n_selected_input_frames[real_index];
  }

  if(n_target_frames_ && (n_targets > 0))
  {
    if(!target_frames_indices[real_index])
      data->getNumberOfFrames(real_index, NULL, n_target_frames_);
    else
      *n_target_frames_ = n_selected_target_frames[real_index];
  }
}

/// Sets example #t# in the wrapped dataset, then exposes either its own
/// sequences or buffers pointing on the selected frames only.
void FrameSelectorDataSet::setRealExample(int t, bool set_inputs, bool set_targets)
{
  data->setExample(t);

  if(set_inputs)
  {
    int *picked = input_frames_indices[t];
    if(!picked)
      inputs = data->inputs;
    else
    {
      // The buffer only receives pointers on the selected frames.
      const int n_picked = n_selected_input_frames[t];
      inputs_buffer->resize(n_picked, false);
      for(int k = 0; k < n_picked; k++)
        inputs_buffer->frames[k] = data->inputs->frames[picked[k]];
      inputs = inputs_buffer;
    }
  }

  if(set_targets)
  {
    int *picked = target_frames_indices[t];
    if(!picked)
      targets = data->targets;
    else
    {
      const int n_picked = n_selected_target_frames[t];
      targets_buffer->resize(n_picked, false);
      for(int k = 0; k < n_picked; k++)
        targets_buffer->frames[k] = data->targets->frames[picked[k]];
      targets = targets_buffer;
    }
  }

  real_current_example_index = t;
}

/// Pre-processing is not available for this dataset.
void FrameSelectorDataSet::preProcess(PreProcessing *pre_processing)
{
  error("FrameSelectorDataSet: pre-processing not supported");
}

/// Pushes the example of the wrapped dataset, saves the state of this
/// dataset on the #pushed_examples# stack and installs new empty buffers.
void FrameSelectorDataSet::pushExample()
{
  data->pushExample();

  pushed_examples->push(&inputs_buffer, sizeof(Sequence *));
  pushed_examples->push(&targets_buffer, sizeof(Sequence *));
  pushed_examples->push(&inputs, sizeof(Sequence *));
  pushed_examples->push(&targets, sizeof(Sequence *));
  pushed_examples->push(&real_current_example_index, sizeof(int));

  if(n_inputs > 0)
    inputs_buffer = new(allocator) Sequence(0, n_inputs);

  if(n_targets > 0)
    targets_buffer = new(allocator) Sequence(0, n_targets);

  // No example is set in the new buffers.
  real_current_example_index = -1;
}

/// Releases the buffers created by #pushExample()#, pops the five entries
/// it pushed, then pops the example of the wrapped dataset.
void FrameSelectorDataSet::popExample()
{
  allocator->free(inputs_buffer);
  allocator->free(targets_buffer);

  // One pop() per push() done in pushExample().
  for(int k = 0; k < 5; k++)
    pushed_examples->pop();

  data->popExample();
}

FrameSelectorDataSet::~FrameSelectorDataSet()
{
}

}
torch3-3.1.orig/core/FrameSelectorDataSet.h0000644000175000017500000000720710106445234021003 0ustar kalfakalfa00000000000000// Copyright (C) 2003--2004 Ronan Collobert (collober@idiap.ch) // // This file is part of Torch 3.1. // // All rights reserved. // // Redistribution and use in source and binary forms, with or without // modification, are permitted provided that the following conditions // are met: // 1. Redistributions of source code must retain the above copyright // notice, this list of conditions and the following disclaimer. // 2. Redistributions in binary form must reproduce the above copyright // notice, this list of conditions and the following disclaimer in the // documentation and/or other materials provided with the distribution. // 3. The name of the author may not be used to endorse or promote products // derived from this software without specific prior written permission. // // THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR // IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES // OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
// IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
// INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
// NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
// THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

#ifndef FRAME_SELECTOR_DATA_SET_INC
#define FRAME_SELECTOR_DATA_SET_INC

#include "DataSet.h"

namespace Torch {

/** A dataset used to select some frames of another dataset.

    It takes a dataset in the constructor. Then you call the
    select functions to choose frames. After that, when you do
    a #setExample()#, the example of the wrapped dataset is
    exposed in the #inputs# and #targets# fields, restricted to
    the selected frames. Examples without a selection are
    exposed unchanged.

    @author Ronan Collobert (collober@idiap.ch)
*/
class FrameSelectorDataSet : public DataSet
{
  private:
    // Buffer sequences whose frame pointers are redirected to the selected
    // frames of the wrapped dataset when an example is set.
    Sequence *inputs_buffer;
    Sequence *targets_buffer;

  public:
    /// DataSet in which frames are selected.
    DataSet *data;

    /// Number of selected input frames, for each example.
    int *n_selected_input_frames;

    /// Number of selected target frames, for each example.
    int *n_selected_target_frames;

    /// Indices of the selected input frames, for each example (NULL when nothing is selected).
    int **input_frames_indices;

    /// Indices of the selected target frames, for each example (NULL when nothing is selected).
    int **target_frames_indices;

    /// Builds a frame selector on top of #data_#.
    FrameSelectorDataSet(DataSet *data_);

    /** Select input frames of the example #t_#.
        Frame indices are given by #frames_indices_#.
        The size of #frames_indices_# is given by #n_frames_#.
        Takes into account pushed subsets.
    */
    void selectInputFrames(int t_, int *frames_indices_, int n_frames_);

    /// Same as #selectInputFrames()#, but for targets.
    void selectTargetFrames(int t_, int *frames_indices_, int n_frames_);

    /** Unselect input frames of the example #t_#.
        Takes into account pushed subsets.
    */
    void unselectInputFrames(int t_);

    /** Unselect target frames of the example #t_#.
        Takes into account pushed subsets.
    */
    void unselectTargetFrames(int t_);

    /// Not supported by this dataset: reports an error.
    virtual void preProcess(PreProcessing *pre_processing);

    /** Number of frames of example #t_#: the selected count when a
        selection exists, the count of the wrapped dataset otherwise.
        A NULL output pointer skips the corresponding query.
    */
    virtual void getNumberOfFrames(int t_, int *n_input_frames_, int *n_target_frames_);

    /// Sets #inputs# and/or #targets# for the example of real index #t#.
    virtual void setRealExample(int t, bool set_inputs=true, bool set_targets=true);

    /// Saves the current example state and installs fresh buffers.
    virtual void pushExample();

    /// Restores the state saved by the last #pushExample()#.
    virtual void popExample();

    //-----

    virtual ~FrameSelectorDataSet();
};

}

#endif
torch3-3.1.orig/core/IOAscii.cc0000644000175000017500000001011510106445234016410 0ustar kalfakalfa00000000000000// Copyright (C) 2003--2004 Ronan Collobert (collober@idiap.ch)
//
// This file is part of Torch 3.1.
//
// All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions
// are met:
// 1. Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
// 2. Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
// 3. The name of the author may not be used to endorse or promote products
// derived from this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
// IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
// OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
// IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, // INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT // NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF // THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #include "IOAscii.h" namespace Torch { #ifdef USE_DOUBLE #define REAL_FORMAT "%lf" #else #define REAL_FORMAT "%f" #endif void IOAscii::saveSequence(XFile *file, Sequence *sequence) { file->printf("%d %d\n", sequence->n_frames, sequence->frame_size); for(int i = 0; i < sequence->n_frames; i++) { real *z = sequence->frames[i]; for(int j = 0; j < sequence->frame_size; j++) file->printf("%g ", z[j]); file->printf("\n"); } } IOAscii::IOAscii(const char *filename_, bool one_file_is_one_sequence_, int max_load_) { // Boaf... one_file_is_one_sequence = one_file_is_one_sequence_; max_load = max_load_; filename = (char *)allocator->alloc(strlen(filename_)+1); strcpy(filename, filename_); // Read the header... 
DiskXFile f(filename, "r"); f.scanf("%d", &n_total_frames); f.scanf("%d", &frame_size); if( (max_load > 0) && (max_load < n_total_frames) && (!one_file_is_one_sequence) ) { n_total_frames = max_load; message("IOAscii: loading only %d frames", n_total_frames); } if(one_file_is_one_sequence) n_sequences = 1; else n_sequences = n_total_frames; file = NULL; current_frame_index = -1; } void IOAscii::getSequence(int t, Sequence *sequence) { // Cas simple: on lit tout le bordel if(one_file_is_one_sequence) { file = new(allocator) DiskXFile(filename, "r"); int murielle; file->scanf("%d", &murielle); file->scanf("%d", &murielle); for(int i = 0; i < n_total_frames; i++) { real *dest_ = sequence->frames[i]; for(int j = 0; j < frame_size; j++) file->scanf(REAL_FORMAT, &dest_[j]); } allocator->free(file); } else { // Sequentiel ? if(t != current_frame_index+1) error("IOAscii: sorry, data are accessible only in a sequential way"); // Doit-on ouvrir le putain de fichier ? if(current_frame_index < 0) { file = new(allocator) DiskXFile(filename, "r"); int murielle; file->scanf("%d", &murielle); file->scanf("%d", &murielle); } // Lis la frame mec real *dest_ = sequence->frames[0]; for(int j = 0; j < frame_size; j++) file->scanf(REAL_FORMAT, &dest_[j]); // Si je suis a la fin du fichier, je le zigouille. current_frame_index++; if(current_frame_index == n_total_frames-1) { allocator->free(file); current_frame_index = -1; } } } int IOAscii::getNumberOfFrames(int t) { if(one_file_is_one_sequence) return n_total_frames; else return 1; } int IOAscii::getTotalNumberOfFrames() { return n_total_frames; } IOAscii::~IOAscii() { } } torch3-3.1.orig/core/IOAscii.h0000644000175000017500000000653010106445234016260 0ustar kalfakalfa00000000000000// Copyright (C) 2003--2004 Ronan Collobert (collober@idiap.ch) // // This file is part of Torch 3.1. // // All rights reserved. 
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions
// are met:
// 1. Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
// 2. Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
// 3. The name of the author may not be used to endorse or promote products
// derived from this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
// IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
// OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
// IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
// INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
// NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
// THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

#ifndef IO_ASCII_INC
#define IO_ASCII_INC

#include "IOSequence.h"
#include "DiskXFile.h"

namespace Torch {

/** Handles the standard Ascii sequence format in Torch.
    The format is the following:
    \begin{itemize}
      \item The first line should contain the number of frames and the
      frame size (two int separated by a space) of the file.
      \item The next lines are the frames of the sequence. One frame is
      one line (frame-size real values on each line, separated by a space).
    \end{itemize}

    @author Ronan Collobert (collober@idiap.ch)
*/
class IOAscii : public IOSequence
{
  private:
    // File kept open between calls of getSequence() in sequential mode.
    DiskXFile *file;
    // Index of the last frame read in sequential mode (-1 when no pass is in progress).
    int current_frame_index;

  public:
    /// If true, the whole file is served as a single sequence.
    bool one_file_is_one_sequence;

    /// Number of frames served by this IO (after any #max_load# truncation).
    int n_total_frames;

    /// Private copy of the name of the file to read.
    char *filename;

    /// Maximum number of frames to load (ignored if not positive).
    int max_load;

    /** Reads the sequence contained in #filename#.
        If #one_file_is_one_sequence# is false, #getSequence()# will
        return one sequence with one frame at each call.
        (If calling #getSequence(t, foo)#, it will put in the sequence #foo# the
        frame corresponding to the line #t# of the file).

        Note also that if #one_file_is_one_sequence# is false, the access to the
        IO must be sequential when calling #getSequence()#.

        If #max_load_# is positive, it loads only the first #max_load_# frames,
        if #one_file_is_one_sequence# is false.

        The file will be opened when reading the first sequence, and closed
        when reading the last one.
    */
    IOAscii(const char *filename_, bool one_file_is_one_sequence_=false, int max_load_=-1);

    /// Saves one #sequence# in #file# using the ascii format.
    static void saveSequence(XFile *file, Sequence *sequence);

    /// Reads sequence #t# into #sequence#.
    virtual void getSequence(int t, Sequence *sequence);

    /// Returns #n_total_frames# if #one_file_is_one_sequence# is true, 1 otherwise.
    virtual int getNumberOfFrames(int t);

    /// Returns #n_total_frames#.
    virtual int getTotalNumberOfFrames();

    virtual ~IOAscii();
};

}

#endif
torch3-3.1.orig/core/IOBin.cc0000644000175000017500000001051410106445234016073 0ustar kalfakalfa00000000000000// Copyright (C) 2003--2004 Ronan Collobert (collober@idiap.ch)
//
// This file is part of Torch 3.1.
//
// All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions
// are met:
// 1. Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
// 2. Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
// 3.
The name of the author may not be used to endorse or promote products // derived from this software without specific prior written permission. // // THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR // IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES // OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. // IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, // INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT // NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF // THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #include "IOBin.h" namespace Torch { void IOBin::saveSequence(XFile *file, Sequence *sequence) { file->write(&sequence->n_frames, sizeof(int), 1); file->write(&sequence->frame_size, sizeof(int), 1); for(int i = 0; i < sequence->n_frames; i++) file->write(sequence->frames[i], sizeof(real), sequence->frame_size); } IOBin::IOBin(const char *filename_, bool one_file_is_one_sequence_, int max_load_, bool is_sequential_) { // Boaf... one_file_is_one_sequence = one_file_is_one_sequence_; max_load = max_load_; is_sequential = is_sequential_; filename = (char *)allocator->alloc(strlen(filename_)+1); strcpy(filename, filename_); // Read the header... 
DiskXFile f(filename, "r"); f.read(&n_total_frames, sizeof(int), 1); f.read(&frame_size, sizeof(int), 1); if( (max_load > 0) && (max_load < n_total_frames) && (!one_file_is_one_sequence) ) { n_total_frames = max_load; message("IOBin: loading only %d frames", n_total_frames); } if(one_file_is_one_sequence) n_sequences = 1; else n_sequences = n_total_frames; file = NULL; current_frame_index = -1; } void IOBin::getSequence(int t, Sequence *sequence) { // Cas simple: on lit tout le bordel if(one_file_is_one_sequence) { file = new(allocator) DiskXFile(filename, "r"); int murielle; file->read(&murielle, sizeof(int), 1); // fseek non car marche pas dans pipes file->read(&murielle, sizeof(int), 1); for(int i = 0; i < n_total_frames; i++) file->read(sequence->frames[i], sizeof(real), frame_size); allocator->free(file); } else { // Sequentiel ? if(is_sequential) { if(t != current_frame_index+1) error("IOBin: sorry, data are accessible only in a sequential way"); // Doit-on ouvrir le putain de fichier ? if(current_frame_index < 0) { file = new(allocator) DiskXFile(filename, "r"); int murielle; file->read(&murielle, sizeof(int), 1); // fseek non car marche pas dans pipes file->read(&murielle, sizeof(int), 1); } } else { file = new(allocator) DiskXFile(filename, "r"); if(file->seek(t*frame_size*sizeof(real)+2*sizeof(int), SEEK_CUR) != 0) error("IOBin: cannot seek in your file!"); } // Lis la frame mec file->read(sequence->frames[0], sizeof(real), frame_size); if(is_sequential) { // Si je suis a la fin du fichier, je le zigouille. 
current_frame_index++; if(current_frame_index == n_total_frames-1) { allocator->free(file); current_frame_index = -1; } } else allocator->free(file); } } int IOBin::getNumberOfFrames(int t) { if(one_file_is_one_sequence) return n_total_frames; else return 1; } int IOBin::getTotalNumberOfFrames() { return n_total_frames; } IOBin::~IOBin() { } } torch3-3.1.orig/core/IOBin.h0000644000175000017500000000677410106445234015752 0ustar kalfakalfa00000000000000// Copyright (C) 2003--2004 Ronan Collobert (collober@idiap.ch) // // This file is part of Torch 3.1. // // All rights reserved. // // Redistribution and use in source and binary forms, with or without // modification, are permitted provided that the following conditions // are met: // 1. Redistributions of source code must retain the above copyright // notice, this list of conditions and the following disclaimer. // 2. Redistributions in binary form must reproduce the above copyright // notice, this list of conditions and the following disclaimer in the // documentation and/or other materials provided with the distribution. // 3. The name of the author may not be used to endorse or promote products // derived from this software without specific prior written permission. // // THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR // IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES // OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. // IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, // INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT // NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF // THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
#ifndef IO_BIN_INC
#define IO_BIN_INC

#include "IOSequence.h"
#include "DiskXFile.h"

namespace Torch {

/** Handles the standard binary sequence format in Torch.
    The format is the following:
    \begin{itemize}
      \item Two int at the beginning of the file for the number of frames
      and the frame size of the sequence in the file.
      \item After that, the sequence data, frame after frame
      (frame_size real per frame).
    \end{itemize}

    @author Ronan Collobert (collober@idiap.ch)
*/
class IOBin : public IOSequence
{
  protected:
    // File used by getSequence(); kept open between calls in sequential mode.
    DiskXFile *file;
    // Index of the last frame read in sequential mode (-1 when no pass is in progress).
    int current_frame_index;

  public:
    /// If true, the whole file is served as a single sequence.
    bool one_file_is_one_sequence;

    /// Number of frames served by this IO (after any #max_load# truncation).
    int n_total_frames;

    /// Private copy of the name of the file to read.
    char *filename;

    /// Maximum number of frames to load (ignored if not positive).
    int max_load;

    /// If true, frames must be requested in increasing order, one by one.
    bool is_sequential;

    /** Reads the sequence contained in #filename#.
        If #one_file_is_one_sequence# is false, #getSequence()# will
        return one sequence with one frame at each call.
        (If calling #getSequence(t, foo)#, it will put in the sequence #foo# the
        frame number #t# of the file).

        Note also that if #one_file_is_one_sequence# is false, the access to the
        IO must be sequential when calling #getSequence()# if #is_sequential#
        is true. (Sequential mode is faster).

        If #max_load_# is positive, it loads only the first #max_load_# frames,
        if #one_file_is_one_sequence# is false.

        The file will be opened when reading the first sequence, and closed
        when reading the last one if #is_sequential# is true.
        Otherwise, the file will be opened and closed each time you
        call #getSequence()#.
    */
    IOBin(const char *filename_, bool one_file_is_one_sequence_=false, int max_load_=-1, bool is_sequential=true);

    /// Saves #sequence# in #file# using the binary format.
    static void saveSequence(XFile *file, Sequence *sequence);

    /// Reads sequence #t# into #sequence#.
    virtual void getSequence(int t, Sequence *sequence);

    /// Returns #n_total_frames# if #one_file_is_one_sequence# is true, 1 otherwise.
    virtual int getNumberOfFrames(int t);

    /// Returns #n_total_frames#.
    virtual int getTotalNumberOfFrames();

    virtual ~IOBin();
};

}

#endif
torch3-3.1.orig/core/IOBufferize.cc0000644000175000017500000000463510106445234017313 0ustar kalfakalfa00000000000000// Copyright (C) 2003--2004 Ronan Collobert (collober@idiap.ch)
//
// This file is part of Torch 3.1.
//
// All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions
// are met:
// 1. Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
// 2. Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
// 3. The name of the author may not be used to endorse or promote products
// derived from this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
// IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
// OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
// IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
// INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
// NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
// THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#include "IOBufferize.h"

namespace Torch {

// Wrap #io_torch_#: mirror its dimensions and create an empty cache
// sequence. No sequence is cached yet (index -1).
IOBufferize::IOBufferize(IOSequence *io_torch_)
{
  io_torch = io_torch_;
  frame_size = io_torch_->frame_size;
  n_sequences = io_torch_->n_sequences;

  bufferized_sequence_index = -1;
  bufferized_sequence = new(allocator) Sequence(0, frame_size);
}

// Make sure sequence #t# is cached, then copy the cache into #sequence#.
void IOBufferize::getSequence(int t, Sequence *sequence)
{
  bufferizeSequence(t);
  sequence->copy(bufferized_sequence);
}

// Load sequence #t# from the wrapped IO into the cache, unless it is
// already the cached one.
void IOBufferize::bufferizeSequence(int t)
{
  if(t == bufferized_sequence_index)
    return;

  int n_frames_to_cache = io_torch->getNumberOfFrames(t);
  bufferized_sequence->resize(n_frames_to_cache);
  io_torch->getSequence(t, bufferized_sequence);
  bufferized_sequence_index = t;
}

// Forwarded to the wrapped IO.
int IOBufferize::getNumberOfFrames(int t)
{
  return io_torch->getNumberOfFrames(t);
}

// Forwarded to the wrapped IO.
int IOBufferize::getTotalNumberOfFrames()
{
  return io_torch->getTotalNumberOfFrames();
}

IOBufferize::~IOBufferize()
{
}

}
torch3-3.1.orig/core/IOBufferize.h0000644000175000017500000000464710106445234017160 0ustar kalfakalfa00000000000000// Copyright (C) 2003--2004 Ronan Collobert (collober@idiap.ch)
//
// This file is part of Torch 3.1.
//
// All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions
// are met:
// 1. Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
// 2. Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
// 3. The name of the author may not be used to endorse or promote products
// derived from this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
// IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
// OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
// IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, // INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT // NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF // THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #ifndef IO_BUFFERIZE_INC #define IO_BUFFERIZE_INC #include "IOSequence.h" namespace Torch { /** This IO bufferizes the asked sequence of a given IO when calling #getSequence()#. Then, if the next call of #getSequence()# ask for the same sequence, it will return it without asking the given IO. @author Ronan Collobert (collober@idiap.ch) */ class IOBufferize : public IOSequence { public: /// IO to be bufferized. IOSequence *io_torch; /// The bufferized sequence. Sequence *bufferized_sequence; /// The bufferized sequence index. int bufferized_sequence_index; /// IOBufferize(IOSequence *io_torch_); /// Force sequence #t# to be bufferized. void bufferizeSequence(int t); virtual void getSequence(int t, Sequence *sequence); virtual int getNumberOfFrames(int t); virtual int getTotalNumberOfFrames(); virtual ~IOBufferize(); }; } #endif torch3-3.1.orig/core/IOMulti.cc0000644000175000017500000000537210106445234016463 0ustar kalfakalfa00000000000000// Copyright (C) 2003--2004 Ronan Collobert (collober@idiap.ch) // // This file is part of Torch 3.1. // // All rights reserved. // // Redistribution and use in source and binary forms, with or without // modification, are permitted provided that the following conditions // are met: // 1. Redistributions of source code must retain the above copyright // notice, this list of conditions and the following disclaimer. // 2. 
Redistributions in binary form must reproduce the above copyright // notice, this list of conditions and the following disclaimer in the // documentation and/or other materials provided with the distribution. // 3. The name of the author may not be used to endorse or promote products // derived from this software without specific prior written permission. // // THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR // IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES // OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. // IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, // INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT // NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF // THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
#include "IOMulti.h"

namespace Torch {

// Present #n_files_# IOSequence as a single one.
// For every global sequence index, #indices# records which IO holds the
// sequence and #offsets# records its index inside that IO.
IOMulti::IOMulti(IOSequence **io_files_, int n_files_)
{
  io_files = io_files_;
  n_files = n_files_;

  if(n_files <= 0)
    error("IOMulti: check the number of files!");

  // Total number of sequences over all the IO.
  n_sequences = 0;
  for(int f = 0; f < n_files; f++)
    n_sequences += io_files[f]->n_sequences;

  // The first IO gives the reference frame size.
  frame_size = io_files[0]->frame_size;

  indices = (int *)allocator->alloc(sizeof(int)*n_sequences);
  offsets = (int *)allocator->alloc(sizeof(int)*n_sequences);

  // Fill the two lookup tables, checking frame sizes along the way.
  int slot = 0;
  for(int f = 0; f < n_files; f++)
  {
    if(io_files[f]->frame_size != frame_size)
      error("IOMulti: provided IO have incompatible frame sizes");

    for(int local = 0; local < io_files[f]->n_sequences; local++)
    {
      indices[slot] = f;
      offsets[slot] = local;
      slot++;
    }
  }
}

// Fetch global sequence #t# from the IO that holds it.
void IOMulti::getSequence(int t, Sequence *sequence)
{
  IOSequence *owner = io_files[indices[t]];
  owner->getSequence(offsets[t], sequence);
}

// Number of frames of global sequence #t#, asked to the IO that holds it.
int IOMulti::getNumberOfFrames(int t)
{
  IOSequence *owner = io_files[indices[t]];
  return owner->getNumberOfFrames(offsets[t]);
}

// Sum of the total number of frames of every IO.
int IOMulti::getTotalNumberOfFrames()
{
  int frame_count = 0;
  for(int f = 0; f < n_files; f++)
    frame_count += io_files[f]->getTotalNumberOfFrames();
  return frame_count;
}

IOMulti::~IOMulti()
{
}

}
torch3-3.1.orig/core/IOMulti.h0000644000175000017500000000431410106445234016320 0ustar kalfakalfa00000000000000// Copyright (C) 2003--2004 Ronan Collobert (collober@idiap.ch)
//
// This file is part of Torch 3.1.
//
// All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions
// are met:
// 1. Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
// 2. Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
// 3.
The name of the author may not be used to endorse or promote products
// derived from this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
// IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
// OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
// IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
// INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
// NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
// THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

#ifndef IO_MULTI_INC
#define IO_MULTI_INC

#include "IOSequence.h"

namespace Torch {

/** This IO takes several IOSequence, and will act as if you had
    concatenated all these IOSequence when calling #getSequence()#.
    All the given IOSequence must have the same frame size.

    @author Ronan Collobert (collober@idiap.ch)
*/
class IOMulti : public IOSequence
{
  public:
    /// The concatenated IOSequence (array given to the constructor, not copied).
    IOSequence **io_files;

    /// Number of IOSequence in #io_files#.
    int n_files;

    /// For each sequence of this IO, index in #io_files# of the IOSequence which holds it.
    int *indices;

    /// For each sequence of this IO, its index inside the IOSequence which holds it.
    int *offsets;

    /// Takes #n_files_# pointers to IOSequence, given in #io_files_#.
    IOMulti(IOSequence **io_files_, int n_files_);

    /// Reads sequence #t# from the IOSequence which holds it.
    virtual void getSequence(int t, Sequence *sequence);

    /// Number of frames of sequence #t#, as given by the IOSequence which holds it.
    virtual int getNumberOfFrames(int t);

    /// Sum of the total number of frames of all the IOSequence.
    virtual int getTotalNumberOfFrames();

    virtual ~IOMulti();
};

}

#endif
torch3-3.1.orig/core/IOSequence.cc0000644000175000017500000000323610106445234017136 0ustar kalfakalfa00000000000000// Copyright (C) 2003--2004 Ronan Collobert (collober@idiap.ch)
//
// This file is part of Torch 3.1.
//
// All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions
// are met:
// 1.
Redistributions of source code must retain the above copyright // notice, this list of conditions and the following disclaimer. // 2. Redistributions in binary form must reproduce the above copyright // notice, this list of conditions and the following disclaimer in the // documentation and/or other materials provided with the distribution. // 3. The name of the author may not be used to endorse or promote products // derived from this software without specific prior written permission. // // THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR // IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES // OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. // IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, // INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT // NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF // THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #include "IOSequence.h" namespace Torch { IOSequence::IOSequence() { n_sequences = 0; frame_size = 0; } IOSequence::~IOSequence() { } } torch3-3.1.orig/core/IOSequence.h0000644000175000017500000000464610106445234017006 0ustar kalfakalfa00000000000000// Copyright (C) 2003--2004 Ronan Collobert (collober@idiap.ch) // // This file is part of Torch 3.1. // // All rights reserved. // // Redistribution and use in source and binary forms, with or without // modification, are permitted provided that the following conditions // are met: // 1. Redistributions of source code must retain the above copyright // notice, this list of conditions and the following disclaimer. // 2. 
Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
// 3. The name of the author may not be used to endorse or promote products
// derived from this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
// IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
// OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
// IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
// INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
// NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
// THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

#ifndef IO_SEQUENCE_INC
#define IO_SEQUENCE_INC

#include "Sequence.h"

namespace Torch {

/** Class which provides a set of sequences which all have the
    same frame size, but may have different numbers of frames.

    @author Ronan Collobert (collober@idiap.ch)
*/
class IOSequence : public Object
{
  public:

    /// Number of sequences in the interface.
    int n_sequences;

    /// Frame size of each sequence.
    int frame_size;

    /// Sets #n_sequences# and #frame_size# to 0; subclasses fill them in.
    IOSequence();

    /// Returns the number of frames of the sequence indexed by #t#.
    virtual int getNumberOfFrames(int t) = 0;

    /** Write the sequence #t# in #sequence#.
        #sequence# must already have the number of frames returned
        by #getNumberOfFrames()# for #t#.
    */
    virtual void getSequence(int t, Sequence *sequence) = 0;

    /// Returns the total number of frames in the IO.
    virtual int getTotalNumberOfFrames() = 0;

    virtual ~IOSequence();
};

}

#endif
torch3-3.1.orig/core/IOSequenceArray.cc0000644000175000017500000000651310106445234020136 0ustar kalfakalfa00000000000000// Copyright (C) 2003--2004 Ronan Collobert (collober@idiap.ch)
//
// This file is part of Torch 3.1.
//
// All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions
// are met:
// 1. Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
// 2. Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
// 3. The name of the author may not be used to endorse or promote products
// derived from this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
// IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
// OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
// IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
// INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
// NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
// THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#include "IOSequenceArray.h"
#include "XFile.h"

namespace Torch {

IOSequenceArray::IOSequenceArray()
{
}

/* Save #n_sequences# sequences of #sequences_array# in #file#.

   Layout written: the total number of frames (tag "NTF"), the common
   frame size (tag "FS"), then for each sequence its number of frames
   (tag "NF") followed by the sequence itself (saveXFile()).
   Nothing at all is written when #n_sequences# <= 0.
   Calls error() if the sequences do not share the same frame size.
*/
void IOSequenceArray::write(XFile *file, Sequence **sequences_array, int n_sequences)
{
  if(n_sequences <= 0)
    return;

  int frame_size = sequences_array[0]->frame_size;
  int n_total_frames = 0;
  for(int i = 0; i < n_sequences; i++)
  {
    if(frame_size != sequences_array[i]->frame_size)
      error("IOSequenceArray: sorry, sequences don't have the same frame size");
    n_total_frames += sequences_array[i]->n_frames;
  }

  file->taggedWrite(&n_total_frames, sizeof(int), 1, "NTF");
  file->taggedWrite(&frame_size, sizeof(int), 1, "FS");

  for(int i = 0; i < n_sequences; i++)
  {
    file->taggedWrite(&sequences_array[i]->n_frames, sizeof(int), 1, "NF");
    sequences_array[i]->saveXFile(file);
  }
}

/* Load #n_sequences# sequences from #file# into #sequences_array#,
   expecting the layout produced by write().

   All the sequences share three buffers (the Sequence objects, the table
   of frame pointers and the frame data) taken from #allocator_#, or from
   the allocator of this object when #allocator_# is NULL.

   Calls error() when the header holds negative values, or when the
   per-sequence frame counts do not fit in the announced total: without
   that check a damaged file would make the frame-pointer table be
   indexed past its end.
*/
void IOSequenceArray::read(XFile *file, Sequence **sequences_array, int n_sequences, Allocator *allocator_)
{
  // write() emits nothing for an empty array, so there is nothing to consume.
  if(n_sequences <= 0)
    return;

  if(!allocator_)
    allocator_ = allocator;

  int n_total_frames, frame_size;
  file->taggedRead(&n_total_frames, sizeof(int), 1, "NTF");
  file->taggedRead(&frame_size, sizeof(int), 1, "FS");

  if( (n_total_frames < 0) || (frame_size < 0) )
    error("IOSequenceArray: negative number of frames or frame size in file");

  Sequence *sequences_buffer = (Sequence *)allocator_->alloc(sizeof(Sequence)*n_sequences);
  real **frames_buffer = (real **)allocator_->alloc(sizeof(real *)*n_total_frames);
  real *buffer = (real *)allocator_->alloc(sizeof(real)*n_total_frames*frame_size);

  // Every frame pointer addresses its own slice of the single data buffer.
  for(int i = 0; i < n_total_frames; i++)
    frames_buffer[i] = buffer+i*frame_size;

  // Frames of the shared table which are not yet owned by a sequence.
  int n_frames_left = n_total_frames;
  for(int i = 0; i < n_sequences; i++)
  {
    int n_frames_;
    file->taggedRead(&n_frames_, sizeof(int), 1, "NF");
    if( (n_frames_ < 0) || (n_frames_ > n_frames_left) )
      error("IOSequenceArray: the sequences in file have more frames than the announced total");

    // Placement new: the Sequence object lives in the shared buffer.
    sequences_array[i] = new(allocator_, sequences_buffer) Sequence(frames_buffer, n_frames_, frame_size);
    sequences_array[i]->loadXFile(file);

    frames_buffer += n_frames_;
    n_frames_left -= n_frames_;
    sequences_buffer++;
  }
}

IOSequenceArray::~IOSequenceArray()
{
}

}
torch3-3.1.orig/core/IOSequenceArray.h0000644000175000017500000000544510106445234020003 0ustar kalfakalfa00000000000000// Copyright (C) 2003--2004 Ronan Collobert (collober@idiap.ch) // // This file is part of Torch 3.1. // // All rights reserved. 
// // Redistribution and use in source and binary forms, with or without // modification, are permitted provided that the following conditions // are met: // 1. Redistributions of source code must retain the above copyright // notice, this list of conditions and the following disclaimer. // 2. Redistributions in binary form must reproduce the above copyright // notice, this list of conditions and the following disclaimer in the // documentation and/or other materials provided with the distribution. // 3. The name of the author may not be used to endorse or promote products // derived from this software without specific prior written permission. // // THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR // IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES // OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. // IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, // INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT // NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF // THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#ifndef IO_SEQUENCE_ARRAY_INC
#define IO_SEQUENCE_ARRAY_INC

#include "Object.h"
#include "Sequence.h"

namespace Torch {

/** Load and save an array of sequences in an efficient manner
    (in an arbitrary binary format).

    This could be useful for some (rare) classes which need to
    save sequences which could be in a format other than the
    standard sequence format (if you plan to use them with
    strange subclasses of #Sequence#).

    @author Ronan Collobert (collober@idiap.ch)
*/
class IOSequenceArray : public Object
{
  public:

    ///
    IOSequenceArray();

    /** Read an array of sequences in #file#.
        #n_sequences# will be read.

        If #allocator_# is non-NULL, it will be used to allocate
        the memory of the sequences. Else, the memory of the
        sequences will be destroyed when destroying the class.

        #sequences_array# must have the size #n_sequences#.
    */
    virtual void read(XFile *file, Sequence **sequences_array, int n_sequences, Allocator *allocator_=NULL);

    /** Write an array of sequences in #file#.
        #n_sequences# will be written.
        #sequences_array# must have the size #n_sequences#.
    */
    virtual void write(XFile *file, Sequence **sequences_array, int n_sequences);

    virtual ~IOSequenceArray();
};

}

#endif
torch3-3.1.orig/core/IOSub.cc0000644000175000017500000000444410106445234016121 0ustar kalfakalfa00000000000000// Copyright (C) 2003--2004 Ronan Collobert (collober@idiap.ch) // // This file is part of Torch 3.1. // // All rights reserved. // // Redistribution and use in source and binary forms, with or without // modification, are permitted provided that the following conditions // are met: // 1. Redistributions of source code must retain the above copyright // notice, this list of conditions and the following disclaimer. // 2. Redistributions in binary form must reproduce the above copyright // notice, this list of conditions and the following disclaimer in the // documentation and/or other materials provided with the distribution. // 3. The name of the author may not be used to endorse or promote products // derived from this software without specific prior written permission. // // THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR // IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES // OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
// IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, // INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT // NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF // THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #include "IOSub.h" namespace Torch { IOSub::IOSub(IOBufferize *io_torch_, int offset_, int frame_size_) { io_torch = io_torch_; offset = offset_; n_sequences = io_torch->n_sequences; frame_size = frame_size_; if(frame_size > (io_torch->frame_size-offset)) error("IOSub: the provided frame size is too large..."); } void IOSub::getSequence(int t, Sequence *sequence) { io_torch->bufferizeSequence(t); Sequence *src = io_torch->bufferized_sequence; for(int i = 0; i < sequence->n_frames; i++) memcpy(sequence->frames[i], src->frames[i]+offset, sizeof(real)*frame_size); } int IOSub::getNumberOfFrames(int t) { return io_torch->getNumberOfFrames(t); } int IOSub::getTotalNumberOfFrames() { return io_torch->getTotalNumberOfFrames(); } IOSub::~IOSub() { } } torch3-3.1.orig/core/IOSub.h0000644000175000017500000000436510106445234015765 0ustar kalfakalfa00000000000000// Copyright (C) 2003--2004 Ronan Collobert (collober@idiap.ch) // // This file is part of Torch 3.1. // // All rights reserved. // // Redistribution and use in source and binary forms, with or without // modification, are permitted provided that the following conditions // are met: // 1. Redistributions of source code must retain the above copyright // notice, this list of conditions and the following disclaimer. // 2. 
Redistributions in binary form must reproduce the above copyright // notice, this list of conditions and the following disclaimer in the // documentation and/or other materials provided with the distribution. // 3. The name of the author may not be used to endorse or promote products // derived from this software without specific prior written permission. // // THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR // IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES // OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. // IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, // INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT // NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF // THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#ifndef IO_SUB_INC
#define IO_SUB_INC

#include "IOBufferize.h"

namespace Torch {

/** IOSequence which does a selection of adjacent columns on
    another IOSequence, when calling #getSequence()#.

    @author Ronan Collobert (collober@idiap.ch)
*/
class IOSub : public IOSequence
{
  public:
    /// The IO which provides the frames the columns are selected from.
    IOBufferize *io_torch;

    /// Index of the first selected column.
    int offset;

    /** We will select columns indexed by #offset_#...#offset_+frame_size_-1#
        of the frames provided by #io_torch_#, when calling #getSequence()#.
    */
    IOSub(IOBufferize *io_torch_, int offset_, int frame_size_);

    virtual void getSequence(int t, Sequence *sequence);
    virtual int getNumberOfFrames(int t);
    virtual int getTotalNumberOfFrames();

    virtual ~IOSub();
};

}

#endif
torch3-3.1.orig/core/KFold.cc0000644000175000017500000001054710106445234016140 0ustar kalfakalfa00000000000000// Copyright (C) 2003--2004 Ronan Collobert (collober@idiap.ch) // and Samy Bengio (bengio@idiap.ch) // // This file is part of Torch 3.1. // // All rights reserved. // // Redistribution and use in source and binary forms, with or without // modification, are permitted provided that the following conditions // are met: // 1. Redistributions of source code must retain the above copyright // notice, this list of conditions and the following disclaimer. // 2. Redistributions in binary form must reproduce the above copyright // notice, this list of conditions and the following disclaimer in the // documentation and/or other materials provided with the distribution. // 3. The name of the author may not be used to endorse or promote products // derived from this software without specific prior written permission. // // THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR // IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES // OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. // IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, // INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT // NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF // THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
#include "KFold.h" #include "Random.h" namespace Torch { KFold::KFold(Trainer* trainer_, int kfold_) { kfold = kfold_; trainer = trainer_; train_subsets = (int**)allocator->alloc(sizeof(int*)*kfold); test_subsets = (int**)allocator->alloc(sizeof(int*)*kfold); n_train_subsets = (int*)allocator->alloc(sizeof(int)*kfold); n_test_subsets = (int*)allocator->alloc(sizeof(int)*kfold); for (int i=0;i= 0.5 ? fucking_hack_because_round_sucks+1 : fucking_hack_because_round_sucks ); for(int i = 0; i < kfold; i++) { n_train_subsets[i] = 0; n_test_subsets[i] = 0; for(int j = 0; j < i*taille_subset; j++) train_subsets[i][n_train_subsets[i]++] = mix_subset[j]; for(int j = i*taille_subset; j < min((i+1)*taille_subset, n_examples); j++) test_subsets[i][n_test_subsets[i]++] = mix_subset[j]; if(i == kfold-1) { for(int j = min((i+1)*taille_subset, n_examples); j < n_examples; j++) test_subsets[i][n_test_subsets[i]++] = mix_subset[j]; } else { for(int j = (i+1)*taille_subset; j < n_examples; j++) train_subsets[i][n_train_subsets[i]++] = mix_subset[j]; } } free(mix_subset); } void KFold::crossValidate(DataSet *data, MeasurerList *train_measurers, MeasurerList *test_measurers, MeasurerList *cross_valid_measurers) { for (int i=0;irealloc(train_subsets[i], sizeof(int)*data->n_examples); test_subsets[i] = (int*)allocator->realloc(test_subsets[i], sizeof(int)*data->n_examples); } sample(data->n_examples); if(cross_valid_measurers) { for(int i = 0; i < cross_valid_measurers->n_nodes; i++) cross_valid_measurers->nodes[i]->reset(); } for(int i = 0; i < kfold; i++) { data->pushSubset(train_subsets[i], n_train_subsets[i]); trainer->machine->reset(); trainer->train(data, train_measurers); data->popSubset(); data->pushSubset(test_subsets[i], n_test_subsets[i]); trainer->test(test_measurers); data->popSubset(); if(cross_valid_measurers) { for(int j = 0; j < cross_valid_measurers->n_nodes; j++) cross_valid_measurers->nodes[j]->measureExample(); } } if(cross_valid_measurers) { for(int i = 0; i < 
cross_valid_measurers->n_nodes; i++) { cross_valid_measurers->nodes[i]->measureIteration(); cross_valid_measurers->nodes[i]->measureEnd(); } } } KFold::~KFold() { } } torch3-3.1.orig/core/KFold.h0000644000175000017500000000621510106445234015777 0ustar kalfakalfa00000000000000// Copyright (C) 2003--2004 Ronan Collobert (collober@idiap.ch) // and Samy Bengio (bengio@idiap.ch) // // This file is part of Torch 3.1. // // All rights reserved. // // Redistribution and use in source and binary forms, with or without // modification, are permitted provided that the following conditions // are met: // 1. Redistributions of source code must retain the above copyright // notice, this list of conditions and the following disclaimer. // 2. Redistributions in binary form must reproduce the above copyright // notice, this list of conditions and the following disclaimer in the // documentation and/or other materials provided with the distribution. // 3. The name of the author may not be used to endorse or promote products // derived from this software without specific prior written permission. // // THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR // IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES // OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. // IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, // INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT // NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF // THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
#ifndef KFOLD_INC
#define KFOLD_INC

#include "Trainer.h"

namespace Torch {

/** Provides an interface to sample data,
    for use by methods such as cross-validation.

    @author Samy Bengio (bengio@idiap.ch)
    @author Ronan Collobert (collober@idiap.ch)
*/
class KFold : public Object
{
  public:
    /// Training examples for each fold
    int** train_subsets;

    /// Test examples for each fold
    int** test_subsets;

    /// Number of training examples for each fold...
    int* n_train_subsets;

    /// Number of test examples for each fold...
    int* n_test_subsets;

    /// Trainer used to do KFold
    Trainer* trainer;

    /// Number of folds!
    int kfold;

    ///
    KFold(Trainer* trainer_, int kfold_);

    /** Do a cross-validation over #data#.
        #train_measurers# are called in each "train pass" for each fold.
        #test_measurers# are called in each "test pass" for each fold.
        #cross_valid_measurers# are called during the cross-validation loop.
        Each of the three lists is optional (NULL by default).
    */
    virtual void crossValidate(DataSet *data, MeasurerList *train_measurers=NULL, MeasurerList *test_measurers=NULL, MeasurerList *cross_valid_measurers=NULL);

    /** Prepare the sample.
        Given #n_examples#, puts the right examples-indices in
        #train_subsets# and #test_subsets#, and updates
        #n_train_subsets# and #n_test_subsets#.
        (You don't have to allocate these arrays).

        The provided sample function is a standard sample for
        cross-validation, but a subclass can override it to
        implement any other sampling scheme.
    */
    virtual void sample(int n_examples);

    virtual ~KFold();
};

}

#endif
torch3-3.1.orig/core/List.h0000644000175000017500000000616710106445234015721 0ustar kalfakalfa00000000000000// Copyright (C) 2003--2004 Ronan Collobert (collober@idiap.ch) // // This file is part of Torch 3.1. // // All rights reserved. // // Redistribution and use in source and binary forms, with or without // modification, are permitted provided that the following conditions // are met: // 1. Redistributions of source code must retain the above copyright // notice, this list of conditions and the following disclaimer. // 2. 
Redistributions in binary form must reproduce the above copyright // notice, this list of conditions and the following disclaimer in the // documentation and/or other materials provided with the distribution. // 3. The name of the author may not be used to endorse or promote products // derived from this software without specific prior written permission. // // THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR // IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES // OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. // IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, // INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT // NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF // THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #ifndef LIST_INC #define LIST_INC #include "Object.h" namespace Torch { /** List Classes. A list named NAME with node pointer type TYPE has the following aspect: \begin{verbatim} class NAME { /// Pointer which can be used as you want. TYPE *nodes; /// Number of nodes. int n_nodes; /// Constructor. NAME(); /// Add a list at the end of the current list. void add(NAME *list); /// Add a node at the end of the current list. void addNode(TYPE *node_) }; \end{verbatim} To declare a new list, just use the macro (in ".h" files): \begin{verbatim} #DEFINE_NEW_LIST(NAME, TYPE); \end{verbatim} and to implement this list, use the macro (in ".cc" files): \begin{verbatim} #define IMPLEMENT_NEW_LIST(NAME, TYPE) \end{verbatim} The name of a list which TYPE nodes should be something like "TYPEList". 
@author Ronan Collobert (collober@idiap.ch) @type class @name List @args @memo */ #define DEFINE_NEW_LIST(NAME, TYPE) \ class NAME : public Object \ { \ public: \ TYPE **nodes; \ int n_nodes; \ \ NAME(); \ void add(NAME *list); \ void addNode(TYPE *node); \ } #define IMPLEMENT_NEW_LIST(NAME, TYPE) \ \ NAME::NAME() \ { \ nodes = NULL; \ n_nodes = 0; \ } \ \ void NAME::add(NAME *list) \ { \ if(!list->n_nodes) \ return; \ \ nodes = (TYPE **)allocator->realloc(nodes, sizeof(TYPE *)*(n_nodes+list->n_nodes)); \ for(int i = 0; i < list->n_nodes; i++) \ nodes[n_nodes++] = list->nodes[i]; \ } \ \ void NAME::addNode(TYPE *node) \ { \ nodes = (TYPE **)allocator->realloc(nodes, sizeof(TYPE *)*(n_nodes+1)); \ nodes[n_nodes++] = node; \ } } #endif torch3-3.1.orig/core/MSEMeasurer.cc0000644000175000017500000000541110106445234017263 0ustar kalfakalfa00000000000000// Copyright (C) 2003--2004 Ronan Collobert (collober@idiap.ch) // // This file is part of Torch 3.1. // // All rights reserved. // // Redistribution and use in source and binary forms, with or without // modification, are permitted provided that the following conditions // are met: // 1. Redistributions of source code must retain the above copyright // notice, this list of conditions and the following disclaimer. // 2. Redistributions in binary form must reproduce the above copyright // notice, this list of conditions and the following disclaimer in the // documentation and/or other materials provided with the distribution. // 3. The name of the author may not be used to endorse or promote products // derived from this software without specific prior written permission. // // THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR // IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES // OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
// IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, // INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT // NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF // THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #include "MSEMeasurer.h" namespace Torch { MSEMeasurer::MSEMeasurer(Sequence *inputs_, DataSet *data_, XFile *file_) : Measurer(data_, file_) { inputs = inputs_; internal_error = 0; addBOption("average examples", &average_examples, true, "divided by the number of examples"); addBOption("average frame size", &average_frame_size, true, "divided by the frame size"); addBOption("average frames", &average_frames, true, "divided by the number of frames"); } void MSEMeasurer::measureExample() { Sequence *desired = data->targets; real sum = 0; for(int i = 0; i < inputs->n_frames; i++) { real *src_1 = desired->frames[i]; real *src_2 = inputs->frames[i]; for(int j = 0; j < inputs->frame_size; j++) { real z = src_2[j] - src_1[j]; sum += z*z; } } if(average_frames) sum /= inputs->n_frames; if(average_frame_size) sum /= inputs->frame_size; internal_error += sum; } void MSEMeasurer::measureIteration() { if(average_examples) internal_error /= data->n_examples; if(binary_mode) file->write(&internal_error, sizeof(real), 1); else file->printf("%g\n", internal_error); file->flush(); reset(); } void MSEMeasurer::reset() { internal_error = 0; } MSEMeasurer::~MSEMeasurer() { } } torch3-3.1.orig/core/MSEMeasurer.h0000644000175000017500000000542010106445234017125 0ustar kalfakalfa00000000000000// Copyright (C) 2003--2004 Ronan Collobert (collober@idiap.ch) // // This file is part of Torch 3.1. // // All rights reserved. 
// // Redistribution and use in source and binary forms, with or without // modification, are permitted provided that the following conditions // are met: // 1. Redistributions of source code must retain the above copyright // notice, this list of conditions and the following disclaimer. // 2. Redistributions in binary form must reproduce the above copyright // notice, this list of conditions and the following disclaimer in the // documentation and/or other materials provided with the distribution. // 3. The name of the author may not be used to endorse or promote products // derived from this software without specific prior written permission. // // THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR // IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES // OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. // IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, // INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT // NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF // THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#ifndef MSE_MEASURER_INC
#define MSE_MEASURER_INC

#include "Measurer.h"

namespace Torch {

/** Mean Squared Error measurer.
    Compute the MSE between its inputs, and the targets of its
    associated #DataSet#.

    The options are registered as follows:
    \begin{verbatim}
    addBOption("average examples", &average_examples, true, "divided by the number of examples");
    addBOption("average frame size", &average_frame_size, true, "divided by the frame size");
    addBOption("average frames", &average_frames, true, "divided by the number of frames");
    \end{verbatim}

    Options:
    \begin{tabular}{lcll}
      "average examples"    & bool & divided by the number of examples & [true]\\
      "average frame size"  & bool & divided by the frame size         & [true]\\
      "average frames"      & bool & divided by the number of frames   & [true]
    \end{tabular}

    @author Ronan Collobert (collober@idiap.ch)
*/
class MSEMeasurer : public Measurer
{
  public:
    /// Option "average examples".
    bool average_examples;

    /// Option "average frame size".
    bool average_frame_size;

    /// Option "average frames".
    bool average_frames;

    /// The error accumulated so far.
    real internal_error;

    /// The sequence which is compared to the targets.
    Sequence *inputs;

    //-----

    ///
    MSEMeasurer(Sequence *inputs_, DataSet *data_, XFile *file_);

    //-----

    virtual void reset();
    virtual void measureExample();
    virtual void measureIteration();

    virtual ~MSEMeasurer();
};

}

#endif
torch3-3.1.orig/core/Machine.cc0000644000175000017500000000333610106445234016503 0ustar kalfakalfa00000000000000// Copyright (C) 2003--2004 Ronan Collobert (collober@idiap.ch) // // This file is part of Torch 3.1. // // All rights reserved. // // Redistribution and use in source and binary forms, with or without // modification, are permitted provided that the following conditions // are met: // 1. Redistributions of source code must retain the above copyright // notice, this list of conditions and the following disclaimer. // 2. Redistributions in binary form must reproduce the above copyright // notice, this list of conditions and the following disclaimer in the // documentation and/or other materials provided with the distribution. // 3. The name of the author may not be used to endorse or promote products // derived from this software without specific prior written permission. 
// // THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR // IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES // OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. // IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, // INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT // NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF // THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #include "Machine.h" #include "DataSet.h" namespace Torch { Machine::Machine() { outputs = NULL; } void Machine::setDataSet(DataSet *dataset_) { } void Machine::reset() { } Machine::~Machine() { } } torch3-3.1.orig/core/Machine.h0000644000175000017500000000433210106445234016342 0ustar kalfakalfa00000000000000// Copyright (C) 2003--2004 Ronan Collobert (collober@idiap.ch) // // This file is part of Torch 3.1. // // All rights reserved. // // Redistribution and use in source and binary forms, with or without // modification, are permitted provided that the following conditions // are met: // 1. Redistributions of source code must retain the above copyright // notice, this list of conditions and the following disclaimer. // 2. Redistributions in binary form must reproduce the above copyright // notice, this list of conditions and the following disclaimer in the // documentation and/or other materials provided with the distribution. // 3. The name of the author may not be used to endorse or promote products // derived from this software without specific prior written permission. 
// // THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR // IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES // OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. // IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, // INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT // NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF // THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #ifndef MACHINE_INC #define MACHINE_INC #include "Object.h" #include "DataSet.h" namespace Torch { /** #Object# which can compute some outputs, given some inputs. @author Ronan Collobert (collober@idiap.ch) */ class Machine : public Object { public: /// The outputs of the machine. Sequence *outputs; /// Machine(); /// Given a sequence, update #outputs#. virtual void forward(Sequence *sequence) = 0; /// Reset the machine. virtual void reset(); /** Some machine depends on a dataset. The trainer has to call this function when changing the dataset. */ virtual void setDataSet(DataSet *dataset_); virtual ~Machine(); }; } #endif torch3-3.1.orig/core/Makefile0000644000175000017500000000172710106445233016271 0ustar kalfakalfa00000000000000# get user and architecture specific options OS := $(shell uname -s) TORCHDIR := $(shell cd ..; pwd) include ../Makefile_options_$(OS) CC_FILES := $(wildcard *.cc) OBJS := $(foreach f,$(CC_FILES),$(OBJS_DIR)/$(patsubst %.cc,%.o,$(f))) all: $(LIBTORCH) $(LIBTORCH): $(OBJS) @echo "Archiving..." @$(AR) $(LIBTORCH) $(OBJS) $(OBJS_DIR)/%.o: %.cc @echo $< @$(CC) $(CFLAGS_$(MODE)) $(INCS) -o $@ -c $< distclean: @\rm -f .deps_* clean: @echo "Remove objects file and dependencies..." 
@\rm -Rf $(OBJS) $(LIBTORCH) @\rm -f .deps_$(VERSION_KEY) depend: @echo "Tracking dependencies..." @\rm -f .deps_$(VERSION_KEY) @for file in *.cc ; do printf "$(OBJS_DIR)/" >> .deps_$(VERSION_KEY); $(DEP) $(CFLAGS_$(MODE)) $(INCS) $$file >> .deps_$(VERSION_KEY); done .deps_$(VERSION_KEY): @echo ">>> Please do a 'make depend' <<<" exit 10 ifneq ($(MAKECMDGOALS),distclean) ifneq ($(MAKECMDGOALS),clean) ifneq ($(MAKECMDGOALS),depend) include .deps_$(VERSION_KEY) endif endif endif torch3-3.1.orig/core/MatDataSet.cc0000644000175000017500000001631010106445234017122 0ustar kalfakalfa00000000000000// Copyright (C) 2003--2004 Ronan Collobert (collober@idiap.ch) // // This file is part of Torch 3.1. // // All rights reserved. // // Redistribution and use in source and binary forms, with or without // modification, are permitted provided that the following conditions // are met: // 1. Redistributions of source code must retain the above copyright // notice, this list of conditions and the following disclaimer. // 2. Redistributions in binary form must reproduce the above copyright // notice, this list of conditions and the following disclaimer in the // documentation and/or other materials provided with the distribution. // 3. The name of the author may not be used to endorse or promote products // derived from this software without specific prior written permission. // // THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR // IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES // OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
// IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, // INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT // NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF // THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #include "IOBufferize.h" #include "MatDataSet.h" #include "IOAscii.h" #include "IOMulti.h" #include "IOBin.h" #include "IOSub.h" namespace Torch { MatDataSet::MatDataSet(const char *filename, int n_inputs_, int n_targets_, bool one_file_is_one_sequence, int max_load, bool binary_mode) { io_allocator = new Allocator; if( (n_inputs_ < 0) && (n_targets < 0) ) error("MatDataSet: cannot guess n_inputs n_targets!"); IOSequence *io_file = NULL; if(binary_mode) io_file = new(io_allocator) IOBin(filename, one_file_is_one_sequence, max_load); else io_file = new(io_allocator) IOAscii(filename, one_file_is_one_sequence, max_load); init_(io_file, n_inputs_, n_targets_); } MatDataSet::MatDataSet(char **filenames, int n_files_, int n_inputs_, int n_targets_, bool one_file_is_one_sequence, int max_load, bool binary_mode) { io_allocator = new Allocator; if(n_files_ <= 0) error("MatDataSet: check the number of files!"); IOSequence **io_files = (IOSequence **)io_allocator->alloc(sizeof(IOSequence *)*n_files_); if(max_load > 0) { int i = 0; while( (max_load > 0) && (i < n_files_) ) { if(binary_mode) io_files[i] = new(io_allocator) IOBin(filenames[i], one_file_is_one_sequence, max_load); else io_files[i] = new(io_allocator) IOAscii(filenames[i], one_file_is_one_sequence, max_load); max_load -= io_files[i]->n_sequences; i++; } n_files_ = i; } else { if(binary_mode) { for(int i = 0; i < n_files_; i++) io_files[i] = new(io_allocator) IOBin(filenames[i], one_file_is_one_sequence); } else { 
for(int i = 0; i < n_files_; i++) io_files[i] = new(io_allocator) IOAscii(filenames[i], one_file_is_one_sequence); } } IOMulti *io_file = new(io_allocator) IOMulti(io_files, n_files_); init_(io_file, n_inputs_, n_targets_); } MatDataSet::MatDataSet(char **input_filenames, char **target_filenames, int n_files_, int max_load, bool binary_mode) { IOSequence *io_inputs = NULL; IOSequence *io_targets = NULL; io_allocator = new Allocator; if(n_files_ <= 0) error("MatDataSet: check the number of files!"); if(input_filenames) { IOSequence **input_io_files = (IOSequence **)io_allocator->alloc(sizeof(IOSequence *)*n_files_); int max_load_ = max_load; int n_files__ = 0; if(max_load_ > 0) { int i = 0; while( (max_load_ > 0) && (i < n_files_) ) { if(binary_mode) input_io_files[i] = new(io_allocator) IOBin(input_filenames[i], true, max_load_); else input_io_files[i] = new(io_allocator) IOAscii(input_filenames[i], true, max_load_); max_load_ -= input_io_files[i]->n_sequences; i++; } n_files__ = i; } else { if(binary_mode) { for(int i = 0; i < n_files_; i++) input_io_files[i] = new(io_allocator) IOBin(input_filenames[i], true); } else { for(int i = 0; i < n_files_; i++) input_io_files[i] = new(io_allocator) IOAscii(input_filenames[i], true); } n_files__ = n_files_; } io_inputs = new(io_allocator) IOMulti(input_io_files, n_files__); } if(target_filenames) { IOSequence **target_io_files = (IOSequence **)io_allocator->alloc(sizeof(IOSequence *)*n_files_); int max_load_ = max_load; int n_files__ = 0; if(max_load_ > 0) { int i = 0; while( (max_load_ > 0) && (i < n_files_) ) { if(binary_mode) target_io_files[i] = new(io_allocator) IOBin(target_filenames[i], true, max_load_); else target_io_files[i] = new(io_allocator) IOAscii(target_filenames[i], true, max_load_); max_load_ -= target_io_files[i]->n_sequences; i++; } n_files__ = i; } else { if(binary_mode) { for(int i = 0; i < n_files_; i++) target_io_files[i] = new(io_allocator) IOBin(target_filenames[i], true); } else { for(int i = 0; 
i < n_files_; i++) target_io_files[i] = new(io_allocator) IOAscii(target_filenames[i], true); } n_files__ = n_files_; } io_targets = new(io_allocator) IOMulti(target_io_files, n_files__); } MemoryDataSet::init(io_inputs, io_targets); message("MatDataSet: %d examples loaded [%d inputs and %d targets detected]", n_examples, n_inputs, n_targets); delete io_allocator; } void MatDataSet::init_(IOSequence *io_file, int n_inputs_, int n_targets_) { IOSequence *io_inputs = NULL; IOSequence *io_targets = NULL; if( (n_inputs_ > io_file->frame_size) || (n_targets_ > io_file->frame_size) ) error("MatDataSet: n_inputs (%d) or n_targets (%d) too large (> %d) !", n_inputs_, n_targets_, io_file->frame_size); if(n_inputs_ < 0) n_inputs_ = io_file->frame_size - n_targets_; if(n_targets_ < 0) n_targets_ = io_file->frame_size - n_inputs_; if(io_file->frame_size != (n_inputs_ + n_targets_)) error("MatDataSet: %d columns in the file != %d inputs + %d targets", io_file->frame_size, n_inputs_, n_targets_); IOBufferize *io_buffer = NULL; if( (n_inputs_ > 0) && (n_targets_ > 0) ) io_buffer = new(io_allocator) IOBufferize(io_file); if(n_inputs_ > 0) { if(n_targets_ > 0) io_inputs = new(io_allocator) IOSub(io_buffer, 0, n_inputs_); else io_inputs = io_file; } if(n_targets_ > 0) { if(n_inputs_ > 0) io_targets = new(io_allocator) IOSub(io_buffer, n_inputs_, n_targets_); else io_targets = io_file; } MemoryDataSet::init(io_inputs, io_targets); message("MatDataSet: %d examples loaded", n_examples); delete io_allocator; } MatDataSet::~MatDataSet() { } } torch3-3.1.orig/core/MatDataSet.h0000644000175000017500000000704110106445234016765 0ustar kalfakalfa00000000000000// Copyright (C) 2003--2004 Ronan Collobert (collober@idiap.ch) // // This file is part of Torch 3.1. // // All rights reserved. // // Redistribution and use in source and binary forms, with or without // modification, are permitted provided that the following conditions // are met: // 1. 
//    Redistributions of source code must retain the above copyright
//    notice, this list of conditions and the following disclaimer.
// 2. Redistributions in binary form must reproduce the above copyright
//    notice, this list of conditions and the following disclaimer in the
//    documentation and/or other materials provided with the distribution.
// 3. The name of the author may not be used to endorse or promote products
//    derived from this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
// IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
// OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
// IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
// INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
// NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
// THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

#ifndef MAT_DATA_SET_INC
#define MAT_DATA_SET_INC

#include "MemoryDataSet.h"

namespace Torch {

/** Matrix DataSet...
    The standard dataset, with data fully loaded in memory.
    Useful for large databases.

    @see IOAscii
    @see IOBin
    @author Ronan Collobert (collober@idiap.ch)
*/
class MatDataSet : public MemoryDataSet
{
  private:
    /// Shared end of the constructors taking #n_inputs_# and #n_targets_#.
    void init_(IOSequence *io_file, int n_inputs_, int n_targets_);

    /// Allocator of the IO objects created by the constructors.
    Allocator *io_allocator;

  public:
    /** Create a new dataset from the file #filename#.
        If the file contains only one sequence, set #one_file_is_one_sequence# to true.
        If there are several sequences, and you want to load only the first #n# ones,
        set #max_load# to #n# (else #max_load# should be a negative number).
        If #binary_mode# is true, the IOBin format will be used, else it will
        be the IOAscii format.

        Input and target sequences will have the same number of frames.
        For \emph{each} frame given by the dataset, the first #n_inputs_# reals are for the inputs
        and then the next #n_targets_# reals are for the targets. (#n_inputs_# is the input frame size
        and #n_targets_# is the target frame size).
    */
    MatDataSet(const char *filename, int n_inputs_, int n_targets_, bool one_file_is_one_sequence=false, int max_load=-1, bool binary_mode=false);

    /** Same as the previous constructor, but for several files.
        If #one_file_is_one_sequence# is true, each file will be
        considered as if it had only one sequence.
    */
    MatDataSet(char **filenames, int n_files_, int n_inputs_, int n_targets_, bool one_file_is_one_sequence=false, int max_load=-1, bool binary_mode=false);

    /** Here the inputs and the targets are in separate files.
        Input and target frame sizes are therefore auto-detected.
        One file must correspond to one sequence.
    */
    MatDataSet(char **input_filenames, char **target_filenames, int n_files_, int max_load=-1, bool binary_mode=false);

    virtual ~MatDataSet();
};

}

#endif
torch3-3.1.orig/core/MeanVarNorm.cc0000644000175000017500000001267710106445234017334 0ustar kalfakalfa00000000000000
// Copyright (C) 2003--2004 Ronan Collobert (collober@idiap.ch)
//
// This file is part of Torch 3.1.
//
// All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions
// are met:
// 1. Redistributions of source code must retain the above copyright
//    notice, this list of conditions and the following disclaimer.
// 2. Redistributions in binary form must reproduce the above copyright
//    notice, this list of conditions and the following disclaimer in the
//    documentation and/or other materials provided with the distribution.
// 3.
The name of the author may not be used to endorse or promote products // derived from this software without specific prior written permission. // // THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR // IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES // OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. // IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, // INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT // NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF // THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #include "MeanVarNorm.h" #include "XFile.h" namespace Torch { MeanVarNorm::MeanVarNorm(DataSet *data, bool norm_inputs, bool norm_targets) { inputs_mean = NULL; inputs_stdv = NULL; targets_mean = NULL; targets_stdv = NULL; n_inputs = data->n_inputs; n_targets = data->n_targets; if(norm_inputs) { inputs_mean = (real *)allocator->alloc(sizeof(real)*n_inputs); inputs_stdv = (real *)allocator->alloc(sizeof(real)*n_inputs); for(int i = 0; i < n_inputs; i++) { inputs_mean[i] = 0; inputs_stdv[i] = 0; } } if(norm_targets) { targets_mean = (real *)allocator->alloc(sizeof(real)*n_targets); targets_stdv = (real *)allocator->alloc(sizeof(real)*n_targets); for(int i = 0; i < n_targets; i++) { targets_mean[i] = 0; targets_stdv[i] = 0; } } int n_total_input_frames = 0; int n_total_target_frames = 0; for(int t = 0; t < data->n_examples; t++) { data->setExample(t); // Les inputs if(norm_inputs) { for(int i = 0; i < data->inputs->n_frames; i++) { real *src_ = data->inputs->frames[i]; for(int j = 0; j < n_inputs; j++) { real z = src_[j]; inputs_mean[j] += z; inputs_stdv[j] += z*z; } } n_total_input_frames += 
data->inputs->n_frames; } // Les targets if(norm_targets) { for(int i = 0; i < data->targets->n_frames; i++) { real *src_ = data->targets->frames[i]; for(int j = 0; j < n_targets; j++) { real z = src_[j]; targets_mean[j] += z; targets_stdv[j] += z*z; } } n_total_target_frames += data->targets->n_frames; } } if(norm_inputs) { for(int i = 0; i < n_inputs; i++) { inputs_mean[i] /= (real)n_total_input_frames; inputs_stdv[i] /= (real)n_total_input_frames; inputs_stdv[i] -= inputs_mean[i]*inputs_mean[i]; if(inputs_stdv[i] <= 0) { warning("MeanVarNorm: input column %d has a null stdv. Replaced by 1.", i); inputs_stdv[i] = 1.; } else inputs_stdv[i] = sqrt(inputs_stdv[i]); } } if(norm_targets) { for(int i = 0; i < n_targets; i++) { targets_mean[i] /= (real)n_total_target_frames; targets_stdv[i] /= (real)n_total_target_frames; targets_stdv[i] -= targets_mean[i]*targets_mean[i]; if(targets_stdv[i] <= 0) { warning("MeanVarNorm: target column %d has a null stdv. Replaced by 1.", i); targets_stdv[i] = 1.; } else targets_stdv[i] = sqrt(targets_stdv[i]); } } } void MeanVarNorm::normalizeSequence(Sequence *sequence, real *mean, real *stdv) { for(int i = 0; i < sequence->n_frames; i++) { real *ptr_ = sequence->frames[i]; for(int k = 0; k < sequence->frame_size; k++) ptr_[k] = (ptr_[k] - mean[k])/stdv[k]; } } void MeanVarNorm::preProcessInputs(Sequence *inputs) { if(!inputs_mean) return; normalizeSequence(inputs, inputs_mean, inputs_stdv); } void MeanVarNorm::preProcessTargets(Sequence *targets) { if(!targets_mean) return; normalizeSequence(targets, targets_mean, targets_stdv); } void MeanVarNorm::loadXFile(XFile *file) { if(inputs_mean) { file->taggedRead(inputs_mean, sizeof(real), n_inputs, "IMEANS"); file->taggedRead(inputs_stdv, sizeof(real), n_inputs, "ISTDVS"); } if(targets_mean) { file->taggedRead(targets_mean, sizeof(real), n_targets, "TMEANS"); file->taggedRead(targets_stdv, sizeof(real), n_targets, "TSTDVS"); } } void MeanVarNorm::saveXFile(XFile *file) { if(inputs_mean) { 
file->taggedWrite(inputs_mean, sizeof(real), n_inputs, "IMEANS"); file->taggedWrite(inputs_stdv, sizeof(real), n_inputs, "ISTDVS"); } if(targets_mean) { file->taggedWrite(targets_mean, sizeof(real), n_targets, "TMEANS"); file->taggedWrite(targets_stdv, sizeof(real), n_targets, "TSTDVS"); } } MeanVarNorm::~MeanVarNorm() { } } torch3-3.1.orig/core/MeanVarNorm.h0000644000175000017500000000600110106445234017156 0ustar kalfakalfa00000000000000// Copyright (C) 2003--2004 Ronan Collobert (collober@idiap.ch) // // This file is part of Torch 3.1. // // All rights reserved. // // Redistribution and use in source and binary forms, with or without // modification, are permitted provided that the following conditions // are met: // 1. Redistributions of source code must retain the above copyright // notice, this list of conditions and the following disclaimer. // 2. Redistributions in binary form must reproduce the above copyright // notice, this list of conditions and the following disclaimer in the // documentation and/or other materials provided with the distribution. // 3. The name of the author may not be used to endorse or promote products // derived from this software without specific prior written permission. // // THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR // IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES // OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. // IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, // INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT // NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF // THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
#ifndef MEAN_VAR_NORM_INC
#define MEAN_VAR_NORM_INC

#include "PreProcessing.h"
#include "DataSet.h"

namespace Torch {

/** In the constructor, it computes the mean and the
    standard deviation over all the frames in the given
    DataSet (by default, only for inputs).

    Then, when calling pre-processing methods,
    it normalizes each column by this computed
    mean and stdv. (subtracts the mean, then
    divides by the standard deviation).

    As a result, the mean of the full set of
    frames given by the concatenation of all
    calls to #setExample()# will be 0, and
    the variance will be 1.

    @author Ronan Collobert (collober@idiap.ch)
 */
class MeanVarNorm : public PreProcessing
{
  private:
    /// Normalize all the frames of #sequence# with the given #mean# and #stdv# arrays.
    void normalizeSequence(Sequence *sequence, real *mean, real *stdv);

  public:
    /// Input frame size
    int n_inputs;

    /// Target frame size
    int n_targets;

    /// Inputs means array
    real *inputs_mean;

    /// Targets means array
    real *targets_mean;

    /// Inputs standard deviations array
    real *inputs_stdv;

    /// Targets standard deviations array
    real *targets_stdv;

    /** Compute the statistics over #data#: for the inputs if
        #norm_inputs# is true, for the targets if #norm_targets# is true.
    */
    MeanVarNorm(DataSet *data, bool norm_inputs=true, bool norm_targets=false);

    virtual void preProcessInputs(Sequence *inputs);
    virtual void preProcessTargets(Sequence *targets);

    /// Load means and standard deviations
    virtual void loadXFile(XFile *file);

    /// Save means and standard deviations
    virtual void saveXFile(XFile *file);

    virtual ~MeanVarNorm();
};

}

#endif
torch3-3.1.orig/core/Measurer.cc0000644000175000017500000000364210106445234016722 0ustar kalfakalfa00000000000000
// Copyright (C) 2003--2004 Ronan Collobert (collober@idiap.ch)
//
// This file is part of Torch 3.1.
//
// All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions
// are met:
// 1. Redistributions of source code must retain the above copyright
//    notice, this list of conditions and the following disclaimer.
// 2.
Redistributions in binary form must reproduce the above copyright // notice, this list of conditions and the following disclaimer in the // documentation and/or other materials provided with the distribution. // 3. The name of the author may not be used to endorse or promote products // derived from this software without specific prior written permission. // // THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR // IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES // OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. // IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, // INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT // NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF // THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #include "Measurer.h" #include "DataSet.h" #include "DiskXFile.h" namespace Torch { Measurer::Measurer(DataSet *data_, XFile *file_) { file = file_; data = data_; addBOption("binary mode", &binary_mode, false, "binary mode"); } void Measurer::reset() { } void Measurer::measureExample() { } void Measurer::measureIteration() { } void Measurer::measureEnd() { } Measurer::~Measurer() { } } torch3-3.1.orig/core/Measurer.h0000644000175000017500000000612010106445234016556 0ustar kalfakalfa00000000000000// Copyright (C) 2003--2004 Ronan Collobert (collober@idiap.ch) // // This file is part of Torch 3.1. // // All rights reserved. // // Redistribution and use in source and binary forms, with or without // modification, are permitted provided that the following conditions // are met: // 1. 
//    Redistributions of source code must retain the above copyright
//    notice, this list of conditions and the following disclaimer.
// 2. Redistributions in binary form must reproduce the above copyright
//    notice, this list of conditions and the following disclaimer in the
//    documentation and/or other materials provided with the distribution.
// 3. The name of the author may not be used to endorse or promote products
//    derived from this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
// IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
// OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
// IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
// INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
// NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
// THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

#ifndef MEASURER_INC
#define MEASURER_INC

#include "Object.h"
#include "Machine.h"
#include "DataSet.h"
#include "XFile.h"

namespace Torch {

/** Used to measure what you want during training/testing.
    Usually, trainers call measurers.
    The #DataSet# associated with the measurer allows us to know
    when the measurer should be called.
    (if the #DataSet# is the train dataset, the measurer
    will be called during the train phase...)

    Options:
    \begin{tabular}{lcll}
      "binary mode"  &  bool  &  binary mode for output & [false]
    \end{tabular}

    @author Ronan Collobert (collober@idiap.ch)
*/
class Measurer : public Object
{
  public:
    /// The measurer saves measures in this file.
    XFile *file;

    /// The associated #DataSet#.
    DataSet *data;

    /// Is the measurer in binary mode? (value of the "binary mode" option)
    bool binary_mode;

    //-----

    /** Measurer with the associated #DataSet# #data_#,
        which puts results in the file #file_#.
    */
    Measurer(DataSet *data_, XFile *file_);

    /** Measure something for the current example.
        (This example has been selected in #data#
        by the trainer)
    */
    virtual void measureExample();

    /** Measure something after the current iteration.
        (After the call of #measureExample()# for each
        example of #data#)
    */
    virtual void measureIteration();

    /// Measure something at the end of the training/testing phase.
    virtual void measureEnd();

    /// Reset the measurer. (By default, do nothing).
    virtual void reset();

    //-----

    virtual ~Measurer();
};

}

#endif
torch3-3.1.orig/core/MemoryDataSet.cc0000644000175000017500000001374710106445234017664 0ustar kalfakalfa00000000000000
// Copyright (C) 2003--2004 Ronan Collobert (collober@idiap.ch)
//
// This file is part of Torch 3.1.
//
// All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions
// are met:
// 1. Redistributions of source code must retain the above copyright
//    notice, this list of conditions and the following disclaimer.
// 2. Redistributions in binary form must reproduce the above copyright
//    notice, this list of conditions and the following disclaimer in the
//    documentation and/or other materials provided with the distribution.
// 3. The name of the author may not be used to endorse or promote products
//    derived from this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
// IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
// OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
// IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, // INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT // NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF // THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #include "MemoryDataSet.h" namespace Torch { MemoryDataSet::MemoryDataSet() { inputs_array = NULL; targets_array = NULL; } void MemoryDataSet::setRealExample(int t, bool set_inputs, bool set_targets) { real_current_example_index = t; if(inputs_array && set_inputs) inputs = inputs_array[t]; if(targets_array && set_targets) targets = targets_array[t]; } void MemoryDataSet::pushExample() { pushed_examples->push(&inputs, sizeof(Sequence *)); pushed_examples->push(&targets, sizeof(Sequence *)); pushed_examples->push(&real_current_example_index, sizeof(int)); } void MemoryDataSet::popExample() { pushed_examples->pop(); pushed_examples->pop(); pushed_examples->pop(); } void MemoryDataSet::init(IOSequence *io_inputs, IOSequence *io_targets) { int n_examples_ = 0; if(io_inputs) n_examples_ = io_inputs->n_sequences; if(io_targets) n_examples_ = io_targets->n_sequences; if(io_inputs && io_targets) { if(io_inputs->n_sequences != io_targets->n_sequences) error("MemoryDataSet: inputs IO and targets IO don't have the same number of sequences!"); } // The parent DataSet::init(n_examples_, (io_inputs ? io_inputs->frame_size : 0), (io_targets ? io_targets->frame_size : 0)); // Yeah. 
if(n_inputs > 0) { inputs_array = (Sequence **)allocator->alloc(sizeof(Sequence *)*n_examples); allocData(io_inputs, inputs_array); } if(n_targets > 0) { targets_array = (Sequence **)allocator->alloc(sizeof(Sequence *)*n_examples); allocData(io_targets, targets_array); } for(int t = 0; t < n_examples; t++) { if(n_inputs > 0) io_inputs->getSequence(t, inputs_array[t]); if(n_targets > 0) io_targets->getSequence(t, targets_array[t]); } } void MemoryDataSet::setInputs(Sequence **inputs_, int n_sequences_) { if(n_sequences_ <= 0) error("MemoryDataSet: invalid number of sequences in provided inputs"); // Deja alloue ? if(selected_examples) { if(n_sequences_ != n_real_examples) error("MemoryDataSet: invalid number of sequences in provided inputs"); } else DataSet::init(n_sequences_, inputs_[0]->frame_size, 0); n_inputs = inputs_[0]->frame_size; for(int t = 0; t < n_sequences_; t++) { if(inputs_[t]->frame_size != n_inputs) error("MemoryDataSet: sorry, provided inputs sequences don't have the same frame size"); } inputs_array = inputs_; } void MemoryDataSet::setTargets(Sequence **targets_, int n_sequences_) { if(n_sequences_ <= 0) error("MemoryDataSet: invalid number of sequences in provided targets"); // Deja alloue ? 
if(selected_examples) { if(n_sequences_ != n_real_examples) error("MemoryDataSet: invalid number of sequences in provided targets"); } else DataSet::init(n_sequences_, 0, targets_[0]->frame_size); n_targets = targets_[0]->frame_size; for(int t = 0; t < n_sequences_; t++) { if(targets_[t]->frame_size != n_targets) error("MemoryDataSet: sorry, provided targets sequences don't have the same frame size"); } targets_array = targets_; } void MemoryDataSet::getNumberOfFrames(int t, int *n_input_frames_, int *n_target_frames_) { if( (n_inputs > 0) && n_input_frames_ ) *n_input_frames_ = inputs_array[selected_examples[t]]->n_frames; if( (n_targets > 0) && n_target_frames_ ) *n_target_frames_ = targets_array[selected_examples[t]]->n_frames; } void MemoryDataSet::allocData(IOSequence *io_torch, Sequence **sequences_array) { int n_total_frames = io_torch->getTotalNumberOfFrames(); int frame_size = io_torch->frame_size; Sequence *sequences_buffer = (Sequence *)allocator->alloc(sizeof(Sequence)*n_examples); real **frames_pointers_buffer = (real **)allocator->alloc(sizeof(real *)*n_total_frames); real *frames_buffer = (real *)allocator->alloc(sizeof(real)*n_total_frames*frame_size); for(int i = 0; i < n_total_frames; i++) frames_pointers_buffer[i] = frames_buffer+i*frame_size; for(int t = 0; t < io_torch->n_sequences; t++) { int n_frames = io_torch->getNumberOfFrames(t); sequences_array[t] = new(allocator, sequences_buffer) Sequence(frames_pointers_buffer, n_frames, frame_size); sequences_buffer++; frames_pointers_buffer += n_frames; } } void MemoryDataSet::preProcess(PreProcessing *pre_processing) { for(int t = 0; t < n_examples; t++) { setExample(t); if(n_inputs > 0) pre_processing->preProcessInputs(inputs); if(n_targets > 0) pre_processing->preProcessTargets(targets); } } MemoryDataSet::~MemoryDataSet() { } } torch3-3.1.orig/core/MemoryDataSet.h0000644000175000017500000000646510106445234017525 0ustar kalfakalfa00000000000000// Copyright (C) 2003--2004 Ronan Collobert 
// (collober@idiap.ch)
//
// This file is part of Torch 3.1.
//
// All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions
// are met:
// 1. Redistributions of source code must retain the above copyright
//    notice, this list of conditions and the following disclaimer.
// 2. Redistributions in binary form must reproduce the above copyright
//    notice, this list of conditions and the following disclaimer in the
//    documentation and/or other materials provided with the distribution.
// 3. The name of the author may not be used to endorse or promote products
//    derived from this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
// IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
// OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
// IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
// INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
// NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
// THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

#ifndef MEMORY_DATA_SET_INC
#define MEMORY_DATA_SET_INC

#include "DataSet.h"
#include "IOSequence.h"

namespace Torch {

/** DataSet where data is fully loaded in memory.

    Inputs and targets are put in two arrays: #inputs_array#
    and #targets_array#. You can fill these fields by using
    the #init()# method (if you are using IOSequence).

    But you could imagine a MemoryDataSet where you fill these
    fields by hand (based on the #init()# method). MatDataSet
    is a good example if you plan to do a new MemoryDataSet.

    @see MatDataSet
    @author Ronan Collobert (collober@idiap.ch)
*/
class MemoryDataSet : public DataSet
{
  private:
    /// Allocate the sequences described by #io_torch# and put them in #sequences_array#.
    virtual void allocData(IOSequence *io_torch, Sequence **sequences_array);

  public:
    /// Inputs array.
    Sequence **inputs_array;

    /// Targets array.
    Sequence **targets_array;

    /// Create an empty dataset (the two arrays are NULL).
    MemoryDataSet();

    /** May help you to initialize the DataSet if you're
        using IOSequence.
        You should call this method in the constructor of
        your subclasses.
        Initialize #n_examples#, #n_real_examples#, #n_inputs# and #n_targets#.
        #inputs_array# and #targets_array# are filled with sequences given
        by the IOSequence classes (#io_outputs# gives the targets).
    */
    void init(IOSequence *io_inputs, IOSequence *io_outputs);

    /// Override current inputs array.
    void setInputs(Sequence **inputs_, int n_sequences_);

    /// Override current targets array.
    void setTargets(Sequence **targets_, int n_sequences_);

    virtual void getNumberOfFrames(int t, int *n_input_frames_, int *n_target_frames_);
    virtual void preProcess(PreProcessing *pre_processing);
    virtual void setRealExample(int t, bool set_inputs=true, bool set_targets=true);
    virtual void pushExample();
    virtual void popExample();

    //-----

    virtual ~MemoryDataSet();
};

}

#endif
torch3-3.1.orig/core/MemoryXFile.cc0000644000175000017500000002524110106445234017336 0ustar kalfakalfa00000000000000
// Copyright (C) 2003--2004 Ronan Collobert (collober@idiap.ch)
//
// This file is part of Torch 3.1.
//
// All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions
// are met:
// 1. Redistributions of source code must retain the above copyright
//    notice, this list of conditions and the following disclaimer.
// 2. Redistributions in binary form must reproduce the above copyright
//    notice, this list of conditions and the following disclaimer in the
//    documentation and/or other materials provided with the distribution.
// 3.
The name of the author may not be used to endorse or promote products // derived from this software without specific prior written permission. // // THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR // IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES // OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. // IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, // INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT // NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF // THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #include "MemoryXFile.h" namespace Torch { IMPLEMENT_NEW_LIST(MemoryXFileList, MemoryXFileNode); char MemoryXFile::petit_message_pour_melanie[10000]; MemoryXFile::MemoryXFile(MemoryXFileList *memory_, int size_, int buffer_format_size_) { addIOption("buffer size", &buffer_size, 65536, "buffer size for writing"); buffer_format_size = buffer_format_size_; buffer_format = (char *)allocator->alloc(buffer_format_size); // Copy the list, but not inside memory = new(allocator) MemoryXFileList; for(int i = 0; i < memory_->n_nodes; i++) { MemoryXFileNode *node_ = (MemoryXFileNode *)memory->allocator->alloc(sizeof(MemoryXFileNode)); node_->size = memory_->nodes[i]->size; node_->mem = memory_->nodes[i]->mem; memory->addNode(node_); } /*** Remarque: Si je me trompe pas, size c'est ce qui est ecrit. (genre si je fais des seek, je sais ce qu'il y a). total_size, c'est ce qui est alloue. 
****/ // Check the size if(size_ < 0) { total_size = 0; for(int i = 0; i < memory->n_nodes; i++) total_size += memory->nodes[i]->size; size = total_size; } else { size = size_; total_size = size_; } // Boxon position = 0; internal_memory_node_index = 0; internal_position_in_the_node = 0; is_eof = false; } MemoryXFile::MemoryXFile(void *memory_, int size_, int buffer_format_size_) { addIOption("buffer size", &buffer_size, 65536, "buffer size for writing"); buffer_format_size = buffer_format_size_; buffer_format = (char *)allocator->alloc(buffer_format_size); // Copy the memory_, but not inside memory = new(allocator) MemoryXFileList; MemoryXFileNode *node_ = (MemoryXFileNode *)memory->allocator->alloc(sizeof(MemoryXFileNode)); node_->size = size_; node_->mem = memory_; memory->addNode(node_); // Check the size size = size_; total_size = size_; // Boxon position = 0; internal_memory_node_index = 0; internal_position_in_the_node = 0; is_eof = false; } MemoryXFile::MemoryXFile(int buffer_format_size_) { addIOption("buffer size", &buffer_size, 65536, "buffer size for writing"); buffer_format_size = buffer_format_size_; buffer_format = (char *)allocator->alloc(buffer_format_size); // The list... memory = new(allocator) MemoryXFileList; // The size... size = 0; total_size = 0; // Le boxon... position = 0; internal_memory_node_index = 0; internal_position_in_the_node = 0; is_eof = false; } int MemoryXFile::read(void *ptr, int block_size, int n_blocks) { // Check eof if(position == size) { is_eof = true; return 0; } // Check what to read int size_to_read = block_size*n_blocks; if((size-position) < size_to_read) { // On va tomber sur la fin du fichier. 
is_eof = true; size_to_read = size-position; } if(!size_to_read) return 0; // Read it int size_read = size_to_read; char *w_ptr = (char *)ptr; char *r_ptr = (char *)memory->nodes[internal_memory_node_index]->mem; while(size_to_read--) { if(internal_position_in_the_node == memory->nodes[internal_memory_node_index]->size) { internal_position_in_the_node = 0; r_ptr = (char *)memory->nodes[++internal_memory_node_index]->mem; } *w_ptr++ = r_ptr[internal_position_in_the_node++]; } // Tchao boy position += size_read; return(size_read); } int MemoryXFile::write(void *ptr, int block_size, int n_blocks) { int size_to_write = block_size*n_blocks; if(!size_to_write) return 0; char *r_ptr = (char *)ptr; // If there is still some space... if((total_size-position) > 0) { int size_to_write_now; if(size_to_write < (total_size-position)) size_to_write_now = size_to_write; else size_to_write_now = total_size-position; size_to_write -= size_to_write_now; char *w_ptr = (char *)memory->nodes[internal_memory_node_index]->mem; while(size_to_write_now--) { if(internal_position_in_the_node == memory->nodes[internal_memory_node_index]->size) { internal_position_in_the_node = 0; w_ptr = (char *)memory->nodes[++internal_memory_node_index]->mem; } w_ptr[internal_position_in_the_node++] = *r_ptr++; } } // Still something to write ? if(size_to_write) { int new_block_size = (size_to_write < buffer_size ? buffer_size : size_to_write); MemoryXFileNode *node_ = (MemoryXFileNode *)memory->allocator->alloc(sizeof(MemoryXFileNode)); node_->size = new_block_size; node_->mem = memory->allocator->alloc(new_block_size); memory->addNode(node_); total_size += new_block_size; internal_memory_node_index = memory->n_nodes-1; memcpy(memory->nodes[internal_memory_node_index]->mem, r_ptr, size_to_write); internal_position_in_the_node = size_to_write; } // Tchao girl position += block_size*n_blocks; // Check si on depasse... 
if(position > size) size = position; return(block_size*n_blocks); } int MemoryXFile::eof() { return is_eof; } int MemoryXFile::flush() { return 0; } void MemoryXFile::rewind() { seek(0L, SEEK_SET); } int MemoryXFile::printf(const char *format, ...) { va_list args; va_start(args, format); int res = vsprintf(petit_message_pour_melanie, format, args); va_end(args); write(petit_message_pour_melanie, 1, strlen(petit_message_pour_melanie)); return res; } int MemoryXFile::scanf(const char *format, void *ptr) { // DEBUG: ne sette pas le eof si on tombe sur la fin du fichier en lisant. // A CORRIGER! // DEBUG: correction faite le 19/05/2004 // DEBUG: n_lus recupere par %n, ce qui n'est pas conseille. arg. // Check eof if(position == size) { is_eof = true; return 0; } concat(); strcpy(buffer_format, format); strcat(buffer_format, "%n"); int n_lus; int res = sscanf(((char *)memory->nodes[0]->mem)+position, buffer_format, ptr, &n_lus); if(res == EOF) { position = size; internal_position_in_the_node = size; } else { position += n_lus; internal_position_in_the_node += n_lus; } if(position == size) is_eof = true; if(position > size) error("MemoryXFile: fatal read error, you have a bug in your code!"); return res; } void MemoryXFile::concat() { int the_size = size; if(!the_size) return; if(memory->n_nodes < 1) return; if(memory->n_nodes == 1) { if(size > 0) { if(((char *)memory->nodes[0]->mem)[size-1] == '\0') return; } } char *big_buffer = (char *)Allocator::sysAlloc(the_size+1); big_buffer[the_size] = '\0'; char *dest = big_buffer; for(int i = 0; i < memory->n_nodes; i++) { int size_ = memory->nodes[i]->size; if(size_ < the_size) { memcpy(dest, memory->nodes[i]->mem, size_); the_size -= size_; dest += size_; } else { memcpy(dest, memory->nodes[i]->mem, the_size); break; } } // Scrappe the list allocator->free(memory); // Copy the list, but not inside memory = new(allocator) MemoryXFileList; MemoryXFileNode *node_ = (MemoryXFileNode 
*)memory->allocator->alloc(sizeof(MemoryXFileNode)); node_->size = size+1; node_->mem = big_buffer; memory->addNode(node_); memory->allocator->retain(big_buffer); // Check the size total_size = size+1; // Boxon internal_memory_node_index = 0; internal_position_in_the_node = position; } long MemoryXFile::tell() { return((long)position); } int MemoryXFile::seek(long offset, int whence) { int new_pos = 0; switch(whence) { case SEEK_SET: new_pos = (int)offset; break; case SEEK_CUR: new_pos = position + (int)offset; break; case SEEK_END: new_pos = size - (int)offset; break; } if( (new_pos > size) || (new_pos < 0) ) return -1; int new_pos_ = new_pos; int internal_memory_node_index_ = 0; while(new_pos_ >= memory->nodes[internal_memory_node_index_]->size) new_pos_ -= memory->nodes[internal_memory_node_index_]->size; // Boxon position = new_pos; internal_memory_node_index = internal_memory_node_index_; internal_position_in_the_node = new_pos_; is_eof = false; // Efface le flag de fin. return 0; } char *MemoryXFile::gets(char *dest, int size_) { // Check eof if(position == size) { is_eof = true; return NULL; } // Check what to read // Faire gaffe au '\0' en plus a mettre a la fin... int size_to_read = size_-1; if((size-position) < size_to_read) { size_to_read = size-position; // On *risque* de tomber sur la fin du fichier. is_eof = true; } if(!size_to_read) return NULL; // Read it int size_read = 0; char *w_ptr = dest; char *r_ptr = (char *)memory->nodes[internal_memory_node_index]->mem; while(size_to_read--) { if(internal_position_in_the_node == memory->nodes[internal_memory_node_index]->size) { internal_position_in_the_node = 0; r_ptr = (char *)memory->nodes[++internal_memory_node_index]->mem; } char z = r_ptr[internal_position_in_the_node++]; size_read++; *w_ptr++ = z; if(z == '\n') { // On n'est pas tombe sur la fin du fichier. 
is_eof = false; break; } } *w_ptr++ = '\0'; // Ye Ye Ye position += size_read; return dest; } MemoryXFile::~MemoryXFile() { } } torch3-3.1.orig/core/MemoryXFile.h0000644000175000017500000000777610106445234017215 0ustar kalfakalfa00000000000000// Copyright (C) 2003--2004 Ronan Collobert (collober@idiap.ch) // // This file is part of Torch 3.1. // // All rights reserved. // // Redistribution and use in source and binary forms, with or without // modification, are permitted provided that the following conditions // are met: // 1. Redistributions of source code must retain the above copyright // notice, this list of conditions and the following disclaimer. // 2. Redistributions in binary form must reproduce the above copyright // notice, this list of conditions and the following disclaimer in the // documentation and/or other materials provided with the distribution. // 3. The name of the author may not be used to endorse or promote products // derived from this software without specific prior written permission. // // THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR // IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES // OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. // IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, // INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT // NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF // THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #ifndef MEMORY_X_FILE_INC #define MEMORY_X_FILE_INC #include "XFile.h" #include "List.h" namespace Torch { struct MemoryXFileNode { void *mem; int size; }; DEFINE_NEW_LIST(MemoryXFileList, MemoryXFileNode); /** A file in the memory. 
Note that the MemoryXFile is a read-write file! When writing, data is buffered (to avoid reallocating blocks of small sizes!). Options: \begin{tabular}{lcll} "buffer size" & int & buffer size for writing & [65536] \end{tabular} @author Ronan Collobert (collober@idiap.ch) */ class MemoryXFile : public XFile { private: /* Scratch buffer printf() formats into before writing; static, hence shared by all instances. */ static char petit_message_pour_melanie[10000]; /* End-of-file flag returned by eof(): set by read(), gets() and scanf() when the end is reached, cleared by seek(). */ bool is_eof; public: /// The memory MemoryXFileList *memory; /// The position in the memory int position; /// The size of the memory int size; /* Number of readable bytes; write() increases it when writing past the current end. */ /// The total size of the memory, including allocated buffer. int total_size; /// Minimal number of bytes that will be allocated when writing... int buffer_size; // Internal int internal_memory_node_index; /* Index, in memory->nodes, of the node the cursor is in. */ int internal_position_in_the_node; /* Offset of the cursor inside that node. */ char *buffer_format; /* Scratch copy of the scanf() format, to which "%n" is appended. */ int buffer_format_size; /* Size in bytes of buffer_format. */ /// Create a read-write file from nothing. MemoryXFile(int buffer_format_size_=256); /** Give a MemoryXFileList for the memory. A new list is created, but the memory in the nodes are not copied. If the #size# that you want to be readable is \emph{less} than the sum of the #n# in the field of the #MemoryXFileList#, you can provide it in #size_# */ MemoryXFile(MemoryXFileList *memory_, int size_=-1, int buffer_format_size_=256); /// Give a pointer for the memory. (No copy is done!) MemoryXFile(void *memory_, int size_, int buffer_format_size_=256); /** Concat all the memory in one node. Add a null character at the end. This null character is useful for #sscanf# in the #scanf# method. */ void concat(); /* Copies up to block_size*n_blocks bytes into ptr and returns the number of BYTES copied (0 at end of file). */ virtual int read(void *ptr, int block_size, int n_blocks); /* Writes block_size*n_blocks bytes, adding a node of at least buffer_size bytes when the allocated space is exhausted; returns block_size*n_blocks. */ virtual int write(void *ptr, int block_size, int n_blocks); /* Returns the end-of-file flag. */ virtual int eof(); /* Does nothing and returns 0. */ virtual int flush(); /* Returns 0 on success, -1 if the target lies outside [0, size]. SEEK_END is computed as size - offset. */ virtual int seek(long offset, int whence); /* Returns the current position. */ virtual long tell(); /* Same as seek(0, SEEK_SET). */ virtual void rewind(); /* Formats into the shared static buffer, then writes the resulting string. */ virtual int printf(const char *format, ...); /** Warning: this method call the concat function, and therefore could take time if you do intensive read/write. 
*/ virtual int scanf(const char *format, void *ptr); /* Reads at most size_-1 bytes, stopping after the first newline (kept in dest); dest is null-terminated. Returns dest, or NULL if nothing could be read. */ virtual char *gets(char *dest, int size_); //----- virtual ~MemoryXFile(); }; } #endif torch3-3.1.orig/core/MultiClassFormat.cc0000644000175000017500000001174310106445234020371 0ustar kalfakalfa00000000000000// Copyright (C) 2003--2004 Ronan Collobert (collober@idiap.ch) // // This file is part of Torch 3.1. // // All rights reserved. // // Redistribution and use in source and binary forms, with or without // modification, are permitted provided that the following conditions // are met: // 1. Redistributions of source code must retain the above copyright // notice, this list of conditions and the following disclaimer. // 2. Redistributions in binary form must reproduce the above copyright // notice, this list of conditions and the following disclaimer in the // documentation and/or other materials provided with the distribution. // 3. The name of the author may not be used to endorse or promote products // derived from this software without specific prior written permission. // // THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR // IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES // OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. // IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, // INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT // NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF // THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
#include "MultiClassFormat.h"

namespace Torch {

/** qsort() comparison function ordering two #real# values increasingly.

    @return a negative value if *a < *b, a positive value if *a > *b and
            0 otherwise, as qsort() requires from a consistent comparator
*/
extern "C" int multiClassTriMelanie(const void *a, const void *b)
{
  real ar = *(const real *)a;
  real br = *(const real *)b;

  if(ar < br)
    return -1;
  if(ar > br)
    return 1;
  return 0;
}

/** Auto-detect the classes of #data#.

    The label of an example is the first value of the first frame of its
    targets. Every distinct label found becomes a class; the labels are then
    sorted in increasing order in #tabclasses#, and #class_labels[i]# points
    to #tabclasses[i]#.
*/
MultiClassFormat::MultiClassFormat(DataSet *data)
{
  tabclasses = NULL;

  if(data->n_targets != 1)
    warning("MultiClassFormat: the data has %d ouputs", data->n_targets);

  int n_set = 0;
  for(int i = 0; i < data->n_examples; i++)
  {
    data->setExample(i);
    real label = data->targets->frames[0][0];

    // Is this label already known ?
    bool flag = false;
    for(int k = 0; k < n_set; k++)
    {
      if(label == tabclasses[k])
      {
        flag = true;
        break;
      }
    }

    if(!flag)
    {
      tabclasses = (real *)allocator->realloc(tabclasses, sizeof(real)*(n_set+1));
      tabclasses[n_set++] = label;
    }
  }

  switch(n_set)
  {
    case 0:
      error("MultiClassFormat: you have no examples");
      break;
    case 1:
      warning("MultiClassFormat: you have only one class [%g]", tabclasses[0]);
      break;
    default:
      message("MultiClassFormat: %d classes detected", n_set);
      break;
  }

  // Sort the labels and publish them.
  n_classes = n_set;
  qsort(tabclasses, n_classes, sizeof(real), multiClassTriMelanie);
  class_labels = (real **)allocator->alloc(sizeof(real *)*n_classes);
  for(int i = 0; i < n_classes; i++)
    class_labels[i] = tabclasses+i;
}

/** Use #n_classes_# classes with the given labels.

    @param n_classes_    number of classes
    @param class_labels_ array of #n_classes_# labels, copied as is (it is
                         not sorted); if NULL, the labels are 0, 1, ...
*/
MultiClassFormat::MultiClassFormat(int n_classes_, real *class_labels_)
{
  n_classes = n_classes_;
  tabclasses = (real *)allocator->alloc(sizeof(real)*n_classes);

  if(class_labels_)
  {
    for(int i = 0; i < n_classes; i++)
      tabclasses[i] = class_labels_[i];
  }
  else
  {
    for(int i = 0; i < n_classes; i++)
      tabclasses[i] = (real)i;
  }

  class_labels = (real **)allocator->alloc(sizeof(real *)*n_classes);
  for(int i = 0; i < n_classes; i++)
    class_labels[i] = tabclasses+i;
}

/// A class is coded by one single value.
int MultiClassFormat::getOutputSize()
{
  return 1;
}

/** Store in #outputs[0]# the index of the largest of the #n_classes#
    values of #one_hot_outputs# (the first one in case of ties; -1 if no
    value is greater than -INF).
*/
void MultiClassFormat::fromOneHot(real *outputs, real *one_hot_outputs)
{
  real max = -INF;
  int index = -1;
  for(int i = 0; i < n_classes; i++)
  {
    if(one_hot_outputs[i] > max)
    {
      max = one_hot_outputs[i];
      index = i;
    }
  }
  outputs[0] = (real)index;
}

/** Spread the single value #outputs[0]# over #n_classes# scores.

    NOTE(review): the middle branch uses floor/ceil of the value directly as
    class indices, which looks like it expects labels 0, 1, ...,
    n_classes-1 -- confirm for other label sets.
*/
void MultiClassFormat::toOneHot(real *outputs, real *one_hot_outputs)
{
  real out = outputs[0];

  // heuristic: find the one or two labels that are closer to "out" and
  // attribute them the difference between out and their label. put 0 for
  // all the other values

  // first initialize one_hot_outputs with all zeros
  for(int i = 0; i < n_classes; i++)
    one_hot_outputs[i] = 0.;

  // then there are 3 different cases
  if(out > n_classes-1)
  {
    one_hot_outputs[n_classes-1] = fabs(out - tabclasses[n_classes-1]);
  }
  else if(out < 0)
  {
    one_hot_outputs[0] = fabs(out - tabclasses[0]);
  }
  else
  {
    int before = (int)floor(out);
    int after = (int)ceil(out);
    // the scores are reversed so the max score is given to the nearest
    real diff_before = after - out;
    real diff_after = out - before;
    if (before == after)
      diff_before = diff_after = 1.;
    one_hot_outputs[before] = diff_before;
    one_hot_outputs[after] = diff_after;
  }
}

/** Return the index of the class whose label is the closest to
    #vector[0]# (the lowest index in case of ties).
*/
int MultiClassFormat::getClass(real *vector)
{
  real out = vector[0];
  real dist = fabs(out - tabclasses[0]);
  int index = 0;

  for(int i = 1; i < n_classes; i++)
  {
    real z = fabs(out - tabclasses[i]);
    if(z < dist)
    {
      index = i;
      dist = z;
    }
  }

  return(index);
}

/// Empty body: no explicit clean-up is performed here.
MultiClassFormat::~MultiClassFormat()
{
}

}
torch3-3.1.orig/core/MultiClassFormat.h0000644000175000017500000000454010106445234020230 0ustar kalfakalfa00000000000000// Copyright (C) 2003--2004 Ronan Collobert (collober@idiap.ch)
//
// This file is part of Torch 3.1.
//
// All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions
// are met:
// 1. Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
// 2. Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
// 3. 
The name of the author may not be used to endorse or promote products // derived from this software without specific prior written permission. // // THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR // IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES // OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. // IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, // INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT // NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF // THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #ifndef MULTI_CLASS_FORMAT_INC #define MULTI_CLASS_FORMAT_INC #include "ClassFormat.h" #include "DataSet.h" namespace Torch { /** Define the multi class encoding format. Each class is coded by a single number. @author Ronan Collobert (collober@idiap.ch) */ class MultiClassFormat : public ClassFormat { public: /// Array of class labels. real *tabclasses; /* Holds n_classes labels; both constructors make class_labels[i] point to tabclasses[i]. */ /// Autodetect classes. /* The label of an example is the first value of the first frame of its targets; the distinct labels found are sorted in increasing order. */ MultiClassFormat(DataSet *data); /// By default (if #class_labels_# is not specified), class labels are 0, 1, ... /* Given labels are copied in the given order (they are not sorted). */ MultiClassFormat(int n_classes_, real *class_labels_=NULL); /* Returns the index of the class whose label is the closest to vector[0]. */ virtual int getClass(real *vector); /* Always returns 1. */ virtual int getOutputSize(); /* Stores in outputs[0] the index of the largest of the n_classes values of one_hot_outputs. */ virtual void fromOneHot(real *outputs, real *one_hot_outputs); /* Zeroes the n_classes values of one_hot_outputs, then gives a score to the one or two classes surrounding outputs[0]. */ virtual void toOneHot(real *outputs, real *one_hot_outputs); virtual ~MultiClassFormat(); }; } #endif torch3-3.1.orig/core/NullXFile.cc0000644000175000017500000000426110106445234016777 0ustar kalfakalfa00000000000000// Copyright (C) 2003--2004 Ronan Collobert (collober@idiap.ch) // // This file is part of Torch 3.1. // // All rights reserved. 
// // Redistribution and use in source and binary forms, with or without // modification, are permitted provided that the following conditions // are met: // 1. Redistributions of source code must retain the above copyright // notice, this list of conditions and the following disclaimer. // 2. Redistributions in binary form must reproduce the above copyright // notice, this list of conditions and the following disclaimer in the // documentation and/or other materials provided with the distribution. // 3. The name of the author may not be used to endorse or promote products // derived from this software without specific prior written permission. // // THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR // IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES // OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. // IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, // INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT // NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF // THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #include "NullXFile.h" namespace Torch { NullXFile::NullXFile() { } /* Reads nothing: always returns 0 and leaves ptr untouched. */ int NullXFile::read(void *ptr, int block_size, int n_blocks) { return 0; } /* Discards the data and returns n_blocks. */ int NullXFile::write(void *ptr, int block_size, int n_blocks) { return n_blocks; } /* Always returns 0. */ int NullXFile::eof() { return 0; } /* Nothing to flush: always returns 0. */ int NullXFile::flush() { return 0; } /* Ignores its arguments and returns 0. */ int NullXFile::seek(long offset, int whence) { return 0; } /* The position is always reported as 0. */ long NullXFile::tell() { return 0L; } /* Does nothing. */ void NullXFile::rewind() { } /* Ignores its arguments, writes nothing and returns 0. */ int NullXFile::printf(const char *format, ...) 
{ return 0; } /* Reads nothing: returns 0 and leaves ptr untouched. */ int NullXFile::scanf(const char *format, void *ptr) { return 0; } /* Reads nothing: returns NULL and leaves dest untouched. */ char *NullXFile::gets(char *dest, int size_) { return NULL; } NullXFile::~NullXFile() { } } torch3-3.1.orig/core/NullXFile.h0000644000175000017500000000443710106445234016646 0ustar kalfakalfa00000000000000// Copyright (C) 2003--2004 Ronan Collobert (collober@idiap.ch) // // This file is part of Torch 3.1. // // All rights reserved. // // Redistribution and use in source and binary forms, with or without // modification, are permitted provided that the following conditions // are met: // 1. Redistributions of source code must retain the above copyright // notice, this list of conditions and the following disclaimer. // 2. Redistributions in binary form must reproduce the above copyright // notice, this list of conditions and the following disclaimer in the // documentation and/or other materials provided with the distribution. // 3. The name of the author may not be used to endorse or promote products // derived from this software without specific prior written permission. // // THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR // IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES // OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. // IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, // INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT // NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF // THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #ifndef NULL_X_FILE_INC #define NULL_X_FILE_INC #include "XFile.h" namespace Torch { /** NullXFile. An XFile which does nothing! Equivalent to the "/dev/null" file on UNIX systems. 
@author Ronan Collobert (collober@idiap.ch) */ class NullXFile : public XFile { public: /* Every operation below is a stub: data given to write() is discarded and the read functions never return anything. */ /// NullXFile(); /* Always returns 0. */ virtual int read(void *ptr, int block_size, int n_blocks); /* Returns n_blocks without storing anything. */ virtual int write(void *ptr, int block_size, int n_blocks); /* Always returns 0. */ virtual int eof(); /* Always returns 0. */ virtual int flush(); /* Always returns 0. */ virtual int seek(long offset, int whence); /* Always returns 0. */ virtual long tell(); /* Does nothing. */ virtual void rewind(); /* Always returns 0. */ virtual int printf(const char *format, ...); /* Always returns 0. */ virtual int scanf(const char *format, void *ptr); /* Always returns NULL. */ virtual char *gets(char *dest, int size_); //----- virtual ~NullXFile(); }; } #endif torch3-3.1.orig/core/Object.cc0000644000175000017500000001035010106445234016337 0ustar kalfakalfa00000000000000// Copyright (C) 2003--2004 Ronan Collobert (collober@idiap.ch) // // This file is part of Torch 3.1. // // All rights reserved. // // Redistribution and use in source and binary forms, with or without // modification, are permitted provided that the following conditions // are met: // 1. Redistributions of source code must retain the above copyright // notice, this list of conditions and the following disclaimer. // 2. Redistributions in binary form must reproduce the above copyright // notice, this list of conditions and the following disclaimer in the // documentation and/or other materials provided with the distribution. // 3. The name of the author may not be used to endorse or promote products // derived from this software without specific prior written permission. // // THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR // IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES // OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
// IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, // INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT // NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF // THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #include "Object.h" #include "DiskXFile.h" #include "XFile.h" namespace Torch { Object::Object() { n_options = 0; options = NULL; allocator = new Allocator; } void Object::addOption(const char *name, int size, void *ptr, const char *help) { options = (Option *)allocator->realloc((void *)options, (n_options+1)*sizeof(Option)); Option *optr = options+n_options; optr->name = (char *)allocator->alloc(strlen(name)+1); optr->help = (char *)allocator->alloc(strlen(help)+1); strcpy(optr->name, name); strcpy(optr->help, help); optr->size = size; optr->ptr = ptr; n_options++; } void Object::addIOption(const char *name, int *ptr, int init_value, const char *help) { *ptr = init_value; addOption(name, sizeof(int), ptr, help); } void Object::addROption(const char *name, real *ptr, real init_value, const char *help) { *ptr = init_value; addOption(name, sizeof(real), ptr, help); } void Object::addBOption(const char *name, bool *ptr, bool init_value, const char *help) { *ptr = init_value; addOption(name, sizeof(bool), ptr, help); } void Object::addOOption(const char *name, Object **ptr, Object *init_value, const char *help) { *ptr = init_value; addOption(name, sizeof(Object *), ptr, help); } void Object::setOption(const char *name, void *ptr) { Option *optr = options; bool flag = false; for(int i = 0; i < n_options; i++, optr++) { if( !strcmp(optr->name, name) ) { flag = true; break; } } if(!flag) error("Object: option doesn't exist: %s", name); char *odata = (char *)optr->ptr; 
char *odatao = (char *)ptr; for(int i = 0; i < optr->size; i++) { *odata = *odatao; odata++; odatao++; } } void Object::setIOption(const char *name, int option) { setOption(name, (void *)&option); } void Object::setROption(const char *name, real option) { setOption(name, (void *)&option); } void Object::setBOption(const char *name, bool option) { setOption(name, (void *)&option); } void Object::setOOption(const char *name, Object *option) { setOption(name, (void *)&option); } void Object::load(const char *filename) { DiskXFile file(filename, "r"); loadXFile(&file); } void Object::save(const char *filename) { DiskXFile file(filename, "w"); saveXFile(&file); } void Object::loadXFile(XFile *file) { } void Object::saveXFile(XFile *file) { } void* Object::operator new(size_t size, Allocator *allocator_) { if(allocator_) return(allocator_->alloc(size, 1)); else return(Allocator::sysAlloc(size)); } void* Object::operator new(size_t size, Allocator *allocator_, void *ptr_) { allocator_->retain(ptr_, 2); return ptr_; } void Object::operator delete(void *ptr) { free(ptr); } Object::~Object() { delete allocator; } } torch3-3.1.orig/core/PreProcessing.h0000644000175000017500000000437110106445234017564 0ustar kalfakalfa00000000000000// Copyright (C) 2003--2004 Ronan Collobert (collober@idiap.ch) // // This file is part of Torch 3.1. // // All rights reserved. // // Redistribution and use in source and binary forms, with or without // modification, are permitted provided that the following conditions // are met: // 1. Redistributions of source code must retain the above copyright // notice, this list of conditions and the following disclaimer. // 2. Redistributions in binary form must reproduce the above copyright // notice, this list of conditions and the following disclaimer in the // documentation and/or other materials provided with the distribution. // 3. 
The name of the author may not be used to endorse or promote products // derived from this software without specific prior written permission. // // THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR // IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES // OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. // IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, // INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT // NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF // THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #ifndef PRE_PROCESSING_INC #define PRE_PROCESSING_INC #include "Sequence.h" namespace Torch { /** This class is able to do pre-processing on examples in a #DataSet#. Pre-processing doesn't modify the \emph{structure} of sequences, but only the \emph{contents}. @see DataSet @author Ronan Collobert (collober@idiap.ch) */ class PreProcessing : public Object { public: /// PreProcessing(); /// Given an input sequence, do the pre-processing. /* Pure virtual: must be provided by subclasses. MemoryDataSet::preProcess() calls it on the inputs of every example when the data set has inputs. */ virtual void preProcessInputs(Sequence *inputs) = 0; /// Given a target sequence, do the pre-processing. /* Pure virtual: must be provided by subclasses. MemoryDataSet::preProcess() calls it on the targets of every example when the data set has targets. */ virtual void preProcessTargets(Sequence *targets) = 0; virtual ~PreProcessing(); }; DEFINE_NEW_LIST(PreProcessingList, PreProcessing); } #endif torch3-3.1.orig/core/Object.h0000644000175000017500000001177110106445234016211 0ustar kalfakalfa00000000000000// Copyright (C) 2003--2004 Ronan Collobert (collober@idiap.ch) // // This file is part of Torch 3.1. // // All rights reserved. // // Redistribution and use in source and binary forms, with or without // modification, are permitted provided that the following conditions // are met: // 1. 
Redistributions of source code must retain the above copyright // notice, this list of conditions and the following disclaimer. // 2. Redistributions in binary form must reproduce the above copyright // notice, this list of conditions and the following disclaimer in the // documentation and/or other materials provided with the distribution. // 3. The name of the author may not be used to endorse or promote products // derived from this software without specific prior written permission. // // THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR // IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES // OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. // IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, // INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT // NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF // THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #ifndef OBJECT_INC #define OBJECT_INC #include "general.h" #include "Allocator.h" namespace Torch { //----- class XFile; /* One registered option. name and help are copies made by Object::addOption() in memory taken from the Object's allocator; size is the size in bytes of the option value; ptr is the address of the variable holding that value. */ typedef struct Option_ { char *name; char *help; int size; void *ptr; } Option; //----- /** Almost all classes in Torch should be a sub-class of this class. It provides two interesting things: first a useful interface to manage option, and second the "allocator trick". Indeed, in Torch, a class which allocate memory should desallocate this memory when it's destroyed. The idea is the following: in the constructor of Object, an Allocator object is created. You can use it in your subclasses to allocate memory. This memory will be destroyed when the Object is destroyed. "new" operator have been defined to be used with Allocator. 
@see Allocator @author Ronan Collobert (collober@idiap.ch) */ class Object { public: /* Number of entries of the option table, and the table itself (grown by one entry at each addOption() call). */ int n_options; Option *options; /// Allocator associated to the Object. Allocator *allocator; /* Created by the constructor and deleted by the destructor. */ /// Object(); //----- /** Add the option #name#. This one has a pointer on #ptr# and has the size #size#. You can provide an help in #help#. (Note that if #help# is empty, its an empty string and not NULL). #name# and #help# are copied. NOTE(review): this comment used to describe an #is_allowed_after_init# flag, but the signature below has no such parameter. */ void addOption(const char *name, int size, void *ptr, const char *help=""); /** Several wrappers of #addOption()# for Int, Real and Bool options. Each wrapper first stores #init_value# at #ptr#, then registers the option. Be careful: there is no type checking. */ void addIOption(const char *name, int *ptr, int init_value, const char *help=""); /// void addROption(const char *name, real *ptr, real init_value, const char *help=""); /// void addBOption(const char *name, bool *ptr, bool init_value, const char *help=""); /// void addOOption(const char *name, Object **ptr, Object *init_value, const char *help=""); /// Set the option #name# to the value contained at #ptr#. /* As many bytes as the registered size of the option are copied; an error is reported if no option has this name. */ void setOption(const char *name, void *ptr); /** Several wrappers of #setOption()# for Int, Real and Bool options. Be careful: there is no type checking. */ void setIOption(const char *name, int option); /// void setROption(const char *name, real option); /// void setBOption(const char *name, bool option); /// void setOOption(const char *name, Object *option); /// Load the object from a file pointer (\emph{not the options}) /* The base implementation is empty. */ virtual void loadXFile(XFile *file); /// Save the object to a file pointer (\emph{not the options}) /* The base implementation is empty. */ virtual void saveXFile(XFile *file); /// Load the machine from a file name (\emph{not the options}) /* Opens filename as a DiskXFile in "r" mode and calls loadXFile(). */ void load(const char *filename); /// Save the machine to a file name (\emph{not the options}) /* Opens filename as a DiskXFile in "w" mode and calls saveXFile(). */ void save(const char *filename); /** Allocate the object using #allocator_#. The object will be destroyed and freed when the allocator will be destroyed. 
If no allocator is provided, the new will be similar than the standard new. */ void* operator new(size_t size, Allocator *allocator_=NULL); /** Allocate the object using the memory given by #ptr_#. The object will be destroyed (but not freed!) when the allocator will be destroyed. Note that #allocator_# is used unconditionally here: it must not be NULL. */ void* operator new(size_t size, Allocator *allocator_, void *ptr_); /// Delete an object. /* The storage is released with free(). */ void operator delete(void *ptr); //----- /* Deletes the allocator owned by the object. */ virtual ~Object(); }; } #endif torch3-3.1.orig/core/PreProcessing.cc0000644000175000017500000000335310106445234017721 0ustar kalfakalfa00000000000000// Copyright (C) 2003--2004 Ronan Collobert (collober@idiap.ch) // // This file is part of Torch 3.1. // // All rights reserved. // // Redistribution and use in source and binary forms, with or without // modification, are permitted provided that the following conditions // are met: // 1. Redistributions of source code must retain the above copyright // notice, this list of conditions and the following disclaimer. // 2. Redistributions in binary form must reproduce the above copyright // notice, this list of conditions and the following disclaimer in the // documentation and/or other materials provided with the distribution. // 3. The name of the author may not be used to endorse or promote products // derived from this software without specific prior written permission. // // THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR // IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES // OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
// IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, // INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT // NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF // THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #include "PreProcessing.h" namespace Torch { // Que d'al ici. PreProcessing::PreProcessing() { } // Que d'al la. PreProcessing::~PreProcessing() { } // Oh yeah. IMPLEMENT_NEW_LIST(PreProcessingList, PreProcessing) } torch3-3.1.orig/core/OneHotClassFormat.cc0000644000175000017500000000625410106445234020474 0ustar kalfakalfa00000000000000// Copyright (C) 2003--2004 Ronan Collobert (collober@idiap.ch) // // This file is part of Torch 3.1. // // All rights reserved. // // Redistribution and use in source and binary forms, with or without // modification, are permitted provided that the following conditions // are met: // 1. Redistributions of source code must retain the above copyright // notice, this list of conditions and the following disclaimer. // 2. Redistributions in binary form must reproduce the above copyright // notice, this list of conditions and the following disclaimer in the // documentation and/or other materials provided with the distribution. // 3. The name of the author may not be used to endorse or promote products // derived from this software without specific prior written permission. // // THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR // IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES // OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
// IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, // INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT // NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF // THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #include "OneHotClassFormat.h" namespace Torch { OneHotClassFormat::OneHotClassFormat(DataSet *data) { n_classes = data->n_targets; class_labels_buffer = (real *)allocator->alloc(sizeof(real)*n_classes*n_classes); class_labels = (real **)allocator->alloc(sizeof(real *)*n_classes); for(int i = 0; i < n_classes; i++) { class_labels[i] = class_labels_buffer+i*n_classes; for(int j = 0; j < n_classes; j++) class_labels[i][j] = 0.; class_labels[i][i] = 1.; } message("OneHotClassFormat: %d classes detected", n_classes); } OneHotClassFormat::OneHotClassFormat(int n_targets) { n_classes = n_targets; class_labels_buffer = (real *)allocator->alloc(sizeof(real)*n_classes*n_classes); class_labels = (real **)allocator->alloc(sizeof(real *)*n_classes); for(int i = 0; i < n_classes; i++) { class_labels[i] = class_labels_buffer+i*n_classes; for(int j = 0; j < n_classes; j++) class_labels[i][j] = 0.; class_labels[i][i] = 1.; } message("OneHotClassFormat: %d classes detected", n_classes); } int OneHotClassFormat::getOutputSize() { return n_classes; } void OneHotClassFormat::fromOneHot(real *outputs, real *one_hot_outputs) { for(int i = 0; i < n_classes; i++) outputs[i] = one_hot_outputs[i]; } void OneHotClassFormat::toOneHot(real *outputs, real *one_hot_outputs) { for(int i = 0; i < n_classes; i++) one_hot_outputs[i] = outputs[i]; } int OneHotClassFormat::getClass(real *vector) { real z = vector[0]; int index = 0; for(int i = 1; i < n_classes; i++) { if(vector[i] > z) { index = i; z 
= vector[i]; } } return(index); } OneHotClassFormat::~OneHotClassFormat() { } } torch3-3.1.orig/core/OneHotClassFormat.h0000644000175000017500000000444010106445234020331 0ustar kalfakalfa00000000000000// Copyright (C) 2003--2004 Ronan Collobert (collober@idiap.ch) // // This file is part of Torch 3.1. // // All rights reserved. // // Redistribution and use in source and binary forms, with or without // modification, are permitted provided that the following conditions // are met: // 1. Redistributions of source code must retain the above copyright // notice, this list of conditions and the following disclaimer. // 2. Redistributions in binary form must reproduce the above copyright // notice, this list of conditions and the following disclaimer in the // documentation and/or other materials provided with the distribution. // 3. The name of the author may not be used to endorse or promote products // derived from this software without specific prior written permission. // // THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR // IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES // OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. // IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, // INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT // NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF // THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #ifndef ONE_HOT_CLASS_FORMAT_INC #define ONE_HOT_CLASS_FORMAT_INC #include "ClassFormat.h" #include "DataSet.h" namespace Torch { /** Define the one hot class encoding format. 
@author Ronan Collobert (collober@idiap.ch) */ class OneHotClassFormat : public ClassFormat { public: real *class_labels_buffer; /** Autodetect number of classes. (Equal to the frame size of the targets). */ OneHotClassFormat(DataSet *data); /// Specify the number of targets. OneHotClassFormat(int n_targets); virtual int getClass(real *target); virtual int getOutputSize(); virtual void fromOneHot(real *outputs, real *one_hot_outputs); virtual void toOneHot(real *outputs, real *one_hot_outputs); virtual ~OneHotClassFormat(); }; } #endif torch3-3.1.orig/core/Parameters.cc0000644000175000017500000000770610106445234017247 0ustar kalfakalfa00000000000000// Copyright (C) 2003--2004 Ronan Collobert (collober@idiap.ch) // // This file is part of Torch 3.1. // // All rights reserved. // // Redistribution and use in source and binary forms, with or without // modification, are permitted provided that the following conditions // are met: // 1. Redistributions of source code must retain the above copyright // notice, this list of conditions and the following disclaimer. // 2. Redistributions in binary form must reproduce the above copyright // notice, this list of conditions and the following disclaimer in the // documentation and/or other materials provided with the distribution. // 3. The name of the author may not be used to endorse or promote products // derived from this software without specific prior written permission. // // THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR // IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES // OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
// IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, // INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT // NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF // THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #include "Parameters.h" #include "XFile.h" namespace Torch { Parameters::Parameters() { // General data = NULL; n_data = 0; size = NULL; n_params = 0; } Parameters::Parameters(int n_params_) { if(n_params_ <= 0) { data = NULL; n_data = 0; size = NULL; n_params = 0; return; } // General data = (real **)allocator->alloc(sizeof(real *)); data[0] = (real *)allocator->alloc(sizeof(real)*n_params_); n_data = 1; size = (int *)allocator->alloc(sizeof(int)); size[0] = n_params_; n_params = n_params_; } void Parameters::addParameters(real *params_, int n_params_, bool do_copy) { if(!params_) return; data = (real **)allocator->realloc(data, sizeof(real *)*(n_data+1)); size = (int *)allocator->realloc(size, sizeof(int)*(n_data+1)); size[n_data] = n_params_; if(do_copy) { data[n_data] = (real *)allocator->alloc(sizeof(real)*n_params_); real *dest_ = data[n_data]; for(int i = 0; i < n_params_; i++) dest_[i] = params_[i]; } else data[n_data] = params_; n_data++; n_params += n_params_; } void Parameters::add(Parameters *params_, bool do_copy) { if(!params_) return; for(int i = 0; i < params_->n_data; i++) this->addParameters(params_->data[i], params_->size[i], do_copy); } void Parameters::copy(Parameters *from) { if(n_params == 0) return; int src_number = 0; int src_index = 0; int src_size = from->size[0]; real *src_ = from->data[0]; for(int i = 0; i < n_data; i++) { real *dest_ = data[i]; for(int j = 0; j < size[i]; j++) { if(src_index == src_size) { src_index = 0; src_size = 
from->size[++src_number]; src_ = from->data[src_number]; } dest_[j] = src_[src_index++]; } } } void Parameters::copyFrom(real *vec) { for(int i = 0; i < n_data; i++) { real *dest_ = data[i]; for(int j = 0; j < size[i]; j++) dest_[j] = vec[j]; vec += size[i]; } } void Parameters::copyTo(real *vec) { for(int i = 0; i < n_data; i++) { real *src_ = data[i]; for(int j = 0; j < size[i]; j++) vec[j] = src_[j]; vec += size[i]; } } void Parameters::loadXFile(XFile *file) { for(int i = 0; i < n_data; i++) file->taggedRead(data[i], sizeof(real), size[i], "PARAMS"); } void Parameters::saveXFile(XFile *file) { for(int i = 0; i < n_data; i++) file->taggedWrite(data[i], sizeof(real), size[i], "PARAMS"); } Parameters::~Parameters() { } } torch3-3.1.orig/core/Parameters.h0000644000175000017500000000641110106445234017101 0ustar kalfakalfa00000000000000// Copyright (C) 2003--2004 Ronan Collobert (collober@idiap.ch) // // This file is part of Torch 3.1. // // All rights reserved. // // Redistribution and use in source and binary forms, with or without // modification, are permitted provided that the following conditions // are met: // 1. Redistributions of source code must retain the above copyright // notice, this list of conditions and the following disclaimer. // 2. Redistributions in binary form must reproduce the above copyright // notice, this list of conditions and the following disclaimer in the // documentation and/or other materials provided with the distribution. // 3. The name of the author may not be used to endorse or promote products // derived from this software without specific prior written permission. // // THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR // IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES // OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
// IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, // INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT // NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF // THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #ifndef PARAMETERS_INC #define PARAMETERS_INC #include "Object.h" namespace Torch { /** Parameters definition. Parameters are a bench of real arrays. @author Ronan Collobert (collober@idiap.ch) */ class Parameters : public Object { public: /** Array of pointers to array of parameters. The size of this array is given by #n_data#. The size of #data[i]# is given by #size[i]# */ real **data; /// Size of #data# int n_data; /// #size[i]# is the size of #data[i]# int *size; /// Total number of parameters int n_params; /// No parameters ? Parameters(); /// Create one entry in #data# with the given size. Parameters(int n_params_); /** Add an entry in #data#. If #do_copy# is true, copy the parameters. Else, just copy the pointer. */ void addParameters(real *params, int n_params_, bool do_copy=false); /** Add all entries given by #params# in #data#. If #do_copy# is true, copy the parameters, else just copy the pointers. */ void add(Parameters *params, bool do_copy=false); /** Copy the given parameters. The given parameters don't need to have the same structure. But it must have the same total length. */ void copy(Parameters *from); /** Copy a real vector in the parameters. The parameters \emph{must} have the good size! */ void copyFrom(real *vec); /** Copy the full parameters in a real vector. The parameters \emph{must} have the good size! */ void copyTo(real *vec); /// Save all the parameters. virtual void saveXFile(XFile *file); /// Load all the parameters. 
virtual void loadXFile(XFile *file); virtual ~Parameters(); }; } #endif torch3-3.1.orig/core/PipeXFile.cc0000644000175000017500000000336210106445234016763 0ustar kalfakalfa00000000000000// Copyright (C) 2003--2004 Ronan Collobert (collober@idiap.ch) // // This file is part of Torch 3.1. // // All rights reserved. // // Redistribution and use in source and binary forms, with or without // modification, are permitted provided that the following conditions // are met: // 1. Redistributions of source code must retain the above copyright // notice, this list of conditions and the following disclaimer. // 2. Redistributions in binary form must reproduce the above copyright // notice, this list of conditions and the following disclaimer in the // documentation and/or other materials provided with the distribution. // 3. The name of the author may not be used to endorse or promote products // derived from this software without specific prior written permission. // // THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR // IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES // OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. // IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, // INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT // NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF // THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
#include "PipeXFile.h"

namespace Torch {

// Start command with popen() and hand the resulting stream to the
// DiskXFile base, then flag the file as an opened pipe.
// NOTE(review): the result of popen() is not checked here; whether the
// DiskXFile constructor copes with a NULL stream cannot be seen from
// this file -- confirm.
PipeXFile::PipeXFile(const char *command, const char *open_flags)
  : DiskXFile(popen(command, open_flags))
{
  its_a_pipe = true;
  is_opened = true;
}

PipeXFile::~PipeXFile()
{
}

}
torch3-3.1.orig/core/PipeXFile.h0000644000175000017500000000361610106445234016627 0ustar kalfakalfa00000000000000// Copyright (C) 2003--2004 Ronan Collobert (collober@idiap.ch) // // This file is part of Torch 3.1. // // All rights reserved. // // Redistribution and use in source and binary forms, with or without // modification, are permitted provided that the following conditions // are met: // 1. Redistributions of source code must retain the above copyright // notice, this list of conditions and the following disclaimer. // 2. Redistributions in binary form must reproduce the above copyright // notice, this list of conditions and the following disclaimer in the // documentation and/or other materials provided with the distribution. // 3. The name of the author may not be used to endorse or promote products // derived from this software without specific prior written permission. // // THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR // IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES // OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. // IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, // INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT // NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF // THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#ifndef PIPE_X_FILE_INC
#define PIPE_X_FILE_INC

#include "DiskXFile.h"

namespace Torch {

/** A file in a pipe.
@author Ronan Collobert (collober@idiap.ch) */ class PipeXFile : public DiskXFile { public: /// Execute command with the flags #open_flags# PipeXFile(const char *command, const char *open_flags); //----- virtual ~PipeXFile(); }; } #endif torch3-3.1.orig/core/ClassFormatDataSet.h0000644000175000017500000000610110106445234020456 0ustar kalfakalfa00000000000000// Copyright (C) 2003--2004 Ronan Collobert (collober@idiap.ch) // // This file is part of Torch 3.1. // // All rights reserved. // // Redistribution and use in source and binary forms, with or without // modification, are permitted provided that the following conditions // are met: // 1. Redistributions of source code must retain the above copyright // notice, this list of conditions and the following disclaimer. // 2. Redistributions in binary form must reproduce the above copyright // notice, this list of conditions and the following disclaimer in the // documentation and/or other materials provided with the distribution. // 3. The name of the author may not be used to endorse or promote products // derived from this software without specific prior written permission. // // THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR // IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES // OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. // IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, // INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT // NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF // THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
#ifndef CLASS_FORMAT_DATA_SET_INC #define CLASS_FORMAT_DATA_SET_INC #include "DataSet.h" namespace Torch { /** Given a DataSet, convert (on-the-fly) targets using a conversion table. The targets of the given DataSet should be 0,1,2... for the class 0, class 1, class 2, and so on. Note that is must START AT 0. After a setExample(), the inputs of this DataSet will be the same as the provided one. The targets will be #class_labels->frames[i]#, where #i# is contained in the targets of the provided DataSet. @author Ronan Collobert (collober@idiap.ch) */ class ClassFormatDataSet : public DataSet { public: /// The provided DataSet. DataSet *data; /// The class label translation table. Sequence *class_labels; /// Here you provide the translation table in #class_labels_#. ClassFormatDataSet(DataSet *data_, Sequence *class_labels_); /** We assume here that you want the one-hot encoding format. The number of classes is given by #n_classes#, if positive. Otherwise, the number of classes is guessed by taking the maximum value of the targets of the provided DataSet. */ ClassFormatDataSet(DataSet *data_, int n_classes=-1); virtual void getNumberOfFrames(int t_, int *n_input_frames_, int *n_target_frames_); virtual void setRealExample(int t, bool set_inputs=true, bool set_targets=true); virtual void preProcess(PreProcessing *pre_processing); virtual void pushExample(); virtual void popExample(); //----- virtual ~ClassFormatDataSet(); }; } #endif torch3-3.1.orig/core/Random.cc0000644000175000017500000002065710106445234016364 0ustar kalfakalfa00000000000000// Copyright (C) 2003--2004 Ronan Collobert (collober@idiap.ch) // // This file is part of Torch 3.1. // // All rights reserved. // // Redistribution and use in source and binary forms, with or without // modification, are permitted provided that the following conditions // are met: // 1. Redistributions of source code must retain the above copyright // notice, this list of conditions and the following disclaimer. // 2. 
Redistributions in binary form must reproduce the above copyright // notice, this list of conditions and the following disclaimer in the // documentation and/or other materials provided with the distribution. // 3. The name of the author may not be used to endorse or promote products // derived from this software without specific prior written permission. // // THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR // IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES // OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. // IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, // INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT // NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF // THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #include "Random.h" #include "Allocator.h" namespace Torch { // The initial seed. unsigned long Random::the_initial_seed; ///// Code for the Mersenne Twister random generator.... 
const int Random::n = 624; const int Random::m = 397; int Random::left = 1; int Random::initf = 0; unsigned long *Random::next; unsigned long Random::state[Random::n]; /* the array for the state vector */ //////////////////////////////////////////////////////// /// For normal distribution real Random::normal_x; real Random::normal_y; real Random::normal_rho; bool Random::normal_is_valid = false; void Random::seed() { time_t ltime; struct tm *today; time(<ime); today = localtime(<ime); manualSeed((unsigned long)today->tm_sec); } ///////////// The next 4 methods are taken from http://www.math.keio.ac.jp/matumoto/emt.html ///////////// Here is the copyright: ///////////// Some minor modifications have been made to adapt to "my" C++... /* A C-program for MT19937, with initialization improved 2002/2/10. Coded by Takuji Nishimura and Makoto Matsumoto. This is a faster version by taking Shawn Cokus's optimization, Matthe Bellew's simplification, Isaku Wada's real version. Before using, initialize the state by using init_genrand(seed) or init_by_array(init_key, key_length). Copyright (C) 1997 - 2002, Makoto Matsumoto and Takuji Nishimura, All rights reserved. Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: 1. Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. 2. Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. 3. The names of its contributors may not be used to endorse or promote products derived from this software without specific prior written permission. 
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. Any feedback is very welcome. http://www.math.keio.ac.jp/matumoto/emt.html email: matumoto@math.keio.ac.jp */ ////////////////// Macros for the Mersenne Twister random generator... /* Period parameters */ //#define n 624 //#define m 397 #define MATRIX_A 0x9908b0dfUL /* constant vector a */ #define UMASK 0x80000000UL /* most significant w-r bits */ #define LMASK 0x7fffffffUL /* least significant r bits */ #define MIXBITS(u,v) ( ((u) & UMASK) | ((v) & LMASK) ) #define TWIST(u,v) ((MIXBITS(u,v) >> 1) ^ ((v)&1UL ? MATRIX_A : 0UL)) /////////////////////////////////////////////////////////// That's it. void Random::manualSeed(unsigned long the_seed_) { the_initial_seed = the_seed_; state[0]= the_initial_seed & 0xffffffffUL; for(int j = 1; j < n; j++) { state[j] = (1812433253UL * (state[j-1] ^ (state[j-1] >> 30)) + j); /* See Knuth TAOCP Vol2. 3rd Ed. P.106 for multiplier. */ /* In the previous versions, mSBs of the seed affect */ /* only mSBs of the array state[]. 
*/ /* 2002/01/09 modified by makoto matsumoto */ state[j] &= 0xffffffffUL; /* for >32 bit machines */ } left = 1; initf = 1; } unsigned long Random::getInitialSeed() { if(initf == 0) { warning("Random: initializing the random generator"); seed(); } return the_initial_seed; } void Random::nextState() { unsigned long *p=state; /* if init_genrand() has not been called, */ /* a default initial seed is used */ if(initf == 0) seed(); // manualSeed(5489UL); left = n; next = state; for(int j = n-m+1; --j; p++) *p = p[m] ^ TWIST(p[0], p[1]); for(int j = m; --j; p++) *p = p[m-n] ^ TWIST(p[0], p[1]); *p = p[m-n] ^ TWIST(p[0], state[0]); } unsigned long Random::random() { unsigned long y; if (--left == 0) nextState(); y = *next++; /* Tempering */ y ^= (y >> 11); y ^= (y << 7) & 0x9d2c5680UL; y ^= (y << 15) & 0xefc60000UL; y ^= (y >> 18); return y; } /* generates a random number on [0,1)-real-interval */ real Random::uniform() { unsigned long y; if(--left == 0) nextState(); y = *next++; /* Tempering */ y ^= (y >> 11); y ^= (y << 7) & 0x9d2c5680UL; y ^= (y << 15) & 0xefc60000UL; y ^= (y >> 18); return (real)y * (1.0/4294967296.0); /* divided by 2^32 */ } /// /// Thanks *a lot* Takuji Nishimura and Makoto Matsumoto! /// ///////////////////////////////////////////////////////////////////// //// Now my own code... 
void Random::getShuffledIndices(int *indices, int n_indices) { for(int i = 0; i < n_indices; i++) indices[i] = i; Random::shuffle(indices, sizeof(int), n_indices); } void Random::shuffle(void *tabular, int size_elem, int n_elems) { void *save = Allocator::sysAlloc(size_elem); char *tab = (char *)tabular; for(int i = 0; i < n_elems-1; i++) { int z = Random::random() % (n_elems-i); memcpy(save, tab+i*size_elem, size_elem); memmove(tab+i*size_elem, tab+(z+i)*size_elem, size_elem); memcpy(tab+(z+i)*size_elem, save, size_elem); } free(save); } real Random::boundedUniform(real a, real b) { return(Random::uniform() * (b - a) + a); } real Random::normal(real mean, real stdv) { if(!normal_is_valid) { normal_x = Random::uniform(); normal_y = Random::uniform(); normal_rho = sqrt(-2. * log(1.0-normal_y)); normal_is_valid = true; } else normal_is_valid = false; if(normal_is_valid) return normal_rho*cos(2.*M_PI*normal_x)*stdv+mean; else return normal_rho*sin(2.*M_PI*normal_x)*stdv+mean; } real Random::exponential(real lambda) { return(-1. / lambda * log(1-Random::uniform())); } real Random::cauchy(real median, real sigma) { return(median + sigma * tan(M_PI*(Random::uniform()-0.5))); } // Faut etre malade pour utiliser ca. // M'enfin. real Random::logNormal(real mean, real stdv) { real zm = mean*mean; real zs = stdv*stdv; return(exp(Random::normal(log(zm/sqrt(zs + zm)), sqrt(log(zs/zm+1)) ))); } int Random::geometric(real p) { return((int)(log(1-Random::uniform()) / log(p)) + 1); } bool Random::bernouilli(real p) { return(Random::uniform() <= p); } } torch3-3.1.orig/core/Random.h0000644000175000017500000001144510106445234016221 0ustar kalfakalfa00000000000000// Copyright (C) 2003--2004 Ronan Collobert (collober@idiap.ch) // // This file is part of Torch 3.1. // // All rights reserved. // // Redistribution and use in source and binary forms, with or without // modification, are permitted provided that the following conditions // are met: // 1. 
Redistributions of source code must retain the above copyright // notice, this list of conditions and the following disclaimer. // 2. Redistributions in binary form must reproduce the above copyright // notice, this list of conditions and the following disclaimer in the // documentation and/or other materials provided with the distribution. // 3. The name of the author may not be used to endorse or promote products // derived from this software without specific prior written permission. // // THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR // IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES // OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. // IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, // INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT // NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF // THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #ifndef RANDOM_INC #define RANDOM_INC #include "general.h" namespace Torch { /** Random class which contains several static random methods. These methods are based on a uniform random generator, named "Mersenne Twister", available at: http://www.math.keio.ac.jp/matumoto/emt.html. Copyright Makoto Matsumoto and Takuji Nishimura. (Have a look inside the implementation file for details). The random generator can be initialized with the manualSeed() method. Otherwise, it will be automatically initialized with a seed based on the current computer clock. @author Ronan Collobert (collober@idiap.ch) */ class Random { public: // The seed used to initialize the random generator. 
static unsigned long the_initial_seed; // Internal variables for the Mersenne Twister generator static const int n; static const int m; static unsigned long state[]; /* the array for the state vector */ static int left; static int initf; static unsigned long *next; // Internal variables for the normal distribution generator static real normal_x; static real normal_y; static real normal_rho; static bool normal_is_valid; // Internal method for the Mersenne Twister generator static void nextState(); /// Initializes the random number generator with the computer clock. static void seed(); /// Initializes the random number generator with the given long "the_seed_". static void manualSeed(unsigned long the_seed_); /// Returns the starting seed used. static unsigned long getInitialSeed(); /// Generates a uniform 32 bits integer. static unsigned long random(); /// Generates a uniform random number on [0,1[. static real uniform(); /// Returns in #indices# #n_indices# shuffled. (between 0 and #n_indices-1#). static void getShuffledIndices(int *indices, int n_indices); /// Shuffles tabular, which contains #n_elems# of size #size_elem#. static void shuffle(void *tabular, int size_elem, int n_elems); /// Generates a uniform random number on [a,b[ (b>a). static real boundedUniform(real a, real b); /** Generates a random number from a normal distribution. (With mean #mean# and standard deviation #stdv >= 0#). */ static real normal(real mean=0, real stdv=1); /** Generates a random number from an exponential distribution. The density is $p(x) = lambda * exp(-lambda * x)$, where lambda is a positive number. */ static real exponential(real lambda); /** Returns a random number from a Cauchy distribution. The Cauchy density is $p(x) = sigma/(pi*(sigma^2 + (x-median)^2))$ */ static real cauchy(real median=0, real sigma=1); /** Generates a random number from a log-normal distribution. (#mean > 0# is the mean of the log-normal distribution and #stdv# is its standard deviation). 
*/ static real logNormal(real mean, real stdv); /** Generates a random number from a geometric distribution. It returns an integer #i#, where $p(i) = (1-p) * p^(i-1)$. p must satisfy $0 < p < 1$. */ static int geometric(real p); /// Returns true with probability $p$ and false with probability $1-p$ (p > 0). static bool bernouilli(real p=0.5); }; } #endif torch3-3.1.orig/core/Sequence.cc0000644000175000017500000001606610106445234016713 0ustar kalfakalfa00000000000000// Copyright (C) 2003--2004 Ronan Collobert (collober@idiap.ch) // // This file is part of Torch 3.1. // // All rights reserved. // // Redistribution and use in source and binary forms, with or without // modification, are permitted provided that the following conditions // are met: // 1. Redistributions of source code must retain the above copyright // notice, this list of conditions and the following disclaimer. // 2. Redistributions in binary form must reproduce the above copyright // notice, this list of conditions and the following disclaimer in the // documentation and/or other materials provided with the distribution. // 3. The name of the author may not be used to endorse or promote products // derived from this software without specific prior written permission. // // THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR // IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES // OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. // IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, // INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT // NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF // THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
#include "Sequence.h"
#include "XFile.h"

namespace Torch {

/// Creates an empty sequence: no frame array, all counters at zero.
Sequence::Sequence()
{
  // General
  frames = NULL;
  n_real_frames = 0;
  n_frames = 0;
  frame_size = 0;
}

/// Wraps the caller-provided array #frames_# of #n_frames_# frame pointers.
/// Only the array pointer is stored; nothing is copied.
Sequence::Sequence(real **frames_, int n_frames_, int frame_size_)
{
  frames = frames_;
  n_real_frames = n_frames_;
  n_frames = n_frames_;
  frame_size = frame_size_;
}

/// Creates a sequence of #n_frames_# frames of #frame_size_# reals. The
/// frame pointer array and one contiguous data buffer are obtained from
/// the object allocator. A non-positive #n_frames_# gives an empty
/// sequence; a non-positive #frame_size_# is reported through error().
Sequence::Sequence(int n_frames_, int frame_size_)
{
  // General
  n_real_frames = n_frames_;
  n_frames = n_frames_;
  frame_size = frame_size_;

  if(n_frames > 0)
    frames = (real **)allocator->alloc(sizeof(real *)*n_frames);
  else
  {
    frames = NULL;
    n_frames = 0;
    // Keep both counters consistent: a negative real frame count would make
    // a later resize() start filling the array at a negative index.
    n_real_frames = 0;
  }

  if(frame_size <= 0)
    error("Sequence: try to create a sequence with a negative size");

  if(n_frames > 0)
  {
    real *data_buffer = (real *)allocator->alloc(sizeof(real)*n_frames*frame_size);
    for(int i = 0; i < n_frames; i++)
      frames[i] = data_buffer+i*frame_size;
  }
}

/// Makes #n_frames_# frames visible. When enough frames already exist only
/// the visible count changes. Otherwise the pointer array is grown and the
/// new entries either point into a freshly allocated buffer, or are NULL
/// when #frame_size# is 0 or #allocate_new_frames# is false.
void Sequence::resize(int n_frames_, bool allocate_new_frames)
{
  // Are these frames already in memory? Then just expose them.
  if(n_real_frames >= n_frames_)
  {
    n_frames = n_frames_;
    return;
  }

  // Allocate array of frames and possibly the frames...
  // If frame_size is 0 (or user explicit specification), frames won't be allocated
  reallocFramesArray(n_frames_);
  if( (frame_size == 0) || (!allocate_new_frames) )
  {
    for(int i = n_real_frames; i < n_frames_; i++)
      frames[i] = NULL;
  }
  else
  {
    real *data_buffer = (real *)allocator->alloc(sizeof(real)*(n_frames_-n_real_frames)*frame_size);
    for(int i = n_real_frames; i < n_frames_; i++)
      frames[i] = data_buffer+(i-n_real_frames)*frame_size;
  }

  n_frames = n_frames_;
  n_real_frames = n_frames_;
}

/// Appends #frame# after the last real frame. With #do_copy# the frame
/// content is duplicated in memory from the object allocator, otherwise
/// only the pointer is stored. A NULL #frame# is ignored.
void Sequence::addFrame(real *frame, bool do_copy)
{
  if(!frame)
    return;

  reallocFramesArray(n_real_frames+1);
  if(do_copy)
  {
    frames[n_real_frames] = (real *)allocator->alloc(sizeof(real)*frame_size);
    real *frame_dest = frames[n_real_frames];
    for(int i = 0; i < frame_size; i++)
      frame_dest[i] = frame[i];
  }
  else
    frames[n_real_frames] = frame;

  // The visible count only follows when every real frame was visible.
  if(n_real_frames == n_frames)
    n_frames++;
  n_real_frames++;
}

/// Appends the visible frames of #sequence# after the last real frame.
/// With #do_copy# frame contents are duplicated, otherwise only the frame
/// pointers are stored. Frame sizes must match. A NULL #sequence# is ignored.
void Sequence::add(Sequence *sequence, bool do_copy)
{
  if(!sequence)
    return;

  if(frame_size != sequence->frame_size)
    error("Sequence: try to add a sequence with a wrong frame size");

  reallocFramesArray(n_real_frames+sequence->n_frames);
  if(do_copy)
  {
    for(int i = 0; i < sequence->n_frames; i++)
    {
      frames[n_real_frames+i] = (real *)allocator->alloc(sizeof(real)*frame_size);
      real *frame_dest = frames[n_real_frames+i];
      real *frame_src = sequence->frames[i];
      for(int j = 0; j < frame_size; j++)
        frame_dest[j] = frame_src[j];
    }
  }
  else
  {
    for(int i = 0; i < sequence->n_frames; i++)
      frames[n_real_frames+i] = sequence->frames[i];
  }

  if(n_real_frames == n_frames)
    n_frames += sequence->n_frames;
  n_real_frames += sequence->n_frames;
}

/// Fills the visible frames of this sequence with the reals of #from#,
/// read frame after frame. The two sequences may use different frame
/// sizes; #from# has to provide at least as many reals as are written.
void Sequence::copy(Sequence *from)
{
  // Nothing to write: leave from->frames alone, it is NULL for a sequence
  // without frames (e.g. when cloning an empty sequence).
  if( (n_frames <= 0) || (frame_size <= 0) )
    return;

  int src_frame_size = from->frame_size;
  int src_frame_number = 0;
  int src_index_in_frame = 0;
  real *src_frame = from->frames[0];

  for(int i = 0; i < n_frames; i++)
  {
    real *dest_frame = frames[i];
    for(int j = 0; j < frame_size; j++)
    {
      // Current source frame exhausted: move on to the next one.
      if(src_index_in_frame == src_frame_size)
      {
        src_index_in_frame = 0;
        src_frame = from->frames[++src_frame_number];
      }
      dest_frame[j] = src_frame[src_index_in_frame++];
    }
  }
}

/// Gives the frame pointer array room for #n_frames_# entries. An array
/// owned by the object allocator is reallocated; any other array is
/// replaced by a new one holding a copy of the #n_real_frames# pointers.
void Sequence::reallocFramesArray(int n_frames_)
{
  if(allocator->isMine(frames))
    frames = (real **)allocator->realloc(frames, sizeof(real *)*(n_frames_));
  else
  {
    real **frames_ = (real **)allocator->alloc(sizeof(real *)*(n_frames_));
    for(int i = 0; i < n_real_frames; i++)
      frames_[i] = frames[i];
    frames = frames_;
  }
}

/// Fills the visible frames from the flat vector #vec#, which is read as
/// #n_frames# consecutive chunks of #frame_size# reals.
void Sequence::copyFrom(real *vec)
{
  for(int i = 0; i < n_frames; i++)
  {
    real *frame_ = frames[i];
    for(int j = 0; j < frame_size; j++)
      frame_[j] = vec[j];
    vec += frame_size;
  }
}

/// Writes the visible frames one after the other into the flat vector
/// #vec# (#n_frames# * #frame_size# reals).
void Sequence::copyTo(real *vec)
{
  for(int i = 0; i < n_frames; i++)
  {
    real *frame_ = frames[i];
    for(int j = 0; j < frame_size; j++)
      vec[j] = frame_[j];
    vec += frame_size;
  }
}

/// Size in bytes of the Sequence object itself.
int Sequence::getSequenceSpace()
{
  return(sizeof(Sequence));
}

/// Size in bytes of the visible frames plus their pointer array.
int Sequence::getFramesSpace()
{
  return(frame_size*n_frames*sizeof(real)+n_frames*sizeof(real *));
}

/// Builds a copy of the visible part of this sequence. #allocator_#
/// defaults to the allocator of this object. When #frames_memory# is
/// given it is laid out as the pointer array followed by the frame data;
/// when #sequence_memory# is given the object is constructed in it.
Sequence *Sequence::clone(Allocator *allocator_, void *sequence_memory, void *frames_memory)
{
  if(!allocator_)
    allocator_ = allocator;

  real **frames_ = (real **)frames_memory;
  if(frames_memory)
  {
    // Frame data starts right after the n_frames pointers.
    real *buffer = (real *)(frames_ + n_frames);
    for(int i = 0; i < n_frames; i++)
      frames_[i] = buffer+i*frame_size;
  }

  Sequence *the_clone = NULL;
  if(sequence_memory)
  {
    if(frames_memory)
      the_clone = new(allocator_, sequence_memory) Sequence(frames_, n_frames, frame_size);
    else
      the_clone = new(allocator_, sequence_memory) Sequence(n_frames, frame_size);
  }
  else
  {
    if(frames_memory)
      the_clone = new(allocator_) Sequence(frames_, n_frames, frame_size);
    else
      the_clone = new(allocator_) Sequence(n_frames, frame_size);
  }

  the_clone->copy(this);

  return the_clone;
}

/// Reads each visible frame from #file# as a tagged "FRAME" record.
void Sequence::loadXFile(XFile *file)
{
  for(int i = 0; i < n_frames; i++)
    file->taggedRead(frames[i], sizeof(real), frame_size, "FRAME");
}

/// Writes each visible frame to #file# as a tagged "FRAME" record.
void Sequence::saveXFile(XFile *file)
{
  for(int i = 0; i < n_frames; i++)
    file->taggedWrite(frames[i], sizeof(real), frame_size, "FRAME");
}

Sequence::~Sequence()
{
}
IMPLEMENT_NEW_LIST(SequenceList, Sequence) } torch3-3.1.orig/core/Sequence.h0000644000175000017500000001215710106445234016552 0ustar kalfakalfa00000000000000// Copyright (C) 2003--2004 Ronan Collobert (collober@idiap.ch) // // This file is part of Torch 3.1. // // All rights reserved. // // Redistribution and use in source and binary forms, with or without // modification, are permitted provided that the following conditions // are met: // 1. Redistributions of source code must retain the above copyright // notice, this list of conditions and the following disclaimer. // 2. Redistributions in binary form must reproduce the above copyright // notice, this list of conditions and the following disclaimer in the // documentation and/or other materials provided with the distribution. // 3. The name of the author may not be used to endorse or promote products // derived from this software without specific prior written permission. // // THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR // IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES // OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. // IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, // INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT // NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF // THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #ifndef SEQUENCE_INC #define SEQUENCE_INC #include "Object.h" #include "List.h" namespace Torch { /** Sequence definition. A sequence is a set of frames (that is, a vector) which have the same size. 
@author Ronan Collobert (collober@idiap.ch) */ class Sequence : public Object { private: /* Resize the array of frames to #n_frames_#. If the array hasn't be allocated by the class, allocate a new one and copy previous frame pointers. Else, just do a realloc. Note that #n_frames_# must be the new #n_real_frames#... */ void reallocFramesArray(int n_frames_); public: /// Real number of frames int n_real_frames; /** Array of usable frame pointers. The actual size of this array is given by #n_real_frames#. And the usable size is given by #n_frames#. */ real **frames; /// Number of visible frames int n_frames; /// Frame size int frame_size; /// Create an empty sequence Sequence(); /** Create a sequences of #n_frames_# frames with size #frame_size_#. The frames are given by the #frames_# array. Nothing (except pointers!) will be copied. */ Sequence(real **frames_, int n_frames_, int frame_size_); /** Create a sequence with #n_frames_# \emph{standard} frames of size #frame_size#. */ Sequence(int n_frames_, int frame_size_); /** Resize the sequence to #n_frames_#. Note that if #n_frames_# is lower than the previous one, the frames won't be deallocated, and can be retrieved by another resize... */ void resize(int n_frames_, bool allocate_new_frames=true); /** Add a frame at the end of the Sequence. If #do_copy# is true, copy the sequence. Else, just copy the pointer. */ void addFrame(real *frame, bool do_copy=false); /** Add a sequence at the end of the Sequence. If #do_copy# is true, copy the frame contents. Else, just copy the frame pointers. */ void add(Sequence *sequence, bool do_copy=false); /** Copy the given sequence. The given sequence don't need to have the same structure. But it must have the same total length. */ void copy(Sequence *from); /** Copy a real vector in the full sequence. The sequence \emph{must} have the good size! */ void copyFrom(real *vec); /** Copy the full sequence in a real vector. The sequence \emph{must} have the good size! 
*/ void copyTo(real *vec); /// Get the space needed to allocate one Sequence virtual int getSequenceSpace(); /// Get the space needed to allocate frames contained in the sequence. virtual int getFramesSpace(); /** Clone the sequence. If #allocator_# is non-null, call it for all memory allocation and object initialization. Else, the returned sequence will be destroyed when the original sequence will be destroyed. If #sequence_memory# is non-null, puts the class memory-space in it; it must contain the space given by #getSequenceSpace()#. If #frames_memory# is non-null, use the given memory for frames allocation; it must contain the space given by #getFramesSpace()#. */ virtual Sequence *clone(Allocator *allocator_=NULL, void *sequence_memory=NULL, void *frames_memory=NULL); /// Save the \emph{usable} frames. (#n_frames# available in #frames#). virtual void saveXFile(XFile *file); /// Load the \emph{usable} frames. (#n_frames# available in #frames#). virtual void loadXFile(XFile *file); virtual ~Sequence(); }; DEFINE_NEW_LIST(SequenceList, Sequence); } #endif torch3-3.1.orig/core/TimeMeasurer.cc0000644000175000017500000000424010106445234017534 0ustar kalfakalfa00000000000000// Copyright (C) 2003--2004 Ronan Collobert (collober@idiap.ch) // // This file is part of Torch 3.1. // // All rights reserved. // // Redistribution and use in source and binary forms, with or without // modification, are permitted provided that the following conditions // are met: // 1. Redistributions of source code must retain the above copyright // notice, this list of conditions and the following disclaimer. // 2. Redistributions in binary form must reproduce the above copyright // notice, this list of conditions and the following disclaimer in the // documentation and/or other materials provided with the distribution. // 3. The name of the author may not be used to endorse or promote products // derived from this software without specific prior written permission. 
// // THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR // IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES // OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. // IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, // INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT // NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF // THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #include "TimeMeasurer.h" namespace Torch { TimeMeasurer::TimeMeasurer(DataSet *data_, XFile *file_) : Measurer(data_, file_) { timer = new(allocator) Timer; } void TimeMeasurer::reset() { timer->reset(); } void TimeMeasurer::measureIteration() { real the_time_ = timer->getTime(); if(binary_mode) file->write(&the_time_, sizeof(real), 1); else file->printf("%g\n", the_time_); file->flush(); } void TimeMeasurer::measureEnd() { real the_time_ = timer->getTime(); if(binary_mode) file->write(&the_time_, sizeof(real), 1); else file->printf("%g\n", the_time_); file->flush(); } TimeMeasurer::~TimeMeasurer() { } } torch3-3.1.orig/core/TimeMeasurer.h0000644000175000017500000000414610106445234017403 0ustar kalfakalfa00000000000000// Copyright (C) 2003--2004 Ronan Collobert (collober@idiap.ch) // // This file is part of Torch 3.1. // // All rights reserved. // // Redistribution and use in source and binary forms, with or without // modification, are permitted provided that the following conditions // are met: // 1. Redistributions of source code must retain the above copyright // notice, this list of conditions and the following disclaimer. // 2. 
Redistributions in binary form must reproduce the above copyright // notice, this list of conditions and the following disclaimer in the // documentation and/or other materials provided with the distribution. // 3. The name of the author may not be used to endorse or promote products // derived from this software without specific prior written permission. // // THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR // IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES // OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. // IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, // INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT // NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF // THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #ifndef TIME_MEASURER_INC #define TIME_MEASURER_INC #include "Measurer.h" #include "Timer.h" namespace Torch { /** Measure the time (in seconds) between two #measureIteration()# calls. Save in #file# the sum of these time, at each iteration. @author Ronan Collobert (collober@idiap.ch) */ class TimeMeasurer : public Measurer { public: Timer *timer; //----- /// TimeMeasurer(DataSet *data_, XFile *file_); //----- virtual void reset(); virtual void measureIteration(); virtual void measureEnd(); virtual ~TimeMeasurer(); }; } #endif torch3-3.1.orig/core/Timer.cc0000644000175000017500000000515210106445234016215 0ustar kalfakalfa00000000000000// Copyright (C) 2003--2004 Ronan Collobert (collober@idiap.ch) // // This file is part of Torch 3.1. // // All rights reserved. 
// // Redistribution and use in source and binary forms, with or without // modification, are permitted provided that the following conditions // are met: // 1. Redistributions of source code must retain the above copyright // notice, this list of conditions and the following disclaimer. // 2. Redistributions in binary form must reproduce the above copyright // notice, this list of conditions and the following disclaimer in the // documentation and/or other materials provided with the distribution. // 3. The name of the author may not be used to endorse or promote products // derived from this software without specific prior written permission. // // THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR // IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES // OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. // IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, // INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT // NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF // THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
#include "Timer.h"
#ifndef _MSC_VER
// times() / struct tms, and sysconf() / _SC_CLK_TCK.
#include <sys/times.h>
#include <unistd.h>
#endif

namespace Torch {

#ifdef _MSC_VER
time_t Timer::base_time = 0;
#endif

/// Returns the current reading of the underlying clock, in seconds.
/// With _MSC_VER: calendar time elapsed since #base_time#.
/// Otherwise: the user CPU time reported by times(), converted from
/// clock ticks to seconds.
real Timer::getRunTime()
{
#ifdef _MSC_VER
  time_t now;
  time(&now);
  return(difftime(now, base_time));
#else
  struct tms current;
  times(&current);

  real norm = (real)sysconf(_SC_CLK_TCK);
  return(((real)current.tms_utime)/norm);
#endif
}

/// Creates the timer and starts it immediately.
Timer::Timer()
{
#ifdef _MSC_VER
  // Set the shared time origin once, on first use.
  while(!base_time)
    time(&base_time);
#endif
  total_time = 0;
  is_running = true;
  start_time = getRunTime();
}

/// Erases the accumulated time and counts again from now.
/// The running/stopped status is left as it is.
void Timer::reset()
{
  total_time = 0;
  start_time = getRunTime();
}

/// Stops the timer and adds the time elapsed since the last start to the
/// accumulated time. Does nothing if the timer is already stopped.
void Timer::stop()
{
  if(!is_running)
    return;

  real current_time = getRunTime() - start_time;
  total_time += current_time;
  is_running = false;
}

/// Restarts a stopped timer from now. Does nothing if it is running.
void Timer::resume()
{
  if(is_running)
    return;

  start_time = getRunTime();
  is_running = true;
}

/// Returns the accumulated time, including the period in progress when
/// the timer is running.
real Timer::getTime()
{
  if(is_running)
  {
    real current_time = getRunTime() - start_time;
    return(total_time+current_time);
  }
  else
    return total_time;
}

Timer::~Timer()
{
}

}

// torch3-3.1.orig/core/Timer.h0000644000175000017500000000520210106445234016053 0ustar kalfakalfa00000000000000
// Copyright (C) 2003--2004 Ronan Collobert (collober@idiap.ch)
//
// This file is part of Torch 3.1.
//
// All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions
// are met:
// 1. Redistributions of source code must retain the above copyright
//    notice, this list of conditions and the following disclaimer.
// 2. Redistributions in binary form must reproduce the above copyright
//    notice, this list of conditions and the following disclaimer in the
//    documentation and/or other materials provided with the distribution.
// 3. The name of the author may not be used to endorse or promote products
//    derived from this software without specific prior written permission.
// // THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR // IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES // OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. // IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, // INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT // NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF // THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #ifndef TIMER_INC #define TIMER_INC #include "Object.h" namespace Torch { /** Timer... to measure time. It accumulates the time measured in several #resume()# (or constructor definition) and #stop()# calls. Use #getTime()# to know this accumulated time. @author Ronan Collobert (collober@idiap.ch) */ class Timer : public Object { #ifdef _MSC_VER private: static time_t base_time; #endif public: bool is_running; real total_time; real start_time; /// Create the timer and start it now! Timer(); /** Reset the timer. The timer will count time starting from now, and the accumulated time is erased. */ void reset(); /// Stop the timer. Updates accumulated time. void stop(); /// Resume the timer. It will count time starting from now. void resume(); /** Get the total accumulated time. (Until now, if the timer is still running. */ real getTime(); /** System dependent function which returns time elapsed since an arbitrary point reference in the past. */ static real getRunTime(); ~Timer(); }; } #endif torch3-3.1.orig/core/Trainer.cc0000644000175000017500000001303410106445234016537 0ustar kalfakalfa00000000000000// Copyright (C) 2003--2004 Ronan Collobert (collober@idiap.ch) // // This file is part of Torch 3.1. // // All rights reserved. 
// // Redistribution and use in source and binary forms, with or without // modification, are permitted provided that the following conditions // are met: // 1. Redistributions of source code must retain the above copyright // notice, this list of conditions and the following disclaimer. // 2. Redistributions in binary form must reproduce the above copyright // notice, this list of conditions and the following disclaimer in the // documentation and/or other materials provided with the distribution. // 3. The name of the author may not be used to endorse or promote products // derived from this software without specific prior written permission. // // THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR // IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES // OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. // IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, // INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT // NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF // THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #include "Trainer.h" #include "Random.h" namespace Torch { IMPLEMENT_NEW_LIST(MeasurerList, Measurer) Trainer::Trainer(Machine *machine_) { machine = machine_; } void Trainer::test(MeasurerList *measurers) { DataSet **datas; Measurer ***mes; int *n_mes; int n_datas; print("# Trainer: testing ["); Allocator *allocator_ = extractMeasurers(measurers, NULL, &datas, &mes, &n_mes, &n_datas); //// int n_ex = 0; for(int andrea = 0; andrea < n_datas; andrea++) n_ex += datas[andrea]->n_examples; real n_ex_mod = ( (n_ex == 0) ? 0. 
: 10.1/((real)n_ex)); real ex_curr = 0; real n_dots = 0; //// for(int andrea = 0; andrea < n_datas; andrea++) { DataSet *dataset = datas[andrea]; for(int i = 0; i < n_mes[andrea]; i++) mes[andrea][i]->reset(); for(int t = 0; t < dataset->n_examples; t++) { dataset->setExample(t); machine->forward(dataset->inputs); for(int i = 0; i < n_mes[andrea]; i++) mes[andrea][i]->measureExample(); if(++ex_curr * n_ex_mod >= (n_dots+1)) { if(n_ex < 10) print("_"); else print("."); n_dots++; } } for(int i = 0; i < n_mes[andrea]; i++) mes[andrea][i]->measureIteration(); for(int i = 0; i < n_mes[andrea]; i++) mes[andrea][i]->measureEnd(); } print("]\n"); delete allocator_; } // ExtractMeasurers, ou la magie du quatre etoiles... Allocator *Trainer::extractMeasurers(MeasurerList *measurers, DataSet *train, DataSet ***datas, Measurer ****meas, int **n_meas, int *n_datas) { DataSet **datas_ = NULL; Measurer ***meas_ = NULL; int *n_meas_ = NULL; int n_datas_ = 0; Allocator *allocator_ = new Allocator; // 0) Coup bas ? Eh... l'aut... if(!measurers) { if(train) { datas_ = (DataSet **)allocator_->alloc(sizeof(DataSet *)); datas_[n_datas_++] = train; n_meas_ = (int *)allocator_->alloc(sizeof(int)); n_meas_[0] = 0; } *datas = datas_; *meas = meas_; *n_meas = n_meas_; *n_datas = n_datas_; return allocator_; } // 1) Find all differents datas [-- bourrin -- et en + : +1 au cas ou tous != et tous != de train...]... 
datas_ = (DataSet **)allocator_->alloc(sizeof(DataSet *)*(measurers->n_nodes+1)); n_datas_ = 0; if(train) datas_[n_datas_++] = train; for(int i = 0; i < measurers->n_nodes; i++) { DataSet *potential_new_dataset = measurers->nodes[i]->data; bool already_registered = false; for(int j = 0; j < n_datas_; j++) { if(potential_new_dataset == datas_[j]) { already_registered = true; break; } } if(!already_registered) datas_[n_datas_++] = potential_new_dataset; } // 2) Allocations a la con avec l'allocator qu'on va refiler a l'aut'naze if(n_datas_ > 0) { n_meas_ = (int *)allocator_->alloc(sizeof(int)*n_datas_); meas_ = (Measurer ***)allocator_->alloc(sizeof(Measurer **)*n_datas_); } // 3) For each dataset... for(int i = 0; i < n_datas_; i++) { DataSet *counted_data = datas_[i]; // 2a) Count associated measurers... n_meas_[i] = 0; for(int j = 0; j < measurers->n_nodes; j++) { if(measurers->nodes[j]->data == counted_data) n_meas_[i]++; } // 2b) Y'a qqch ??? [pour le train...] if(!n_meas_[i]) continue; // 2c) Allocate memory meas_[i] = (Measurer **)allocator_->alloc(sizeof(Measurer *)*n_meas_[i]); // 2d) Rempli le bordel a donf [processeurs deterministes seulement] int index = 0; for(int j = 0; j < measurers->n_nodes; j++) { if(measurers->nodes[j]->data == counted_data) meas_[i][index++] = measurers->nodes[j]; } } // 4) Envoie la sauce. Restez pas plante la, y'a plus rien a voir, bordel. *datas = datas_; *meas = meas_; *n_meas = n_meas_; *n_datas = n_datas_; return allocator_; } void Trainer::loadXFile(XFile *file) { machine->loadXFile(file); } void Trainer::saveXFile(XFile *file) { machine->saveXFile(file); } Trainer::~Trainer() { } } torch3-3.1.orig/core/Trainer.h0000644000175000017500000000726310106445234016410 0ustar kalfakalfa00000000000000// Copyright (C) 2003--2004 Ronan Collobert (collober@idiap.ch) // // This file is part of Torch 3.1. // // All rights reserved. 
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions
// are met:
// 1. Redistributions of source code must retain the above copyright
//    notice, this list of conditions and the following disclaimer.
// 2. Redistributions in binary form must reproduce the above copyright
//    notice, this list of conditions and the following disclaimer in the
//    documentation and/or other materials provided with the distribution.
// 3. The name of the author may not be used to endorse or promote products
//    derived from this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
// IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
// OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
// IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
// INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
// NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
// THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

#ifndef TRAINER_INC
#define TRAINER_INC

#include "Object.h"
#include "Machine.h"
#include "DataSet.h"
#include "List.h"
#include "Measurer.h"

namespace Torch {

// Declares the MeasurerList type used throughout this header; the code in
// Trainer.cc reads its #n_nodes# and #nodes# fields.
DEFINE_NEW_LIST(MeasurerList, Measurer);

/** Trainer.
    
    A trainer takes a #Machine# and is able to train this machine
    on a given dataset with the #train()# method.

    For each machine, there should exist a trainer which knows
    how to train this machine.

    Testing the machine is possible with the #test()# method.

    @author Ronan Collobert (collober@idiap.ch)
*/
class Trainer : public Object
{
  public:
    /// The machine handled by this trainer.
    Machine *machine;

    //-----

    ///
    Trainer(Machine *machine_);

    //-----

    /** Train the machine on #data_#.
        The Trainer has to call the #measurers# whenever it wants to.
    */
    virtual void train(DataSet *data_, MeasurerList *measurers) = 0;

    /** Test the machine.
        This method calls all the measurers, for all
        the examples of their associated dataset.
        A default implementation is provided.
    */
    virtual void test(MeasurerList *measurers);

    /** Make a table of measurers from a #List#.
        Given a #List# of #measurers#, and, if you want,
        a #train# #DataSet# (else NULL)
        \begin{itemize}
          \item Returns all datasets associated to the measurers in #datas#.
          For i != j, (*datas)[i] != (*datas)[j]. Moreover, if #train# != NULL,
          (*datas)[0] = #train#.
          \item Returns the list of measurers associated to (*datas)[i] in (*meas)[i].
          \item Returns the number of measurers associated to (*datas)[i] in (*n_meas)[i].
          \item Returns in *n_datas the number of datasets in *datas.
        \end{itemize}
        Returns an allocator which owns all the memory allocated by the function.
        You have to delete this allocator yourself.
    */
    static Allocator *extractMeasurers(MeasurerList *measurers, DataSet *train, DataSet ***datas, Measurer ****meas, int **n_meas, int *n_datas);

    /// By default, just load the machine
    virtual void loadXFile(XFile *file);

    /// By default, just save the machine
    virtual void saveXFile(XFile *file);

    //-----

    virtual ~Trainer();
};

}

#endif
torch3-3.1.orig/core/TwoClassFormat.cc0000644000175000017500000000767010106445234020054 0ustar kalfakalfa00000000000000
// Copyright (C) 2003--2004 Ronan Collobert (collober@idiap.ch)
//
// This file is part of Torch 3.1.
//
// All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions
// are met:
// 1. Redistributions of source code must retain the above copyright
//    notice, this list of conditions and the following disclaimer.
// 2. Redistributions in binary form must reproduce the above copyright // notice, this list of conditions and the following disclaimer in the // documentation and/or other materials provided with the distribution. // 3. The name of the author may not be used to endorse or promote products // derived from this software without specific prior written permission. // // THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR // IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES // OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. // IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, // INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT // NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF // THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
#include "TwoClassFormat.h"

namespace Torch {

// Autodetects the (at most two) class labels by scanning the first value of
// the first target frame of every example of #data#.
// Stops the program through error() if a third distinct label shows up.
TwoClassFormat::TwoClassFormat(DataSet *data)
{
  if(data->n_targets != 1)
    warning("TwoClassFormat: the data has %d ouputs", data->n_targets);

  int n_set = 0;
  for(int i = 0; i < data->n_examples; i++)
  {
    data->setExample(i);
    real label = data->targets->frames[0][0];

    // Is this label one of those already collected?
    bool flag = false;
    for(int k = 0; k < n_set; k++)
    {
      if(label == tabclasses[k])
      {
        flag = true;
        break;
      }
    }

    if(!flag)
    {
      if(n_set == 2)
        error("TwoClassFormat: you have more than two classes");
      tabclasses[n_set++] = label;
    }
  }

  switch(n_set)
  {
    case 0:
      warning("TwoClassFormat: you have no examples");
      tabclasses[0] = 0;
      tabclasses[1] = 0;
      break;
    case 1:
      warning("TwoClassFormat: you have only one class [%g]", tabclasses[0]);
      tabclasses[1] = tabclasses[0];
      break;
    case 2:
      // Keep the two labels sorted in increasing order.
      if(tabclasses[0] > tabclasses[1])
      {
        real z = tabclasses[1];
        tabclasses[1] = tabclasses[0];
        tabclasses[0] = z;
      }
      message("TwoClassFormat: two classes detected [%g and %g]", tabclasses[0], tabclasses[1]);
      break;
  }

  // The format always exposes two classes; each label is a one-value
  // "vector" pointing into tabclasses.
  n_classes = 2;
  class_labels = (real **)allocator->alloc(sizeof(real *)*n_classes);
  for(int i = 0; i < n_classes; i++)
    class_labels[i] = tabclasses+i;
}

// Uses the two given labels as they are (no sorting is done here).
TwoClassFormat::TwoClassFormat(real class_1, real class_2)
{
  tabclasses[0] = class_1;
  tabclasses[1] = class_2;

  n_classes = 2;
  class_labels = (real **)allocator->alloc(sizeof(real *)*n_classes);
  for(int i = 0; i < n_classes; i++)
    class_labels[i] = tabclasses+i;
}

// A class is encoded by a single value.
int TwoClassFormat::getOutputSize()
{
  return 1;
}

// Folds a two-value one-hot vector into one value: the score of the class
// with the larger label minus the score of the other class.
// (The unconditional assignment that used to precede this test was always
// overwritten by one of the two branches and has been removed.)
void TwoClassFormat::fromOneHot(real *outputs, real *one_hot_outputs)
{
  if(tabclasses[1] > tabclasses[0])
    outputs[0] = one_hot_outputs[1] - one_hot_outputs[0];
  else
    outputs[0] = one_hot_outputs[0] - one_hot_outputs[1];
}

// Expands the single output into two values: the absolute distances of
// outputs[0] to the larger label (slot 0) and to the smaller label (slot 1).
void TwoClassFormat::toOneHot(real *outputs, real *one_hot_outputs)
{
  // Index (0 or 1) of the larger / smaller label; both are 0 when the two
  // labels are equal.
  int maxclass = (tabclasses[1]>tabclasses[0]);
  int minclass = (tabclasses[0]>tabclasses[1]);

  one_hot_outputs[0] = fabs(outputs[0] - tabclasses[maxclass]);
  one_hot_outputs[1] = fabs(outputs[0] - tabclasses[minclass]);
}

// Returns 1 when vector[0] is strictly closer to tabclasses[1] than to
// tabclasses[0], and 0 otherwise (ties go to class 0).
int TwoClassFormat::getClass(real *vector)
{
  real out = vector[0];

  return(fabs(out - tabclasses[0]) > fabs(out - tabclasses[1]) ? 1 : 0);
}

TwoClassFormat::~TwoClassFormat()
{
}

}
torch3-3.1.orig/core/TwoClassFormat.h0000644000175000017500000000431610106445234017710 0ustar kalfakalfa00000000000000
// Copyright (C) 2003--2004 Ronan Collobert (collober@idiap.ch)
//
// This file is part of Torch 3.1.
//
// All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions
// are met:
// 1. Redistributions of source code must retain the above copyright
//    notice, this list of conditions and the following disclaimer.
// 2. Redistributions in binary form must reproduce the above copyright
//    notice, this list of conditions and the following disclaimer in the
//    documentation and/or other materials provided with the distribution.
// 3. The name of the author may not be used to endorse or promote products
//    derived from this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
// IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
// OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
// IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
// INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
// NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
// THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#ifndef TWO_CLASS_FORMAT_INC
#define TWO_CLASS_FORMAT_INC

#include "ClassFormat.h"
#include "DataSet.h"

namespace Torch {

/** Define the two class encoding format.
    A class is encoded by a single real value: its label.

    @author Ronan Collobert (collober@idiap.ch)
*/
class TwoClassFormat : public ClassFormat
{
  public:
    /// The labels of the two classes.
    real tabclasses[2];

    /// Autodetect classes from the targets of #data#.
    TwoClassFormat(DataSet *data);

    /// Specify the label of the classes.
    TwoClassFormat(real class_1, real class_2);

    /// Index (0 or 1) of the class whose label is the closest to #vector[0]#.
    virtual int getClass(real *vector);

    /// Size of the encoding (one value).
    virtual int getOutputSize();

    /// Convert a two-value one-hot vector into the one-value encoding.
    virtual void fromOneHot(real *outputs, real *one_hot_outputs);

    /// Convert the one-value encoding into a two-value vector.
    virtual void toOneHot(real *outputs, real *one_hot_outputs);

    virtual ~TwoClassFormat();
};

}

#endif
torch3-3.1.orig/core/WeightedSumMachine.cc0000644000175000017500000000712410106445234020650 0ustar kalfakalfa00000000000000
// Copyright (C) 2003--2004 Ronan Collobert (collober@idiap.ch)
//
// This file is part of Torch 3.1.
//
// All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions
// are met:
// 1. Redistributions of source code must retain the above copyright
//    notice, this list of conditions and the following disclaimer.
// 2. Redistributions in binary form must reproduce the above copyright
//    notice, this list of conditions and the following disclaimer in the
//    documentation and/or other materials provided with the distribution.
// 3. The name of the author may not be used to endorse or promote products
//    derived from this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
// IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
// OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
// IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, // INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT // NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF // THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #include "WeightedSumMachine.h" namespace Torch { WeightedSumMachine::WeightedSumMachine(Trainer **trainers_, int n_trainers_, MeasurerList** trainers_measurers_, real *weights_) { // Boaf trainers = trainers_; trainers_measurers = trainers_measurers_; n_trainers = n_trainers_; n_outputs = trainers[0]->machine->outputs->frame_size; for(int i = 0; i < n_trainers; i++) { if(n_outputs != trainers[i]->machine->outputs->frame_size) error("WeightedSumMachine: provided machines don't have the same output size!"); } outputs = new(allocator) Sequence(0, n_outputs); if(weights_) weights = weights_; else { weights = (real *)allocator->alloc(n_trainers*sizeof(real)); for(int i = 0; i < n_trainers; i++) weights[i] = 0; } n_trainers_trained = 0; } void WeightedSumMachine::reset() { for (int i=0;imachine->reset(); n_trainers_trained = 0; } void WeightedSumMachine::forward(Sequence *inputs) { for(int i = 0; i < n_trainers_trained; i++) trainers[i]->machine->forward(inputs); int n_frames = trainers[0]->machine->outputs->n_frames; outputs->resize(n_frames); for(int i = 0; i < n_frames; i++) { real *dest_ = outputs->frames[i]; for(int j = 0; j < n_outputs; j++) dest_[j] = 0; for(int j = 0; j < n_trainers_trained; j++) { real z = weights[j]; real *src_ = trainers[j]->machine->outputs->frames[i]; for(int k = 0; k < n_outputs; k++) dest_[k] += z * src_[k]; } } } void WeightedSumMachine::loadXFile(XFile *file) { file->taggedRead(&n_trainers_trained, sizeof(int), 1, "NTRAINERS"); 
file->taggedRead(weights, sizeof(real), n_trainers, "WEIGHTS"); for (int i = 0; i < n_trainers; i++) trainers[i]->loadXFile(file); } void WeightedSumMachine::saveXFile(XFile *file) { file->taggedWrite(&n_trainers_trained, sizeof(int), 1, "NTRAINERS"); file->taggedWrite(weights, sizeof(real), n_trainers, "WEIGHTS"); for (int i = 0; i < n_trainers; i++) trainers[i]->saveXFile(file); } WeightedSumMachine::~WeightedSumMachine() { } } torch3-3.1.orig/core/WeightedSumMachine.h0000644000175000017500000000627710106445234020522 0ustar kalfakalfa00000000000000// Copyright (C) 2003--2004 Ronan Collobert (collober@idiap.ch) // // This file is part of Torch 3.1. // // All rights reserved. // // Redistribution and use in source and binary forms, with or without // modification, are permitted provided that the following conditions // are met: // 1. Redistributions of source code must retain the above copyright // notice, this list of conditions and the following disclaimer. // 2. Redistributions in binary form must reproduce the above copyright // notice, this list of conditions and the following disclaimer in the // documentation and/or other materials provided with the distribution. // 3. The name of the author may not be used to endorse or promote products // derived from this software without specific prior written permission. // // THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR // IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES // OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
// IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
// INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
// NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
// THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

#ifndef WEIGHTED_SUM_MACHINE_INC
#define WEIGHTED_SUM_MACHINE_INC

#include "Trainer.h"

namespace Torch {

/** Weighted-sum machine.
    This class contains a series of #Trainers#,
    and its forward method simply computes
    the weighted sum of the outputs of the machines
    associated to the trainers, on the same input.

    @see Bagging
    @see Boosting
    @author Ronan Collobert (collober@idiap.ch)
*/
class WeightedSumMachine : public Machine
{
  public:
    /// Output frame size
    int n_outputs;

    /// The trainers used in the combination.
    Trainer **trainers;

    /// The corresponding measurers.
    MeasurerList **trainers_measurers;

    /// The number of trainers in the combination.
    int n_trainers;

    /** The number of trainers that have been already trained.
        After the initialization, it's zero.
        Note that the forward method depends on this value.
        (only the first #n_trainers_trained# trainers are used)
    */
    int n_trainers_trained;

    /// The weights of the combination.
    real *weights;

    /// True if the weights aren't given by the user, false otherwise.
    bool weights_is_allocated;

    /** #trainers_measurers# is an array which possibly specifies
        the measurers which should be given to the corresponding
        trainer when training.

        If #weights_# isn't specified, the weights are set to zero.
    */
    WeightedSumMachine(Trainer **trainer_, int n_trainers_, MeasurerList **trainers_measurers, real *weights_=NULL);

    //-----

    /// Weighted sum of the outputs of the trained machines on #inputs#.
    virtual void forward(Sequence *inputs);

    /// Reset the machines and set #n_trainers_trained# back to zero.
    virtual void reset();

    /// Load #n_trainers_trained#, the weights and every trainer.
    virtual void loadXFile(XFile *file);

    /// Save #n_trainers_trained#, the weights and every trainer.
    virtual void saveXFile(XFile *file);

    virtual ~WeightedSumMachine();
};

}

#endif
torch3-3.1.orig/core/XFile.cc0000644000175000017500000000521110106445234016140 0ustar kalfakalfa00000000000000
// Copyright (C) 2003--2004 Ronan Collobert (collober@idiap.ch)
//
// This file is part of Torch 3.1.
//
// All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions
// are met:
// 1. Redistributions of source code must retain the above copyright
//    notice, this list of conditions and the following disclaimer.
// 2. Redistributions in binary form must reproduce the above copyright
//    notice, this list of conditions and the following disclaimer in the
//    documentation and/or other materials provided with the distribution.
// 3. The name of the author may not be used to endorse or promote products
//    derived from this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
// IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
// OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
// IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
// INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
// NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
// THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#include "XFile.h"

namespace Torch {

XFile::XFile()
{
}

// Reads a record written by taggedWrite(): tag length, tag text, block size,
// block count, then the payload. Any mismatch with the expected #tag#,
// #block_size# or #n_blocks# stops the program through error().
// Returns what the final read() of the payload returns.
int XFile::taggedRead(void *ptr, int block_size, int n_blocks, const char *tag)
{
  // Tag length first: it must be the length of the tag we expect.
  int stored_len;
  this->read(&stored_len, sizeof(int), 1);
  if((int)strlen(tag) != stored_len)
    error("XFile: sorry, the tag <%s> cannot be read!", tag);

  // Then the tag itself, compared with the expected one.
  char *stored_tag = (char *)Allocator::sysAlloc(stored_len+1);
  this->read(stored_tag, 1, stored_len);
  stored_tag[stored_len] = '\0';

  if(strcmp(tag, stored_tag) != 0)
    error("XFile: tag <%s> not found!", tag);
  free(stored_tag);

  // Finally the layout of the payload.
  int stored_block_size;
  int stored_n_blocks;
  this->read(&stored_block_size, sizeof(int), 1);
  this->read(&stored_n_blocks, sizeof(int), 1);

  const bool same_layout = (stored_block_size == block_size) && (stored_n_blocks == n_blocks);
  if(!same_layout)
    error("XFile: tag <%s> has a corrupted size!", tag);

  return this->read(ptr, block_size, n_blocks);
}

// Writes the tag length, the tag text (without terminator), the block size,
// the block count and then the payload.
// Returns what the final write() of the payload returns.
int XFile::taggedWrite(void *ptr, int block_size, int n_blocks, const char *tag)
{
  int tag_len = strlen(tag);

  this->write(&tag_len, sizeof(int), 1);
  this->write((char *)tag, 1, tag_len);
  this->write(&block_size, sizeof(int), 1);
  this->write(&n_blocks, sizeof(int), 1);

  int n_written = this->write(ptr, block_size, n_blocks);
  return n_written;
}

XFile::~XFile()
{
}

}
torch3-3.1.orig/core/XFile.h0000644000175000017500000000604610106445234016011 0ustar kalfakalfa00000000000000
// Copyright (C) 2003--2004 Ronan Collobert (collober@idiap.ch)
//
// This file is part of Torch 3.1.
//
// All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions
// are met:
// 1. Redistributions of source code must retain the above copyright
//    notice, this list of conditions and the following disclaimer.
// 2. Redistributions in binary form must reproduce the above copyright
//    notice, this list of conditions and the following disclaimer in the
//    documentation and/or other materials provided with the distribution.
// 3. The name of the author may not be used to endorse or promote products
//    derived from this software without specific prior written permission.
// // THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR // IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES // OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. // IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, // INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT // NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF // THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #ifndef X_FILE_INC #define X_FILE_INC #include "Object.h" namespace Torch { /** XFile. A File which could be anything. The syntax (and results) for method is very similar to C FILE. (Except for the FILE pointer which is not given in the parameters!). @author Ronan Collobert (collober@idiap.ch) */ class XFile : public Object { public: /// XFile(); /// Read something. virtual int read(void *ptr, int block_size, int n_blocks) = 0; /// Write. virtual int write(void *ptr, int block_size, int n_blocks) = 0; /** Read and check the tag/the size. To be used with #taggedWrite()#. If the tag and the size readed doesn't correspond to the given tag and size, an error will occur. */ int taggedRead(void *ptr, int block_size, int n_blocks, const char *tag); /// Write and write the tag/the size. int taggedWrite(void *ptr, int block_size, int n_blocks, const char *tag); /// Are we at the end ? virtual int eof() = 0; /// Flush the file. virtual int flush() = 0; /// Seek. virtual int seek(long offset, int whence) = 0; /// Tell me where am I... virtual long tell() = 0; /// Rewind. virtual void rewind() = 0; /// Print some text. virtual int printf(const char *format, ...) = 0; /// Scan some text. 
virtual int scanf(const char *format, void *ptr) = 0; /// Get one line (read at most #size_# characters). virtual char *gets(char *dest, int size_) = 0; //----- virtual ~XFile(); }; } #endif torch3-3.1.orig/core/general.cc0000644000175000017500000000534510106445234016556 0ustar kalfakalfa00000000000000// Copyright (C) 2003--2004 Ronan Collobert (collober@idiap.ch) // // This file is part of Torch 3.1. // // All rights reserved. // // Redistribution and use in source and binary forms, with or without // modification, are permitted provided that the following conditions // are met: // 1. Redistributions of source code must retain the above copyright // notice, this list of conditions and the following disclaimer. // 2. Redistributions in binary form must reproduce the above copyright // notice, this list of conditions and the following disclaimer in the // documentation and/or other materials provided with the distribution. // 3. The name of the author may not be used to endorse or promote products // derived from this software without specific prior written permission. // // THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR // IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES // OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. // IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, // INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT // NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF // THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #include "general.h" namespace Torch { char xxpetit_message_pour_melanie[10000]; void error(const char* msg, ...) 
{ va_list args; va_start(args,msg); vsprintf(xxpetit_message_pour_melanie, msg, args); printf("\n$ Error: %s\n\n", xxpetit_message_pour_melanie); fflush(stdout); va_end(args); exit(-1); } void warning(const char* msg, ...) { va_list args; va_start(args,msg); vsprintf(xxpetit_message_pour_melanie, msg, args); printf("! Warning: %s\n", xxpetit_message_pour_melanie); fflush(stdout); va_end(args); } void message(const char* msg, ...) { va_list args; va_start(args,msg); vsprintf(xxpetit_message_pour_melanie, msg, args); printf("# %s\n", xxpetit_message_pour_melanie); fflush(stdout); va_end(args); } void print(const char* msg, ...) { va_list args; va_start(args,msg); vsprintf(xxpetit_message_pour_melanie, msg, args); printf("%s", xxpetit_message_pour_melanie); fflush(stdout); va_end(args); } void controlBar(int level, int max_level) { if(level == -1) print("["); else { if(max_level < 10) print("."); else { if( !(level % (max_level/10) ) ) print("."); } if(level == max_level-1) print("]\n"); } } } torch3-3.1.orig/core/general.h0000644000175000017500000000646010106445234016417 0ustar kalfakalfa00000000000000// Copyright (C) 2003--2004 Ronan Collobert (collober@idiap.ch) // // This file is part of Torch 3.1. // // All rights reserved. // // Redistribution and use in source and binary forms, with or without // modification, are permitted provided that the following conditions // are met: // 1. Redistributions of source code must retain the above copyright // notice, this list of conditions and the following disclaimer. // 2. Redistributions in binary form must reproduce the above copyright // notice, this list of conditions and the following disclaimer in the // documentation and/or other materials provided with the distribution. // 3. The name of the author may not be used to endorse or promote products // derived from this software without specific prior written permission. 
// // THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR // IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES // OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. // IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, // INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT // NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF // THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #ifndef GENERAL_INC #define GENERAL_INC #include #include #include #include #include #include #include #include // Old systems need that to define FLT_MAX and DBL_MAX #ifndef DBL_MAX #include #endif namespace Torch { #ifdef _MSC_VER #ifndef for #define for if (0) {} else for #endif #define M_PI 3.14159265358979323846 #define popen(x,y) _popen(x,y) #define pclose(x) _pclose(x) #define isnan(x) _isnan(x) #define log1p(x) log(1+(x)) #endif #ifdef USE_DOUBLE #define INF DBL_MAX #define REAL_EPSILON DBL_EPSILON #define real double #else #define INF FLT_MAX #define REAL_EPSILON FLT_EPSILON #define real float #endif //----------------------------------- /** Text outputs functions. These functions are like #printf()#, but you should use them instead. Note that you should never try to print a message larger than 10000 characters. @author Ronan Collobert (collober@idiap.ch) */ //@{ /// Print an error message. The program will exit. void error(const char* msg, ...); /// Print a warning message. void warning(const char* msg, ...); /// Print a message. void message(const char* msg, ...); /// Like printf. void print(const char* msg, ...); /** Print a control bar [\#\#\#\#\#\#\#\#\#\#]. First time, you can it with #level=-1#. 
It'll print the control bar at each time you will call that. #max_level# is the value of the last #level# you'll call this function. @author Ronan Collobert (collober@idiap.ch) */ void controlBar(int level, int max_level); //@} //----------------------------------- #ifndef min /// The min function #define min(a,b) ((a) > (b) ? (b) : (a)) #endif #ifndef max /// The max function #define max(a,b) ((a) > (b) ? (a) : (b)) #endif } #endif torch3-3.1.orig/core/log_add.cc0000644000175000017500000000501010106445234016517 0ustar kalfakalfa00000000000000// Copyright (C) 2003--2004 Samy Bengio (bengio@idiap.ch) // // This file is part of Torch 3.1. // // All rights reserved. // // Redistribution and use in source and binary forms, with or without // modification, are permitted provided that the following conditions // are met: // 1. Redistributions of source code must retain the above copyright // notice, this list of conditions and the following disclaimer. // 2. Redistributions in binary form must reproduce the above copyright // notice, this list of conditions and the following disclaimer in the // documentation and/or other materials provided with the distribution. // 3. The name of the author may not be used to endorse or promote products // derived from this software without specific prior written permission. // // THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR // IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES // OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
// IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, // INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT // NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF // THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #include "log_add.h" #include "general.h" namespace Torch { #ifdef USE_DOUBLE #define MINUS_LOG_THRESHOLD -39.14 #else #define MINUS_LOG_THRESHOLD -18.42 #endif real logAdd(real log_a, real log_b) { if (log_a < log_b) { real tmp = log_a; log_a = log_b; log_b = tmp; } real minusdif = log_b - log_a; #ifdef DEBUG if (isnan(minusdif)) error("LogAdd: minusdif (%f) log_b (%f) or log_a (%f) is nan",minusdif,log_b,log_a); #endif if (minusdif < MINUS_LOG_THRESHOLD) return log_a; else return log_a + log1p(exp(minusdif)); } real logSub(real log_a, real log_b) { if (log_a < log_b) error("LogSub: log_a (%f) should be greater than log_b (%f)", log_a, log_b); real minusdif = log_b - log_a; #ifdef DEBUG if (isnan(minusdif)) error("LogSub: minusdif (%f) log_b (%f) or log_a (%f) is nan",minusdif,log_b,log_a); #endif if (log_a == log_b) return LOG_ZERO; else if (minusdif < MINUS_LOG_THRESHOLD) return log_a; else return log_a + log1p(-exp(minusdif)); } } torch3-3.1.orig/core/log_add.h0000644000175000017500000000411110106445234016362 0ustar kalfakalfa00000000000000// Copyright (C) 2003--2004 Samy Bengio (bengio@idiap.ch) // // This file is part of Torch 3.1. // // All rights reserved. // // Redistribution and use in source and binary forms, with or without // modification, are permitted provided that the following conditions // are met: // 1. Redistributions of source code must retain the above copyright // notice, this list of conditions and the following disclaimer. // 2. 
Redistributions in binary form must reproduce the above copyright // notice, this list of conditions and the following disclaimer in the // documentation and/or other materials provided with the distribution. // 3. The name of the author may not be used to endorse or promote products // derived from this software without specific prior written permission. // // THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR // IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES // OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. // IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, // INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT // NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF // THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #ifndef LOG_ADD_INC #define LOG_ADD_INC #include "general.h" namespace Torch { /** Some simple functions for log operations. @author Samy Bengio (bengio@idiap.ch) */ //@{ #define LOG_2_PI 1.83787706640934548355 #define LOG_ZERO -INF #define LOG_ONE 0 /** logAdd(log_a,log_b) = log(a+b) = log(exp(log_a)+exp(log_b)) but done in a smart way so that if log_a or log_b are large but not their difference the computation works correctly. */ real logAdd(real log_a,real log_b); /// logSub(log_a,log_b) = log(a-b) real logSub(real log_a,real log_b); //@} } #endif torch3-3.1.orig/core/string_utils.cc0000644000175000017500000000542210106445234017663 0ustar kalfakalfa00000000000000// Copyright (C) 2003--2004 Johnny Mariethoz (Johnny.Mariethoz@idiap.ch) // and Ronan Collobert (collober@idiap.ch) // and Samy Bengio (bengio@idiap.ch) // // This file is part of Torch 3.1. // // All rights reserved. 
// // Redistribution and use in source and binary forms, with or without // modification, are permitted provided that the following conditions // are met: // 1. Redistributions of source code must retain the above copyright // notice, this list of conditions and the following disclaimer. // 2. Redistributions in binary form must reproduce the above copyright // notice, this list of conditions and the following disclaimer in the // documentation and/or other materials provided with the distribution. // 3. The name of the author may not be used to endorse or promote products // derived from this software without specific prior written permission. // // THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR // IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES // OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. // IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, // INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT // NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF // THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #include "general.h" #include "string_utils.h" #include "Allocator.h" #include namespace Torch { char *strBaseName(char *filename) { char *p = strrchr(filename, '/'); return p ? 
(p+1) : filename; } char *strRemoveSuffix(char *filename, char c) { char *copy = NULL; int len = strlen(filename); char *p = filename + len - 1; int i=len-1; while (*p != c && i-- >0) p--; if (i>0) { //*p = '\0'; copy = (char*)Allocator::sysAlloc(sizeof(char)*(i+1)); strncpy(copy,filename,i); copy[i] = '\0'; } else { copy = (char*)Allocator::sysAlloc(sizeof(char)*(len+1)); strcpy(copy,filename); } return copy; } char *strConcat(int n, ...) { char **strs = (char **)Allocator::sysAlloc(sizeof(char *)*n); int taille = 0; va_list args; va_start(args, n); for(int i = 0; i < n; i++) { strs[i] = va_arg(args, char *); taille += strlen(strs[i]); } va_end(args); taille++; // Pour le truc de fin char *the_concat = (char *)Allocator::sysAlloc(sizeof(char)*taille); the_concat[0] = '\0'; for(int i = 0; i < n; i++) strcat(the_concat, strs[i]); free(strs); return(the_concat); } } torch3-3.1.orig/core/string_utils.h0000644000175000017500000000465610106445234017535 0ustar kalfakalfa00000000000000// Copyright (C) 2003--2004 Johnny Mariethoz (Johnny.Mariethoz@idiap.ch) // and Ronan Collobert (collober@idiap.ch) // and Samy Bengio (bengio@idiap.ch) // // This file is part of Torch 3.1. // // All rights reserved. // // Redistribution and use in source and binary forms, with or without // modification, are permitted provided that the following conditions // are met: // 1. Redistributions of source code must retain the above copyright // notice, this list of conditions and the following disclaimer. // 2. Redistributions in binary form must reproduce the above copyright // notice, this list of conditions and the following disclaimer in the // documentation and/or other materials provided with the distribution. // 3. The name of the author may not be used to endorse or promote products // derived from this software without specific prior written permission. 
// // THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR // IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES // OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. // IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, // INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT // NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF // THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #ifndef STRING_UTILS_INC #define STRING_UTILS_INC #include namespace Torch { /** Some simple functions for string operations. @author Samy Bengio (bengio@idiap.ch) @author Johnny Mariethoz (Johnny.Mariethoz@idiap.ch) @author Ronan Collobert (collober@idiap.ch) */ //@{ /** Returns the name of a file without leading pathname. (It's not a new string, but a pointer in the given string) */ char *strBaseName(char *filename); /** Returns a fresh copy of the name of a file without suffix. (Trailing chars after c) You have to free the memory! */ char *strRemoveSuffix(char *filename, char c='.'); /** Returns the concatenation #n# strings. The strings are the parameters given after #n#; You have to free the memory! */ char *strConcat(int n, ...); //@} } #endif torch3-3.1.orig/core/ClassFormatDataSet.cc0000644000175000017500000001026210106445234020617 0ustar kalfakalfa00000000000000// Copyright (C) 2003--2004 Ronan Collobert (collober@idiap.ch) // // This file is part of Torch 3.1. // // All rights reserved. // // Redistribution and use in source and binary forms, with or without // modification, are permitted provided that the following conditions // are met: // 1. 
Redistributions of source code must retain the above copyright // notice, this list of conditions and the following disclaimer. // 2. Redistributions in binary form must reproduce the above copyright // notice, this list of conditions and the following disclaimer in the // documentation and/or other materials provided with the distribution. // 3. The name of the author may not be used to endorse or promote products // derived from this software without specific prior written permission. // // THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR // IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES // OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. // IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, // INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT // NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF // THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
#include "ClassFormatDataSet.h"

namespace Torch {

/// Wraps #data_#, replacing each target frame by the frame of
/// #class_labels_# selected by the first value of the original target.
/// The number of targets is the frame size of #class_labels_#.
ClassFormatDataSet::ClassFormatDataSet(DataSet *data_, Sequence *class_labels_)
{
  data = data_;
  class_labels = class_labels_;

  DataSet::init(data->n_examples, data->n_inputs, class_labels->frame_size);

  // Inputs are borrowed from the wrapped data set in setRealExample().
  inputs = NULL;
  if(n_targets > 0)
    targets = new(allocator) Sequence(0, n_targets);
}

/// Wraps #data_# using one-hot class labels. When #n_classes# <= 0 the number
/// of classes is detected as (largest class index found in the targets) + 1.
ClassFormatDataSet::ClassFormatDataSet(DataSet *data_, int n_classes)
{
  data = data_;

  if(n_classes <= 0)
  {
    // Scan the first value of every target frame of every example.
    n_classes = 0;
    for(int t = 0; t < data->n_examples; t++)
    {
      data->setExample(t, false, true);
      for(int i = 0; i < data->targets->n_frames; i++)
      {
        int z = (int)data->targets->frames[i][0];
        if(z > n_classes)
          n_classes = z;
      }
    }
    n_classes++;
    message("ClassFormatDataSet: %d classes detected", n_classes);
  }

  // Frame #cl# of class_labels is all zeros except a 1 at position #cl#.
  class_labels = new(allocator) Sequence(n_classes, n_classes);
  for(int cl = 0; cl < n_classes; cl++)
  {
    memset(class_labels->frames[cl], 0, sizeof(real)*n_classes);
    class_labels->frames[cl][cl] = 1.;
  }

  DataSet::init(data->n_examples, data->n_inputs, n_classes);

  inputs = NULL;
  if(n_targets > 0)
    targets = new(allocator) Sequence(0, n_targets);
}

/// Forwards the frame-count query to the wrapped data set, after mapping
/// #t_# through #selected_examples#. Each count is only queried when the
/// corresponding output pointer is non-NULL.
void ClassFormatDataSet::getNumberOfFrames(int t_, int *n_input_frames_, int *n_target_frames_)
{
  int t = selected_examples[t_];
  if( (n_inputs > 0) && n_input_frames_ )
    data->getNumberOfFrames(t, n_input_frames_, NULL);
  if( (n_targets > 0) && n_target_frames_ )
    data->getNumberOfFrames(t, NULL, n_target_frames_);
}

/// Selects example #t# in the wrapped data set and, when #set_targets# is
/// true, rebuilds #targets# by copying the class-label frame that
/// corresponds to each original target frame.
void ClassFormatDataSet::setRealExample(int t, bool set_inputs, bool set_targets)
{
  data->setExample(t, set_inputs, set_targets);
  inputs = data->inputs;

  if(set_targets)
  {
    targets->resize(data->targets->n_frames);
    for(int i = 0; i < data->targets->n_frames; i++)
    {
      int cl = (int)data->targets->frames[i][0];

      // A label outside the class-label table would make the memcpy() below
      // read past the table. (error() is presumed not to return -- TODO confirm.)
      if(cl < 0 || cl >= class_labels->n_frames)
        error("ClassFormatDataSet: class label %d is out of range (%d classes)", cl, class_labels->n_frames);

      memcpy(targets->frames[i], class_labels->frames[cl], sizeof(real)*n_targets);
    }
  }

  real_current_example_index = t;
}

/// Pre-processing is not supported by this data set: always reports an error.
void ClassFormatDataSet::preProcess(PreProcessing *pre_processing)
{
  error("ClassFormatDataSet: pre-processing not supported");
}

/// Saves the current example state (in the wrapped data set and in this one)
/// and installs a fresh, empty target sequence.
void ClassFormatDataSet::pushExample()
{
  data->pushExample();

  pushed_examples->push(&inputs, sizeof(Sequence *));
  pushed_examples->push(&targets, sizeof(Sequence *));
  pushed_examples->push(&real_current_example_index, sizeof(int));

  if(n_targets > 0)
    targets = new(allocator) Sequence(0, n_targets);

  real_current_example_index = -1;
}

/// Releases the target sequence created by pushExample() and pops the three
/// entries it pushed, then pops the wrapped data set.
void ClassFormatDataSet::popExample()
{
  allocator->free(targets);

  pushed_examples->pop();
  pushed_examples->pop();
  pushed_examples->pop();

  data->popExample();
}

ClassFormatDataSet::~ClassFormatDataSet()
{
}

}
torch3-3.1.orig/core/OutputMeasurer.cc0000644000175000017500000000560610106445234020145 0ustar kalfakalfa00000000000000// Copyright (C) 2003--2004 Ronan Collobert (collober@idiap.ch)
//
// This file is part of Torch 3.1.
//
// All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions
// are met:
// 1. Redistributions of source code must retain the above copyright
//    notice, this list of conditions and the following disclaimer.
// 2. Redistributions in binary form must reproduce the above copyright
//    notice, this list of conditions and the following disclaimer in the
//    documentation and/or other materials provided with the distribution.
// 3. The name of the author may not be used to endorse or promote products
//    derived from this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
// IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
// OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
// IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, // INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT // NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF // THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #include "OutputMeasurer.h" namespace Torch { OutputMeasurer::OutputMeasurer(DataSet *data_, XFile *file_) : Measurer(data_, file_) { sequences = new(allocator) SequenceList; total_frame_size = 0; } void OutputMeasurer::addSequence(Sequence *sequence) { sequences->addNode(sequence); total_frame_size += sequence->frame_size; } void OutputMeasurer::measureExample() { if(sequences->n_nodes == 0) return; int n_frames = sequences->nodes[0]->n_frames; for(int i = 0; i < sequences->n_nodes; i++) { if(sequences->nodes[i]->n_frames != n_frames) error("OutputMeasurer: sorry, sequences don't have the same number of frames"); } if(binary_mode) { file->write(&n_frames, sizeof(int), 1); file->write(&total_frame_size, sizeof(int), 1); for(int i = 0; i < n_frames; i++) { for(int j = 0; j < sequences->n_nodes; j++) file->write(sequences->nodes[j]->frames[i], sizeof(real), sequences->nodes[j]->frame_size); } } else { file->printf("%d %d\n", n_frames, total_frame_size); for(int i = 0; i < n_frames; i++) { for(int j = 0; j < sequences->n_nodes; j++) { real *src = sequences->nodes[j]->frames[i]; for(int k = 0; k < sequences->nodes[j]->frame_size; k++) file->printf("%g ", src[k]); } file->printf("\n"); } } } OutputMeasurer::~OutputMeasurer() { } } torch3-3.1.orig/config/0000755000175000017500000000000010106445234015140 5ustar kalfakalfa00000000000000torch3-3.1.orig/config/Makefile0000644000175000017500000000057010106445234016602 0ustar kalfakalfa00000000000000# # Torch user Makefile 
example # # Torch location. # Make sure to specify correct location... TORCHDIR := $(shell cd ..; pwd) # # All that follows you can probably keep as is... # OS := $(shell uname -s) include $(TORCHDIR)/Makefile_options_$(OS) %: %.cc cd $(TORCHDIR); ${MAKE} mkdir -p $(VERSION_KEY) $(CC) $(CFLAGS_$(MODE)) $(INCS) -o $(VERSION_KEY)/$@ $< $(LIBS) torch3-3.1.orig/config/Makefile_options_CC0000644000175000017500000000231510106445234020721 0ustar kalfakalfa00000000000000# # What you have to check... # # Packages you want to use PACKAGES = # Magik key if you have several makefile # for the same platform MAGIK_KEY = CC_ # Compiler, linker and archiver CC = CC LD = CC AR = ar -rus # Command for creating dependencies DEP = CC -xM1 # Your librairies # (for example "-lm", but not needed on most systems...) MYLIBS = # Your includes # (for example -I/usr/local/special) MYINCS = # optimize mode DEBUG = OPT # debug mode #DEBUG = DBG # double version #FLOATING = DOUBLE # floating version FLOATING = FLOAT # Debug double mode CFLAGS_DBG_DOUBLE = -g -DUSE_DOUBLE -DDEBUG # Debug float mode CFLAGS_DBG_FLOAT = -g -DDEBUG # Optimized double mode CFLAGS_OPT_DOUBLE = -fast -xO5 -xarch=native -DUSE_DOUBLE # Optimized float mode CFLAGS_OPT_FLOAT = -fast -xO5 -xarch=native # # # Variables that you may find useful inside your Makefile # Do not touch. # # MODE = $(DEBUG)_$(FLOATING) VERSION_KEY = $(MAGIK_KEY)$(OS)_$(MODE) LIBS_DIR = $(TORCHDIR)/lib/$(VERSION_KEY) OBJS_DIR = $(TORCHDIR)/objs/$(VERSION_KEY) LIBTORCH = $(LIBS_DIR)/libtorch.a LIBS = -L$(TORCHDIR)/lib/$(VERSION_KEY) -ltorch $(MYLIBS) INCS := -I$(TORCHDIR)/core $(MYINCS) INCS += $(foreach f,$(PACKAGES),-I$(TORCHDIR)/$(f)) torch3-3.1.orig/config/Makefile_options_Darwin0000644000175000017500000000245210106445234021662 0ustar kalfakalfa00000000000000# # What you have to check... 
# # Packages you want to use PACKAGES = # Magik key if you have several makefile # for the same platform MAGIK_KEY = # Compiler, linker and archiver CC = c++ LD = c++ AR = ar ru # Command for creating dependencies DEP = c++ -MM # Your librairies # (for example "-lm", but not needed on most systems...) MYLIBS = # Your includes # (for example -I/usr/local/special) MYINCS = # optimize mode DEBUG = OPT # debug mode #DEBUG = DBG # double version #FLOATING = DOUBLE # floating version FLOATING = FLOAT # Debug double mode CFLAGS_DBG_DOUBLE = -g -Wall -DUSE_DOUBLE -DDEBUG # Debug float mode CFLAGS_DBG_FLOAT = -g -Wall -DDEBUG # Optimized double mode CFLAGS_OPT_DOUBLE = -Wall -O2 -ffast-math -DUSE_DOUBLE # Optimized float mode CFLAGS_OPT_FLOAT = -Wall -O2 -ffast-math # # # Variables that you may find useful inside your Makefile # Do not touch. # # MODE = $(DEBUG)_$(FLOATING) VERSION_KEY = $(MAGIK_KEY)$(OS)_$(MODE) LIBS_DIR = $(TORCHDIR)/lib/$(VERSION_KEY) OBJS_DIR = $(TORCHDIR)/objs/$(VERSION_KEY) LIBTORCH = $(LIBS_DIR)/libtorch.a LIBS = -L$(TORCHDIR)/lib/$(VERSION_KEY) -ltorch $(MYLIBS) INCS := -I$(TORCHDIR)/core $(MYINCS) INCS += $(foreach f,$(PACKAGES),-I$(TORCHDIR)/$(f)) lib: all @echo ">> ...do a ranlib... <<" @ranlib $(LIBTORCH) @echo ">> ... ok !!! <<" torch3-3.1.orig/config/Makefile_options_FreeBSD0000644000175000017500000000243610106445234021652 0ustar kalfakalfa00000000000000# # What you have to check... # # Packages you want to use PACKAGES = # Magik key if you have several makefile # for the same platform MAGIK_KEY = # Compiler, linker and archiver CC = g++ LD = g++ AR = ar -rus # Command for creating dependencies DEP = g++ -MM # Your librairies # (for example "-lm", but not needed on most systems...) 
MYLIBS = # Your includes # (for example -I/usr/local/special) MYINCS = # optimize mode DEBUG = OPT # debug mode #DEBUG = DBG # double version #FLOATING = DOUBLE # floating version FLOATING = FLOAT # Debug double mode CFLAGS_DBG_DOUBLE = -g -Wall -DUSE_DOUBLE -DDEBUG # Debug float mode CFLAGS_DBG_FLOAT = -g -Wall -DDEBUG # Optimized double mode CFLAGS_OPT_DOUBLE = -Wall -O2 -ffast-math -mcpu=i686 -march=i686 -malign-double -DUSE_DOUBLE # Optimized float mode CFLAGS_OPT_FLOAT = -Wall -O2 -ffast-math -mcpu=i686 -march=i686 -malign-double # # # Variables that you may find useful inside your Makefile # Do not touch. # # MODE = $(DEBUG)_$(FLOATING) VERSION_KEY = $(MAGIK_KEY)$(OS)_$(MODE) LIBS_DIR = $(TORCHDIR)/lib/$(VERSION_KEY) OBJS_DIR = $(TORCHDIR)/objs/$(VERSION_KEY) LIBTORCH = $(LIBS_DIR)/libtorch.a LIBS = -L$(TORCHDIR)/lib/$(VERSION_KEY) -ltorch $(MYLIBS) INCS := -I$(TORCHDIR)/core $(MYINCS) INCS += $(foreach f,$(PACKAGES),-I$(TORCHDIR)/$(f)) torch3-3.1.orig/config/Makefile_options_Linux0000644000175000017500000000243610106445234021537 0ustar kalfakalfa00000000000000# # What you have to check... # # Packages you want to use PACKAGES = # Magik key if you have several makefile # for the same platform MAGIK_KEY = # Compiler, linker and archiver CC = g++ LD = g++ AR = ar -rus # Command for creating dependencies DEP = g++ -MM # Your librairies # (for example "-lm", but not needed on most systems...) 
MYLIBS = # Your includes # (for example -I/usr/local/special) MYINCS = # optimize mode DEBUG = OPT # debug mode #DEBUG = DBG # double version #FLOATING = DOUBLE # floating version FLOATING = FLOAT # Debug double mode CFLAGS_DBG_DOUBLE = -g -Wall -DUSE_DOUBLE -DDEBUG # Debug float mode CFLAGS_DBG_FLOAT = -g -Wall -DDEBUG # Optimized double mode CFLAGS_OPT_DOUBLE = -Wall -O2 -ffast-math -mcpu=i686 -march=i686 -malign-double -DUSE_DOUBLE # Optimized float mode CFLAGS_OPT_FLOAT = -Wall -O2 -ffast-math -mcpu=i686 -march=i686 -malign-double # # # Variables that you may find useful inside your Makefile # Do not touch. # # MODE = $(DEBUG)_$(FLOATING) VERSION_KEY = $(MAGIK_KEY)$(OS)_$(MODE) LIBS_DIR = $(TORCHDIR)/lib/$(VERSION_KEY) OBJS_DIR = $(TORCHDIR)/objs/$(VERSION_KEY) LIBTORCH = $(LIBS_DIR)/libtorch.a LIBS = -L$(TORCHDIR)/lib/$(VERSION_KEY) -ltorch $(MYLIBS) INCS := -I$(TORCHDIR)/core $(MYINCS) INCS += $(foreach f,$(PACKAGES),-I$(TORCHDIR)/$(f)) torch3-3.1.orig/config/Makefile_options_OSF10000644000175000017500000000227210106445234021146 0ustar kalfakalfa00000000000000# # What you have to check... # # Packages you want to use PACKAGES = # Magik key if you have several makefile # for the same platform MAGIK_KEY = # Compiler, linker and archiver CC = cxx LD = cxx AR = ar -rus # Command for creating dependencies DEP = cxx -M # Your librairies # (for example "-lm", but not needed on most systems...) MYLIBS = -lm # Your includes # (for example -I/usr/local/special) MYINCS = # optimize mode DEBUG = OPT # debug mode #DEBUG = DBG # double version #FLOATING = DOUBLE # floating version FLOATING = FLOAT # Debug double mode CFLAGS_DBG_DOUBLE = -g -DUSE_DOUBLE -DDEBUG # Debug float mode CFLAGS_DBG_FLOAT = -g -DDEBUG # Optimized double mode CFLAGS_OPT_DOUBLE = -arch host -O5 -DUSE_DOUBLE # Optimized float mode CFLAGS_OPT_FLOAT = -arch host -O5 # # # Variables that you may find useful inside your Makefile # Do not touch. 
# # MODE = $(DEBUG)_$(FLOATING) VERSION_KEY = $(MAGIK_KEY)$(OS)_$(MODE) LIBS_DIR = $(TORCHDIR)/lib/$(VERSION_KEY) OBJS_DIR = $(TORCHDIR)/objs/$(VERSION_KEY) LIBTORCH = $(LIBS_DIR)/libtorch.a LIBS = -L$(TORCHDIR)/lib/$(VERSION_KEY) -ltorch $(MYLIBS) INCS := -I$(TORCHDIR)/core $(MYINCS) INCS += $(foreach f,$(PACKAGES),-I$(TORCHDIR)/$(f)) torch3-3.1.orig/config/Makefile_options_SunOS0000644000175000017500000000231510106445234021443 0ustar kalfakalfa00000000000000# # What you have to check... # # Packages you want to use PACKAGES = # Magik key if you have several makefile # for the same platform MAGIK_KEY = CC_ # Compiler, linker and archiver CC = CC LD = CC AR = ar -rus # Command for creating dependencies DEP = CC -xM1 # Your librairies # (for example "-lm", but not needed on most systems...) MYLIBS = # Your includes # (for example -I/usr/local/special) MYINCS = # optimize mode DEBUG = OPT # debug mode #DEBUG = DBG # double version #FLOATING = DOUBLE # floating version FLOATING = FLOAT # Debug double mode CFLAGS_DBG_DOUBLE = -g -DUSE_DOUBLE -DDEBUG # Debug float mode CFLAGS_DBG_FLOAT = -g -DDEBUG # Optimized double mode CFLAGS_OPT_DOUBLE = -fast -xO5 -xarch=native -DUSE_DOUBLE # Optimized float mode CFLAGS_OPT_FLOAT = -fast -xO5 -xarch=native # # # Variables that you may find useful inside your Makefile # Do not touch. # # MODE = $(DEBUG)_$(FLOATING) VERSION_KEY = $(MAGIK_KEY)$(OS)_$(MODE) LIBS_DIR = $(TORCHDIR)/lib/$(VERSION_KEY) OBJS_DIR = $(TORCHDIR)/objs/$(VERSION_KEY) LIBTORCH = $(LIBS_DIR)/libtorch.a LIBS = -L$(TORCHDIR)/lib/$(VERSION_KEY) -ltorch $(MYLIBS) INCS := -I$(TORCHDIR)/core $(MYINCS) INCS += $(foreach f,$(PACKAGES),-I$(TORCHDIR)/$(f)) torch3-3.1.orig/config/Makefile_options_SunOS_purify0000644000175000017500000000237610106445234023050 0ustar kalfakalfa00000000000000# # What you have to check... 
# # Packages you want to use PACKAGES = # Magik key if you have several makefile # for the same platform MAGIK_KEY = # Compiler, linker and archiver CC = purify g++ LD = purify g++ AR = ar -rus # Command for creating dependencies DEP = g++ -MM # Your librairies # (for example "-lm", but not needed on most systems...) MYLIBS = # Your includes # (for example -I/usr/local/special) MYINCS = # optimize mode #DEBUG = OPT # debug mode DEBUG = DBG # double version #FLOATING = DOUBLE # floating version FLOATING = FLOAT # Debug double mode CFLAGS_DBG_DOUBLE = -g -Wall -DUSE_DOUBLE -DDEBUG # Debug float mode CFLAGS_DBG_FLOAT = -g -Wall -DDEBUG # Optimized double mode CFLAGS_OPT_DOUBLE = -Wall -O2 -ffast-math -malign-double -DUSE_DOUBLE # Optimized float mode CFLAGS_OPT_FLOAT = -Wall -O2 -ffast-math -malign-double # # # Variables that you may find useful inside your Makefile # Do not touch. # # MODE = $(DEBUG)_$(FLOATING) VERSION_KEY = $(MAGIK_KEY)$(OS)_$(MODE) LIBS_DIR = $(TORCHDIR)/lib/$(VERSION_KEY) OBJS_DIR = $(TORCHDIR)/objs/$(VERSION_KEY) LIBTORCH = $(LIBS_DIR)/libtorch.a LIBS = -L$(TORCHDIR)/lib/$(VERSION_KEY) -ltorch $(MYLIBS) INCS := -I$(TORCHDIR)/core $(MYINCS) INCS += $(foreach f,$(PACKAGES),-I$(TORCHDIR)/$(f)) torch3-3.1.orig/config/Makefile_options_g++0000644000175000017500000000243610106445234021014 0ustar kalfakalfa00000000000000# # What you have to check... # # Packages you want to use PACKAGES = # Magik key if you have several makefile # for the same platform MAGIK_KEY = # Compiler, linker and archiver CC = g++ LD = g++ AR = ar -rus # Command for creating dependencies DEP = g++ -MM # Your librairies # (for example "-lm", but not needed on most systems...) 
MYLIBS = # Your includes # (for example -I/usr/local/special) MYINCS = # optimize mode DEBUG = OPT # debug mode #DEBUG = DBG # double version #FLOATING = DOUBLE # floating version FLOATING = FLOAT # Debug double mode CFLAGS_DBG_DOUBLE = -g -Wall -DUSE_DOUBLE -DDEBUG # Debug float mode CFLAGS_DBG_FLOAT = -g -Wall -DDEBUG # Optimized double mode CFLAGS_OPT_DOUBLE = -Wall -O2 -ffast-math -mcpu=i686 -march=i686 -malign-double -DUSE_DOUBLE # Optimized float mode CFLAGS_OPT_FLOAT = -Wall -O2 -ffast-math -mcpu=i686 -march=i686 -malign-double # # # Variables that you may find useful inside your Makefile # Do not touch. # # MODE = $(DEBUG)_$(FLOATING) VERSION_KEY = $(MAGIK_KEY)$(OS)_$(MODE) LIBS_DIR = $(TORCHDIR)/lib/$(VERSION_KEY) OBJS_DIR = $(TORCHDIR)/objs/$(VERSION_KEY) LIBTORCH = $(LIBS_DIR)/libtorch.a LIBS = -L$(TORCHDIR)/lib/$(VERSION_KEY) -ltorch $(MYLIBS) INCS := -I$(TORCHDIR)/core $(MYINCS) INCS += $(foreach f,$(PACKAGES),-I$(TORCHDIR)/$(f)) torch3-3.1.orig/config/CC.cfg0000644000175000017500000000140710106445234016110 0ustar kalfakalfa00000000000000[torch] # Need more verbosity ? #verbose = 1 # Packages you want to use packages = # Magik key if you want several libraries # for the same platform #magic_key = # Compiler, linker and archiver compiler = CC linker = CC archiver = ar -rus # Your librairies # (for example "-lm", but not needed on most systems...) libraries = # Your includes # (for example -I/usr/local/special) includes = # optimize mode debug = opt # debug mode #debug = dbg # double version #floating = double # float version floating = float # Debug double mode dbg_double = -g -Wall -DUSE_DOUBLE -DDEBUG # Debug float mode dbg_float = -g -Wall -DDEBUG # Optimized double mode opt_double = -fast -xO5 -xarch=native -DUSE_DOUBLE # Optimized float mode opt_float = -fast -xO5 -xarch=native torch3-3.1.orig/config/Darwin.cfg0000644000175000017500000000140310106445234017043 0ustar kalfakalfa00000000000000[torch] # Need more verbosity ? 
#verbose = 1 # Packages you want to use packages = # Magik key if you want several libraries # for the same platform #magic_key = # Compiler, linker and archiver compiler = g++ linker = g++ archiver = ar -rus # Your librairies # (for example "-lm", but not needed on most systems...) libraries = # Your includes # (for example -I/usr/local/special) includes = # optimize mode debug = opt # debug mode #debug = dbg # double version #floating = double # float version floating = float # Debug double mode dbg_double = -g -Wall -DUSE_DOUBLE -DDEBUG # Debug float mode dbg_float = -g -Wall -DDEBUG # Optimized double mode opt_double = -Wall -O2 -ffast-math -DUSE_DOUBLE # Optimized float mode opt_float = -Wall -O2 -ffast-math torch3-3.1.orig/config/FreeBSD.cfg0000644000175000017500000000210710106445234017033 0ustar kalfakalfa00000000000000[torch] # Need more verbosity ? #verbose = 1 # Packages you want to use packages = # Magik key if you want several libraries # for the same platform #magic_key = # Compiler, linker and archiver compiler = g++ linker = g++ #archiver = g++ -shared -o archiver = ar -rus # Your librairies # (for example "-lm", but not needed on most systems...) 
libraries = # Your includes # (for example -I/usr/local/special) includes = # optimize mode debug = opt # debug mode #debug = dbg # double version #floating = double # float version floating = float # Debug double mode dbg_double = -g -Wall -DUSE_DOUBLE -DDEBUG # Debug float mode dbg_float = -g -Wall -DDEBUG # Optimized double mode opt_double = -Wall -O2 -ffast-math -mcpu=i686 -march=i686 -malign-double -DUSE_DOUBLE #opt_double = -Wall -O2 -ffast-math -mtune=i686 -march=i686 -malign-double -DUSE_DOUBLE # Optimized float mode opt_float = -Wall -O2 -ffast-math -mcpu=i686 -march=i686 -malign-double #opt_float = -Wall -O2 -ffast-math -mtune=i686 -march=i686 -malign-double # Note: gcc 3.4 does not support mcpu (replaced by mtune) torch3-3.1.orig/config/g++.cfg0000644000175000017500000000143610106445234016201 0ustar kalfakalfa00000000000000[torch] # Need more verbosity ? #verbose = 1 # Packages you want to use packages = # Magik key if you want several libraries # for the same platform #magic_key = # Compiler, linker and archiver compiler = g++ linker = g++ #archiver = g++ -shared -o archiver = ar -rus # Your librairies # (for example "-lm", but not needed on most systems...) libraries = # Your includes # (for example -I/usr/local/special) includes = # optimize mode debug = opt # debug mode #debug = dbg # double version #floating = double # float version floating = float # Debug double mode dbg_double = -g -Wall -DUSE_DOUBLE -DDEBUG # Debug float mode dbg_float = -g -Wall -DDEBUG # Optimized double mode opt_double = -Wall -O2 -ffast-math -DUSE_DOUBLE # Optimized float mode opt_float = -Wall -O2 -ffast-math torch3-3.1.orig/config/Linux.cfg0000644000175000017500000000210710106445234016720 0ustar kalfakalfa00000000000000[torch] # Need more verbosity ? 
#verbose = 1 # Packages you want to use packages = # Magik key if you want several libraries # for the same platform #magic_key = # Compiler, linker and archiver compiler = g++ linker = g++ #archiver = g++ -shared -o archiver = ar -rus # Your librairies # (for example "-lm", but not needed on most systems...) libraries = # Your includes # (for example -I/usr/local/special) includes = # optimize mode debug = opt # debug mode #debug = dbg # double version #floating = double # float version floating = float # Debug double mode dbg_double = -g -Wall -DUSE_DOUBLE -DDEBUG # Debug float mode dbg_float = -g -Wall -DDEBUG # Optimized double mode opt_double = -Wall -O2 -ffast-math -mcpu=i686 -march=i686 -malign-double -DUSE_DOUBLE #opt_double = -Wall -O2 -ffast-math -mtune=i686 -march=i686 -malign-double -DUSE_DOUBLE # Optimized float mode opt_float = -Wall -O2 -ffast-math -mcpu=i686 -march=i686 -malign-double #opt_float = -Wall -O2 -ffast-math -mtune=i686 -march=i686 -malign-double # Note: gcc 3.4 does not support mcpu (replaced by mtune) torch3-3.1.orig/config/OSF1.cfg0000644000175000017500000000135410106445234016334 0ustar kalfakalfa00000000000000[torch] # Need more verbosity ? #verbose = 1 # Packages you want to use packages = # Magik key if you want several libraries # for the same platform #magic_key = # Compiler, linker and archiver compiler = cxx linker = cxx archiver = ar -rus # Your librairies # (for example "-lm", but not needed on most systems...) 
libraries = -lm # Your includes # (for example -I/usr/local/special) includes = # optimize mode debug = opt # debug mode #debug = dbg # double version #floating = double # float version floating = float # Debug double mode dbg_double = -g -DUSE_DOUBLE -DDEBUG # Debug float mode dbg_float = -g -DDEBUG # Optimized double mode opt_double = -arch host -O5 -DUSE_DOUBLE # Optimized float mode opt_float = -arch host -O5 torch3-3.1.orig/config/SunOS.cfg0000644000175000017500000000140710106445234016632 0ustar kalfakalfa00000000000000[torch] # Need more verbosity ? #verbose = 1 # Packages you want to use packages = # Magik key if you want several libraries # for the same platform #magic_key = # Compiler, linker and archiver compiler = CC linker = CC archiver = ar -rus # Your librairies # (for example "-lm", but not needed on most systems...) libraries = # Your includes # (for example -I/usr/local/special) includes = # optimize mode debug = opt # debug mode #debug = dbg # double version #floating = double # float version floating = float # Debug double mode dbg_double = -g -Wall -DUSE_DOUBLE -DDEBUG # Debug float mode dbg_float = -g -Wall -DDEBUG # Optimized double mode opt_double = -fast -xO5 -xarch=native -DUSE_DOUBLE # Optimized float mode opt_float = -fast -xO5 -xarch=native torch3-3.1.orig/Makefile0000644000175000017500000000167310106445234015342 0ustar kalfakalfa00000000000000OS := $(shell uname -s) TORCHDIR := $(shell pwd) include Makefile_options_$(OS) SUBDIRS := core SUBDIRS += $(PACKAGES) all: @echo ">>> Try to compile Torch <<<" @for subdir in ${SUBDIRS} ; do ( cd $$subdir ; ${MAKE} $@) || exit 10 ; done @echo ">> !!! Ok !!! <<<" clean: @echo ">>> Atomise all <<<" @for subdir in ${SUBDIRS} ; do ( cd $$subdir ; ${MAKE} $@ ) || exit 10 ; done @\rm -Rf $(LIBTORCHXX) @\rm -Rf $(OBJS_DIR) @\rm -Rf $(LIBS_DIR) @echo ">> !!! Ok !!! 
<<<" distclean: @echo ">>> Thermonuclearise all <<<" @for subdir in ${SUBDIRS} ; do ( cd $$subdir ; ${MAKE} $@ ) || exit 10 ; done @\rm -Rf lib @\rm -Rf objs @echo ">> !!! Ok !!! <<<" depend: @echo ">>> Dependencies <<<" @for subdir in ${SUBDIRS} ; do ( cd $$subdir ; ${MAKE} $@ ) || exit 10 ; done @\mkdir -p $(OBJS_DIR) @\mkdir -p $(LIBS_DIR) @echo ">> !!! Ok !!! <<<" Makefile_options_$(OS): @echo "**** PLEASE CHECK YOUR MAKEFILE_OPTIONS for >>" $(OS) "<< ****" torch3-3.1.orig/xmake0000755000175000017500000003316310106445234014734 0ustar kalfakalfa00000000000000#!/usr/bin/python # # Torch compiler script # (c) Ronan Collobert 2003--2004 # # # All rights reserved. # # Redistribution and use in source and binary forms, with or without # modification, are permitted provided that the following conditions # are met: # 1. Redistributions of source code must retain the above copyright # notice, this list of conditions and the following disclaimer. # 2. Redistributions in binary form must reproduce the above copyright # notice, this list of conditions and the following disclaimer in the # documentation and/or other materials provided with the distribution. # 3. The name of the author may not be used to endorse or promote products # derived from this software without specific prior written permission. # # THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR # IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES # OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
# IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, # INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT # NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, # DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY # THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF # THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. import os import re import sys import ConfigParser import string def addPackageFiles(list_of_files, package): files = os.listdir(config.torch_dir + "/" + package) the_re = re.compile(".*?\.(cc|h)$") for file in files: if(the_re.search(file)): list_of_files[file] = package def scanFile(file_name, valid_files): file = open(config.torch_dir + "/" + valid_files[file_name] + "/" + file_name, "r") lines = file.readlines() file.close() includes = [] the_re = re.compile("\s*\#include\s*(\"|<)(.*?)(\"|>).*") for line in lines: zob = the_re.search(line) if(zob): includes.append(zob.group(2)) return includes def findDepsOnFile(file_name, deps, valid_files): # Voir si on a besoin de soi-meme... 
if( (file_name not in deps) and valid_files.has_key(file_name) ): deps.append(file_name) # new_includes = [] includes = scanFile(file_name, valid_files) for include in includes: if( (include not in deps) and valid_files.has_key(include) ): new_includes.append(include) deps.append(include) for new_include in new_includes: findDepsOnFile(new_include, deps, valid_files) # Lecture de la config class Config: pass def readConfig(override_debug_mode): config = Config() config.os = os.uname()[0] the_current_dir = os.getcwd() + "/" the_torch3_index = string.rfind(the_current_dir, "/Torch3/") if(the_torch3_index < 0): print "$ Torch3 directory not found" sys.exit(0) config.torch_dir = the_current_dir[0:the_torch3_index] + "/Torch3" config_file = ConfigParser.ConfigParser() config_file.read(config.torch_dir + "/" + config.os + ".cfg") if(not config_file.has_section("torch")): print "$ Config file <" + config.torch_dir + "/" + config.os + ".cfg> not found or doesn't have any torch section" sys.exit(0) options = config_file.options("torch") if(override_debug_mode != ""): config.debug = override_debug_mode else: if("debug" in options): config.debug = config_file.get("torch", "debug") if( (config.debug != "opt") and (config.debug != "dbg") ): print "$ debug is not valid (opt or dbg accepted)" sys.exit(0) else: config.debug = "opt" if("floating" in options): config.floating = config_file.get("torch", "floating") if( (config.floating != "float") and (config.floating != "double") ): print "$ floating is not valid (float or double accepted)" sys.exit(0) else: config.floating = "float" if("magic_key" in options): config.magic_key = config_file.get("torch", "magic_key") else: config.magic_key = "" if("packages" in options): config.packages = ["core"] + string.split(config_file.get("torch", "packages")) else: config.packages = ["core"] if("includes" in options): config.includes = config_file.get("torch", "includes") else: config.includes = "" if("libraries" in options): config.libraries 
= config_file.get("torch", "libraries") else: config.libraries = "" if("compiler" in options): config.compiler = config_file.get("torch", "compiler") else: print "$ No compiler provided" sys.exit(0) if("linker" in options): config.linker = config_file.get("torch", "linker") else: print "$ No linker provided" sys.exit(0) if("archiver" in options): config.archiver = config_file.get("torch", "archiver") else: print "$ No archiver provided" sys.exit(0) if("verbose" in options): config.verbose = config_file.getboolean("torch", "verbose") else: config.verbose = 0 if("library_name" in options): config.library_name = config_file.get("torch", "library_name") else: config.library_name = "torch" config.mode = config.debug + "_" + config.floating if(config.mode in options): config.flags = config_file.get("torch", config.mode) else: config.flags = "" print "! No compilation flags provided" config.version_key = config.magic_key + config.os + "_" + config.mode config.libs_dir = config.torch_dir + "/libs/" + config.version_key config.objs_dir = config.torch_dir + "/objs/" + config.version_key for package in config.packages: config.includes = config.includes + " -I" + config.torch_dir + "/" + package config.libraries = "-L" + config.libs_dir + " -l" + config.library_name + " " + config.libraries return config def getValidFiles(): valid_files = {} for package in config.packages: if(os.path.exists(config.torch_dir + "/" + package)): addPackageFiles(valid_files, package) else: print "$ Package <" + package + "> doesn't exist" sys.exit(0) return valid_files def makeDepend(): valid_files = getValidFiles() file_deps = open(config.torch_dir + "/.deps_" + config.version_key, "w") print "# Computing dependencies... 
[" + config.version_key + "]" the_re = re.compile(".*?\.cc$") for file in valid_files.keys(): if(the_re.search(file)): deps = [] findDepsOnFile(file, deps, valid_files) deps_on_file = "" for dep in deps: deps_on_file = deps_on_file + dep + " " file_deps.write(deps_on_file[0:-1] + "\n") file_deps.close() os.system("mkdir -p " + config.objs_dir) os.system("mkdir -p " + config.libs_dir) def makeAll(): valid_files = getValidFiles() print "# Try to compile Torch... [" + config.version_key + "]" # a) Load dependencies and package associations if(not os.path.exists(config.torch_dir + "/.deps_" + config.version_key)): print "! Dependencies not existent..." return 1 file_deps = open(config.torch_dir + "/.deps_" + config.version_key, "r") lines = file_deps.readlines() file_deps.close() deps = {} for line in lines: splitted_line = string.split(line) deps[splitted_line[0][0:-2]+"o"] = splitted_line # b) Check if all valid file are in dependencies n_cc_files = 0 the_re = re.compile(".*?\.cc$") for file in valid_files.keys(): if(the_re.search(file)): n_cc_files = n_cc_files + 1 if(not deps.has_key(file[0:-2]+"o")): print "! New cc file detected, re-doing dependencies (" + file + ")" return 1 if(n_cc_files != len(deps.keys())): print "! Dependencies not up to date..." 
return 1 # c) Find last modification dates (for valid source files and object files) valid_file_dates = {} for valid_file in valid_files.keys(): valid_file_dates[valid_file] = os.path.getmtime(config.torch_dir + "/" + valid_files[valid_file] + "/" + valid_file) object_file_dates = {} for object_file in deps.keys(): full_path_object_file = config.objs_dir + "/" + object_file if(os.path.exists(full_path_object_file)): object_file_dates[object_file] = os.path.getmtime(full_path_object_file) else: object_file_dates[object_file] = 0 # d) Find objects to update objects_to_update = [] for object_file in deps.keys(): object_file_date = object_file_dates[object_file] for dep_file in deps[object_file]: if(valid_file_dates[dep_file] > object_file_date): objects_to_update.append(object_file) break # e) Update objects objects_to_update.sort() for object_file in objects_to_update: src_file = object_file[0:-1] + "cc" src_file = config.torch_dir + "/" + valid_files[src_file] + "/" + src_file cmd = config.compiler + " " + config.flags + " " + config.includes + " -o " + config.objs_dir + "/" + object_file + " -c " + src_file if config.verbose: print cmd else: print object_file[0:-2]+".cc" if(os.system(cmd) != 0): print "$ Compilation failed" sys.exit(0) # f) Update library i_should_archive = 0 lib_file = config.libs_dir + "/lib" + config.library_name + ".a" if( (os.path.exists(lib_file)) and (len(objects_to_update) == 0) ): lib_file_date = os.path.getmtime(lib_file) for object_file in deps.keys(): full_path_object_file = config.objs_dir + "/" + object_file if(os.path.getmtime(full_path_object_file) > lib_file_date): i_should_archive = 1 else: i_should_archive = 1 if( (not os.path.exists(lib_file)) or (len(objects_to_update) > 0) ): all_objects = "" for object_file in deps.keys(): all_objects = all_objects + config.objs_dir + "/" + object_file + " " cmd = config.archiver + " " + lib_file + " " + all_objects print "# Archiving..." 
if config.verbose: print cmd os.system(cmd) return 0 def makeClean(): print "# Cleaning all the objects... [" + config.version_key + "]" os.system("rm -Rf " + config.objs_dir) os.system("rm -Rf " + config.libs_dir) os.system("rm -f " + config.torch_dir + "/.deps_" + config.version_key) def makeDistClean(): print "# Atomizing all" os.system("rm -Rf " + config.torch_dir + "/objs/") os.system("rm -Rf " + config.torch_dir + "/libs/") os.system("rm -f " + config.torch_dir + "/.deps_*") override_debug_mode = "" files_to_compile = [] if(len(sys.argv) == 1): cmd = "all" else: if( (sys.argv[1] == "-opt") or (sys.argv[1] == "-dbg") ): if(sys.argv[1] == "-opt"): override_debug_mode = "opt" if(sys.argv[1] == "-dbg"): override_debug_mode = "dbg" if(len(sys.argv) == 2): cmd = "all" else: cmd = sys.argv[2] files_to_compile = sys.argv[2:] else: cmd = sys.argv[1] files_to_compile = sys.argv[1:] if(cmd == "os"): print os.uname()[0] sys.exit(0) if( (cmd == "help") or (cmd == "--help") or (cmd == "-help") or (cmd == "-h") ): print "# Torch3 compiler script" print "# usage: " + sys.argv[0][string.rfind(sys.argv[0], "/")+1:] + " [-opt,-dbg] [command]" print "# Commands:" print " - os: print name of your operating system" print " - all: compile all the library [default]" print " - depend: make dependencies" print " - clean: clean current objects and library" print " - distclean: clean all objects and library" print " - : compile given *main* program(s)" print "" sys.exit(0) config = readConfig(override_debug_mode) if(cmd == "depend"): makeDepend() elif(cmd == "all"): if(makeAll()): makeDepend() if(makeAll()): print "$ Something is wrong. What are you doing ?" 
sys.exit(0) elif(cmd == "clean"): makeClean() elif(cmd == "distclean"): makeDistClean() else: library_is_already_made = 0 for sub_cmd in files_to_compile: file_name = "" if(re.search(".*?\.(cc|h)$", sub_cmd)): file_name = sub_cmd else: file_name = sub_cmd + ".cc" if(not os.path.exists(file_name)): print "$ Don't know what you want to do with your <" + sub_cmd + ">" sys.exit(0) if(not library_is_already_made): library_is_already_made = 1 if(makeAll()): makeDepend() if(makeAll()): print "$ Something is wrong. What are you doing ?" sys.exit(0) os.system("mkdir -p " + config.version_key) the_compile_cmd = config.compiler + " " + config.flags + " " + config.includes + " -o " + config.version_key + "/" + file_name[0:-3] + " " + file_name + " " + config.libraries print "# Compiling <" + config.version_key + "/" + file_name[0:-3] + ">" if(config.verbose): print the_compile_cmd if(os.system(the_compile_cmd) != 0): sys.exit(0) torch3-3.1.orig/ChangeLog0000644000175000017500000001202510106445234015445 0ustar kalfakalfa00000000000000=============================================================================== Torch 3.1 =============================================================================== core ==== * Bagging.cc - Initialize "is_selected_examples" to NULL. * ClassFormatDataSet.cc - Convert all the frames of targets, and not only the first one. * ClassMeasurer.cc - Scan all the frames of inputs to compute classification error, and not only the first one. - The confusion matrix computation option at the end of the training as been removed. The confusion matrix is still available at each iteration. * CmdOption.cc - When calling loadXFile(), a string not always allocated was freed. * DiskDataSet.cc - The number of examples was computed using io_inputs, even if it was NULL. * KFold.cc - The size of the folds is more balanced for small folds. * MemoryDataSet.cc - The number of examples was computed using io_inputs, even if it was NULL. 
* MemoryXFile.cc - The EOF flag was not updated when reaching the end of file while reading with the scanf method. * MultiClassFormat.cc - Serious bug when auto-detecting classes corrected. * Random.cc - Using now memmove instead of memcopy (possibility of overlapping source and destination). matrix ====== * Mat.cc and Mat.h - Initializing a matrix from a "real *" now possible. gradients ========= * SumMachine.cc - The gradient is now well back-propagated. kernels ======= * QCTrainer.cc - Bug when updating alpha corrected. Note: very rare case in practice. Thanks to Stephen Schiller for the report of this tricky bug. distributions ============= * DiagonalGMM.cc - log_probabilities are now correctely computed for the viterbi case - frameExpectation becomes frameDecision - keep the best gaussian for the current frame * Distribution.cc - resize the output of the machine correctely - added decision and frameDecision method * HMM.cc - added states shared parameters * MAPDiagonalGMM.cc - adapt the parameters only if the gaussian have "seen" at least one frame * Multinomial.cc - added equal initialization option - added check that the log weights are number - frameExpectation becomes frameDecision * NLLCriterion.cc - correct the beta and outputs resize * ParzenDistribution.cc - correct the resize of the log_probablities - added frameExpectation method * TableLookupDistribution.cc - correction of some bugs to calculate the log probablity * ViterbiTrainer.cc ,ViterbiTrainer.h - this class is suppressed, it was already included in EMTrainer datasets ======== * IOHTK.cc - correction of some major bugs when reading the HTK files on disk on unsequential mode - correction of major bug on double mode * IOHTKTarget.cc - space followed by tabulation allowed instead of space only - added some check * IOHTKTarget.h - make saveSequence static * Vocabulary.cc - bound check corrected for the number of words speech ====== * EditDistance.{cc,h} and EditDistanceMeasurer.{cc,h} - Add a 
constructor option to print a confusion matrix as well * WordSeg.{cc,h} and FrameSeg.{cc,h} - Two new class to keep the word and frame segmentations found by viterbi decoding. These are kept separate so as to be compatible with any new subclass of SpeechHMM. * WordSegMeasurer.{cc,h} and FrameSegMeasurer.{cc,h} - Measurers that outputs information related to the number of errors in terms of word error rate (WordSegMeasurer) or frame error rate (FrameSegMeasurer), using the EditDistance class to compute the error. * SimpleDecoderSpeechHMM.{cc,h} - Modifications to take into account the new WordSeg and FrameSeg classes - added a structure (previous_states[] and n_previous_states) to speedup the decoding process by only looping on existing transitions and not all possible transitions - Added an option to be able to decode using "forced alignment", hence instead of decoding on the whole grammar, decoding on a given (true) sentence * SpeechHMM.cc - bug corrections related to initialization torch3-3.1.orig/LICENSE0000644000175000017500000000271110106445234014701 0ustar kalfakalfa00000000000000 Copyright (c) 2003--2004 Ronan Collobert Copyright (c) 2003--2004 Samy Bengio Copyright (c) 2003--2004 Johnny Mariéthoz All rights reserved. Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: 1. Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. 2. Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. 3. The name of the author may not be used to endorse or promote products derived from this software without specific prior written permission. 
THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. torch3-3.1.orig/distributions/0000755000175000017500000000000010106445237016600 5ustar kalfakalfa00000000000000torch3-3.1.orig/distributions/BayesClassifier.cc0000644000175000017500000000665010106445236022165 0ustar kalfakalfa00000000000000// Copyright (C) 2003--2004 Samy Bengio (bengio@idiap.ch) // and Bison Ravi (francois.belisle@idiap.ch) // // This file is part of Torch 3.1. // // All rights reserved. // // Redistribution and use in source and binary forms, with or without // modification, are permitted provided that the following conditions // are met: // 1. Redistributions of source code must retain the above copyright // notice, this list of conditions and the following disclaimer. // 2. Redistributions in binary form must reproduce the above copyright // notice, this list of conditions and the following disclaimer in the // documentation and/or other materials provided with the distribution. // 3. The name of the author may not be used to endorse or promote products // derived from this software without specific prior written permission. // // THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR // IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES // OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
// IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, // INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT // NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF // THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #include "BayesClassifier.h" #include "log_add.h" namespace Torch { BayesClassifier::BayesClassifier(BayesClassifierMachine* machine_) : Trainer(machine_) { bayesmachine = (BayesClassifierMachine*)machine; n_classes = bayesmachine->n_trainers; classes = (int**) allocator->alloc(n_classes * sizeof(int*)); for(int i = 0;i < n_classes ;i++) classes[i] = (int*) allocator->alloc(1 * sizeof(int)); classes_n = (int*)allocator->alloc(n_classes * sizeof(int)); } BayesClassifier::~BayesClassifier() { } void BayesClassifier::train(DataSet *data, MeasurerList *measurers) { message("BayesClassifier: Training"); // attribute the classes for (int i=0;irealloc(classes[i],data->n_examples * sizeof(int)); } machine->setDataSet(data); for (int i=0;in_examples;i++) { data->setExample(i); int c = bayesmachine->class_format->getClass(data->targets->frames[0]); classes[c][classes_n[c]++] = i; } // eventually compute prior given training set if (bayesmachine->allocated_log_priors) { real log_n = log((real)data->n_examples); for (int i=0;i 0) bayesmachine->log_priors[i] = log((real)classes_n[i]) - log_n; else bayesmachine->log_priors[i] = LOG_ZERO; } for(int c = 0;c < n_classes;c++) { data->pushSubset(classes[c],classes_n[c]); bayesmachine->trainers[c]->machine->reset(); if (data->n_examples > 0) { if (bayesmachine->trainers_measurers) bayesmachine->trainers[c]->train(data,bayesmachine->trainers_measurers[c]); else bayesmachine->trainers[c]->train(data,NULL); } else { 
warning("BayesClassifier: there was no examples to train class %d",c); } data->popSubset(); } if (measurers) { test(measurers); } } } torch3-3.1.orig/distributions/BayesClassifier.h0000644000175000017500000000513610106445236022025 0ustar kalfakalfa00000000000000// Copyright (C) 2003--2004 Samy Bengio (bengio@idiap.ch) // and Bison Ravi (francois.belisle@idiap.ch) // // This file is part of Torch 3.1. // // All rights reserved. // // Redistribution and use in source and binary forms, with or without // modification, are permitted provided that the following conditions // are met: // 1. Redistributions of source code must retain the above copyright // notice, this list of conditions and the following disclaimer. // 2. Redistributions in binary form must reproduce the above copyright // notice, this list of conditions and the following disclaimer in the // documentation and/or other materials provided with the distribution. // 3. The name of the author may not be used to endorse or promote products // derived from this software without specific prior written permission. // // THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR // IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES // OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. // IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, // INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT // NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF // THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
#ifndef BAYES_CLASSIFIER_INC #define BAYES_CLASSIFIER_INC #include "Trainer.h" #include "BayesClassifierMachine.h" namespace Torch { /** A multi class bayes classifier -- maximizes the likelihood of each class separately using a trainer for distribution. When testing, the predicted class corresponds to the trainer giving the maximum output, weighted by its prior probability. @author Samy Bengio (bengio@idiap.ch) @author Bison Ravi (francois.belisle@idiap.ch) */ class BayesClassifier : public Trainer { public: /// the bayes machine BayesClassifierMachine* bayesmachine; /// the number of different classes int n_classes; /// all the example indices of each class. int** classes; ///the number of examples per class. int* classes_n; /// you need to define a BayesClassifierMachine to use this class BayesClassifier( BayesClassifierMachine* ); virtual ~BayesClassifier(); virtual void train( DataSet *data, MeasurerList *measurers); }; } #endif torch3-3.1.orig/distributions/BayesClassifierMachine.cc0000644000175000017500000000664710106445236023460 0ustar kalfakalfa00000000000000// Copyright (C) 2003--2004 Samy Bengio (bengio@idiap.ch) // and Bison Ravi (francois.belisle@idiap.ch) // // This file is part of Torch 3.1. // // All rights reserved. // // Redistribution and use in source and binary forms, with or without // modification, are permitted provided that the following conditions // are met: // 1. Redistributions of source code must retain the above copyright // notice, this list of conditions and the following disclaimer. // 2. Redistributions in binary form must reproduce the above copyright // notice, this list of conditions and the following disclaimer in the // documentation and/or other materials provided with the distribution. // 3. The name of the author may not be used to endorse or promote products // derived from this software without specific prior written permission. 
// // THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR // IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES // OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. // IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, // INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT // NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF // THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #include "BayesClassifierMachine.h" namespace Torch { BayesClassifierMachine::BayesClassifierMachine(EMTrainer** trainers_, int n_trainers_, MeasurerList** trainers_measurers_, ClassFormat* class_format_,real* log_priors_) { allocated_log_priors = false; trainers = trainers_; n_trainers = n_trainers_; trainers_measurers = trainers_measurers_; class_format = class_format_; log_probabilities = new(allocator) Sequence(1,n_trainers); //if we are not given any log_prior class probabilities, //then we will assume training set proportions. 
if(log_priors_ != NULL) log_priors = log_priors_; else { allocated_log_priors = true; log_priors = (real*) allocator->alloc (n_trainers * sizeof(real)); //as a first approximation for(int i = 0;i < n_trainers;i++) log_priors[i] = -log((real)n_trainers); } n_outputs = class_format->getOutputSize(); outputs = new(allocator)Sequence(1,n_outputs); } BayesClassifierMachine::~BayesClassifierMachine() { } void BayesClassifierMachine::forward(Sequence *inputs) { for(int trainer = 0;trainer < n_trainers;trainer++) { trainers[trainer]->machine->forward(inputs); log_probabilities->frames[0][trainer] = trainers[trainer]->distribution->log_probability + log_priors[trainer]; } //transform the output from one_hot representation to class_format class_format->fromOneHot(outputs->frames[0],log_probabilities->frames[0]); } void BayesClassifierMachine::reset() { /* I think this is not necessary as it is done in the train method... for(int i = 0;i < n_trainers;i++) trainers[i]->machine->reset(); */ } void BayesClassifierMachine::loadXFile(XFile* file) { for(int i = 0;i < n_trainers;i++) trainers[i]->loadXFile(file); } void BayesClassifierMachine::saveXFile(XFile* file) { for(int i = 0;i < n_trainers;i++) trainers[i]->saveXFile(file); } } torch3-3.1.orig/distributions/BayesClassifierMachine.h0000644000175000017500000000720310106445236023307 0ustar kalfakalfa00000000000000// Copyright (C) 2003--2004 Samy Bengio (bengio@idiap.ch) // and Bison Ravi (francois.belisle@idiap.ch) // // This file is part of Torch 3.1. // // All rights reserved. // // Redistribution and use in source and binary forms, with or without // modification, are permitted provided that the following conditions // are met: // 1. Redistributions of source code must retain the above copyright // notice, this list of conditions and the following disclaimer. // 2. 
Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
// 3. The name of the author may not be used to endorse or promote products
// derived from this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
// IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
// OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
// IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
// INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
// NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
// THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

#ifndef BAYES_CLASSIFIER_MACHINE_INC
#define BAYES_CLASSIFIER_MACHINE_INC

#include "Machine.h"
#include "EMTrainer.h"
#include "ClassFormat.h"

namespace Torch {

/** BayesClassifierMachine is the machine used by the #BayesClassifier#
    trainer to perform a Bayes Classification using different
    distributions.

    The output corresponds to the class that is the most probable
    (using prior AND posterior information).

    @author Samy Bengio (bengio@idiap.ch)
    @author Bison Ravi (francois.belisle@idiap.ch)
*/
class BayesClassifierMachine : public Machine
{
  public:
    /// the number of classes corresponds to the number of #Trainer#
    int n_trainers;

    /// the number of outputs in this machine
    /// (set by the constructor from class_format->getOutputSize())
    int n_outputs;

    /// the actual trainers (EMTrainer since we are training distributions).
    EMTrainer** trainers;

    /** the log_prior probabilities of each class.
        When no priors are passed to the constructor, it allocates this
        vector and fills it with the uniform value -log(n_trainers) as a
        first approximation.
        NOTE(review): the original comment states the default is the log
        of the proportions in the training set; presumably
        BayesClassifier::train overwrites the uniform values -- confirm.
    */
    real* log_priors;

    /// one value per class: the class distribution's log probability
    /// plus the log prior of the class (written by forward()).
    Sequence* log_probabilities;

    /// used to know if log_priors were given (false) or allocated by
    /// the constructor (true)
    bool allocated_log_priors;

    /// the class format of the output
    ClassFormat* class_format;

    /// the measurers for each individual trainer
    MeasurerList** trainers_measurers;

    /** creates a machine for BayesClassifier trainers, given a vector of
        trainers (one per class), an associated measurer for each trainer,
        a class_format that explains how the classes are coded, and an
        optional vector (of size #n_trainers_#) containing the log of
        the class priors. The pointers are stored, not copied.
    */
    BayesClassifierMachine( EMTrainer**, int n_trainers_, MeasurerList** trainers_measurers_ , ClassFormat* class_format_, real* log_priors_=NULL);

    virtual ~BayesClassifierMachine();

    /** definition of virtual functions of #Machine# */

    /// Runs every trainer's machine on #inputs#, stores the per-class
    /// scores in #log_probabilities#, then converts them into #outputs#
    /// with class_format->fromOneHot().
    virtual void forward(Sequence *inputs);

    /// Currently a no-op: the implementation body is commented out.
    virtual void reset();

    /// Calls loadXFile() on each trainer, in class order.
    virtual void loadXFile( XFile* );

    /// Calls saveXFile() on each trainer, in class order.
    virtual void saveXFile( XFile* );
};

}

#endif
torch3-3.1.orig/distributions/DiagonalGMM.cc0000644000175000017500000003056310106445236021174 0ustar kalfakalfa00000000000000// Copyright (C) 2003--2004 Johnny Mariethoz (Johnny.Mariethoz@idiap.ch)
// and Samy Bengio (bengio@idiap.ch)
//
// This file is part of Torch 3.1.
//
// All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions
// are met:
// 1. Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
// 2. Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
// 3.
The name of the author may not be used to endorse or promote products // derived from this software without specific prior written permission. // // THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR // IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES // OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. // IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, // INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT // NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF // THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #include "DiagonalGMM.h" #include "MeanVarNorm.h" #include "log_add.h" #include "Random.h" namespace Torch { DiagonalGMM::DiagonalGMM(int n_inputs_, int n_gaussians_, EMTrainer* initial_kmeans_trainer_) : Distribution(n_inputs_,(n_inputs_*n_gaussians_)*2+n_gaussians_) { n_gaussians = n_gaussians_; initial_kmeans_trainer = initial_kmeans_trainer_; initial_kmeans_trainer_measurers = NULL; var_threshold = (real*)allocator->alloc(sizeof(real)*n_inputs); for (int i=0;idata[0]; real* dp = (real*)der_params->data[0]; log_weights = p; dlog_weights = dp; p += n_gaussians; dp += n_gaussians; means = (real**)allocator->alloc(sizeof(real*)*n_gaussians); dmeans = (real**)allocator->alloc(sizeof(real*)*n_gaussians); var = (real**)allocator->alloc(sizeof(real*)*n_gaussians); dvar = (real**)allocator->alloc(sizeof(real*)*n_gaussians); means_acc = (real**)allocator->alloc(sizeof(real*)*n_gaussians); var_acc = (real**)allocator->alloc(sizeof(real*)*n_gaussians); weights_acc = (real*)allocator->alloc(sizeof(real)*n_gaussians); minus_half_over_var = (real**)allocator->alloc(sizeof(real*)*n_gaussians); for (int 
i=0;ialloc(sizeof(real)*n_inputs); var_acc[i] = (real*)allocator->alloc(sizeof(real)*n_inputs); minus_half_over_var[i] = (real*)allocator->alloc(sizeof(real)*n_inputs); } sum_log_var_plus_n_obs_log_2_pi = (real*)allocator->alloc(sizeof(real)*n_gaussians); best_gauss = -1; best_gauss_per_frame = new(allocator)Sequence(1,1); } void DiagonalGMM::setVarThreshold(real* var_threshold_){ for (int i=0;itrain(data_,initial_kmeans_trainer_measurers); params->copy(initial_kmeans_trainer->distribution->params); } //check variance flooring for (int i=0;iresize(inputs->n_frames); log_probabilities_g->resize(inputs->n_frames); log_probabilities->resize(inputs->n_frames); } void DiagonalGMM::display() { for(int i=0;in_frames;i++) generateObservation(sequence->frames[i]); } void DiagonalGMM::generateObservation(real* observation) { real v_tot,v_partial; v_tot = Random::uniform(); v_partial = 0.; real* lw = log_weights; int j; for (j=0;j v_tot) break; } if(j>=n_gaussians) j = n_gaussians - 1; real* v = var[j]; real* m = means[j]; real* obs = observation; for (int i=0;iframes[t]; real max_lpg = LOG_ZERO; real max_lp = LOG_ZERO; real lpg_w = 0; for (int i=0;i max_lpg){ max_lpg = lpg_w; best_gauss = i; max_lp = *lpg; } } log_probabilities->frames[t][0] = max_lp; best_gauss_per_frame->frames[t][0] = (real)best_gauss; return max_lp; } real DiagonalGMM::frameLogProbability(int t, real *inputs) { real *p_log_w = log_weights; real *lpg = log_probabilities_g->frames[t]; real log_prob = LOG_ZERO; for (int i=0;iframes[t][0] = log_prob; return log_prob; } void DiagonalGMM::frameViterbiAccPosteriors(int t, real *inputs, real log_posterior) { real *p_weights_acc = weights_acc; real *lp_i = log_probabilities_g->frames[t]; real *log_w_i = log_weights; real max_lpg = LOG_ZERO; int best_g = 0; //findmax for (int i=0;i max_lpg){ best_g = i; max_lpg = post_i; } } p_weights_acc[best_g] += 1; real* means_acc_i = means_acc[best_g]; real* var_acc_i = var_acc[best_g]; real *x = inputs; for(int j = 0; j < 
n_inputs; j++) { *var_acc_i++ += *x * *x; *means_acc_i++ += *x++; } } void DiagonalGMM::frameEMAccPosteriors(int t, real *inputs, real log_posterior) { real log_prob = log_probabilities->frames[t][0]; real *p_weights_acc = weights_acc; real *lp_i = log_probabilities_g->frames[t]; real *log_w_i = log_weights; for (int i=0;i= var_threshold[j] ? v : var_threshold[j]; } } } // then the weights real sum_weights_acc = 0; p_weights_acc = weights_acc; for (int i=0;iframes[t][0]; real *lp_i = log_probabilities_g->frames[t]; real *lw = log_weights; real* dlw = dlog_weights; for (int i=0;iresize(n_outputs); } real Distribution::logProbability(Sequence *inputs) { real ll = 0; for (int i=0;in_frames;i++) { ll += frameLogProbability(i,inputs->frames[i]); } return ll; } real Distribution::viterbiLogProbability(Sequence *inputs) { real ll = 0; for (int i=0;in_frames;i++) { ll += viterbiFrameLogProbability(i,inputs->frames[i]); } return ll; } real Distribution::viterbiFrameLogProbability(int t, real *inputs) { return frameLogProbability(t, inputs); } real Distribution::frameLogProbability(int t, real *inputs) { return LOG_ZERO; } void Distribution::frameGenerate(int t, real *inputs) { } void Distribution::iterInitialize() { eMIterInitialize(); } void Distribution::eMIterInitialize() { } void Distribution::eMSequenceInitialize(Sequence* inputs) { log_probabilities->resize(inputs->n_frames); } void Distribution::sequenceInitialize(Sequence* inputs) { log_probabilities->resize(inputs->n_frames); } void Distribution::eMAccPosteriors(Sequence *inputs, real log_posterior) { for (int i=0;in_frames;i++) { frameEMAccPosteriors(i, inputs->frames[i], log_posterior); } } void Distribution::viterbiAccPosteriors(Sequence *inputs, real log_posterior) { for (int i=0;in_frames;i++) { frameViterbiAccPosteriors(i, inputs->frames[i], log_posterior); } } void Distribution::frameEMAccPosteriors(int t, real *inputs, real log_posterior) { } void Distribution::frameViterbiAccPosteriors(int t, real *inputs, 
real log_posterior) { } void Distribution::eMUpdate() { } void Distribution::update() { } void Distribution::decode(Sequence *inputs) { } void Distribution::forward(Sequence *inputs) { sequenceInitialize(inputs); log_probability = logProbability(inputs); outputs->frames[0][0] = log_probability; } void Distribution::eMForward(Sequence *inputs) { eMSequenceInitialize(inputs); log_probability = logProbability(inputs); } void Distribution::viterbiForward(Sequence *inputs) { eMSequenceInitialize(inputs); log_probability = viterbiLogProbability(inputs); } void Distribution::backward(Sequence *inputs, Sequence *alpha) { for (int i=0;in_frames;i++) { frameBackward(i, inputs->frames[i], NULL, NULL, alpha->frames[0]); } } void Distribution::viterbiBackward(Sequence *inputs, Sequence *alpha) { backward(inputs,alpha); } void Distribution::frameBackward(int t, real *f_inputs, real *beta_, real *f_outputs, real *alpha_) { } void Distribution::loadXFile(XFile *file) { params->loadXFile(file); eMIterInitialize(); } void Distribution::decision(Sequence* decision) { for (int i=0;in_frames;i++) { frameDecision(i,decision->frames[i]); } } void Distribution::frameDecision(int t, real *decision) { } Distribution::~Distribution() { } } torch3-3.1.orig/distributions/Distribution.h0000644000175000017500000001214510106445236021432 0ustar kalfakalfa00000000000000// Copyright (C) 2003--2004 Samy Bengio (bengio@idiap.ch) // // This file is part of Torch 3.1. // // All rights reserved. // // Redistribution and use in source and binary forms, with or without // modification, are permitted provided that the following conditions // are met: // 1. Redistributions of source code must retain the above copyright // notice, this list of conditions and the following disclaimer. // 2. Redistributions in binary form must reproduce the above copyright // notice, this list of conditions and the following disclaimer in the // documentation and/or other materials provided with the distribution. // 3. 
The name of the author may not be used to endorse or promote products
// derived from this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
// IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
// OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
// IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
// INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
// NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
// THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

#ifndef DISTRIBUTION_INC
#define DISTRIBUTION_INC

#include "GradientMachine.h"

namespace Torch {

/** This class is designed to handle generative distribution models
    such as Gaussian Mixture Models and Hidden Markov Models. As
    distributions inherit from GradientMachine, they can be trained by
    gradient descent or by Expectation Maximization (EM) or even Viterbi.

    Note that the output of a distribution is the negative log likelihood.
    NOTE(review): the base-class forward() stores #log_probability# itself
    (not negated) in outputs->frames[0][0]; confirm which sign is intended.

    Most virtual methods have an empty or trivial default implementation
    in this base class and are meant to be overridden by concrete
    distributions.

    @author Samy Bengio (bengio@idiap.ch)
*/
class Distribution : public GradientMachine
{
  public:
    /// the log likelihood (set by forward(), eMForward() and
    /// viterbiForward())
    real log_probability;

    /// the log likelihood for each frame when available
    /// (resized to the number of input frames by the *SequenceInitialize
    /// methods)
    Sequence* log_probabilities;

    /// #n_inputs_# is the input frame size and #n_params_# the number of
    /// parameters.
    /// NOTE(review): presumably both are forwarded to GradientMachine;
    /// the constructor body is not fully visible here -- confirm.
    Distribution(int n_inputs_,int n_params_=0);

    /// Returns the log probability of a sequence represented by #inputs#
    /// (default: sum of frameLogProbability() over the frames)
    virtual real logProbability(Sequence* inputs);

    /// Returns the viterbi score of a sequence represented by #inputs#
    /// (default: sum of viterbiFrameLogProbability() over the frames)
    virtual real viterbiLogProbability(Sequence* inputs);

    /// Returns the log probability of a frame of a sequence
    /// (default: LOG_ZERO)
    virtual real frameLogProbability(int t, real *f_inputs);

    /// Returns the log probability of a frame of a sequence in Viterbi mode
    /// (default: same as frameLogProbability())
    virtual real viterbiFrameLogProbability(int t, real *f_inputs);

    /// Default implementation is empty.
    /// NOTE(review): presumably generates frame #t# into #inputs# in
    /// subclasses -- confirm.
    virtual void frameGenerate(int t, real *inputs);

    /** Methods used to initialize the model at the beginning of each
        EM iteration (default: does nothing)
    */
    virtual void eMIterInitialize();

    /** Methods used to initialize the model at the beginning of each
        gradient descent iteration (default: calls eMIterInitialize())
    */
    virtual void iterInitialize();

    /** Methods used to initialize the model at the beginning of each
        example during EM training
        (default: resizes #log_probabilities# to inputs->n_frames)
    */
    virtual void eMSequenceInitialize(Sequence* inputs);

    /** Methods used to initialize the model at the beginning of each
        example during gradient descent training
        (default: resizes #log_probabilities# to inputs->n_frames)
    */
    virtual void sequenceInitialize(Sequence* inputs);

    /// The backward step of EM for a sequence
    /// (default: calls frameEMAccPosteriors() for each frame)
    virtual void eMAccPosteriors(Sequence *inputs, real log_posterior);

    /// The backward step of EM for a frame (default: does nothing)
    virtual void frameEMAccPosteriors(int t, real *f_inputs, real log_posterior);

    /// The backward step of Viterbi learning for a sequence
    /// (default: calls frameViterbiAccPosteriors() for each frame)
    virtual void viterbiAccPosteriors(Sequence *inputs, real log_posterior);

    /// The backward step of Viterbi for a frame (default: does nothing)
    virtual void frameViterbiAccPosteriors(int t, real *f_inputs, real log_posterior);

    /// The update after each iteration for EM (default: does nothing)
    virtual void eMUpdate();

    /// The update after each gradient iteration (default: does nothing)
    virtual void update();

    /// For some distribution like SpeechHMM, decodes the most likely path
    /// (default: does nothing)
    virtual void decode(Sequence *inputs);

    /// Calls sequenceInitialize(), computes #log_probability# with
    /// logProbability() and stores it in outputs->frames[0][0].
    virtual void forward(Sequence *inputs);

    /// Same as forward, but for EM: uses eMSequenceInitialize() and does
    /// not write #outputs#.
    virtual void eMForward(Sequence *inputs);

    /// Same as forward, but for Viterbi: uses eMSequenceInitialize() and
    /// viterbiLogProbability(), and does not write #outputs#.
    virtual void viterbiForward(Sequence *inputs);

    /// Calls frameBackward() for each frame, passing NULL for #beta_# and
    /// #f_outputs# and alpha->frames[0] for #alpha_#.
    virtual void backward(Sequence *inputs, Sequence *alpha);

    /// Same as backward, but for one frame only (default: does nothing)
    virtual void frameBackward(int t, real *f_inputs, real *beta_, real *f_outputs, real *alpha_);

    /// Same as backward, but for Viterbi (default: calls backward())
    virtual void viterbiBackward(Sequence *inputs, Sequence *alpha);

    /// Loads #params# from #file#, then calls eMIterInitialize().
    virtual void loadXFile(XFile *file);

    /// Returns the decision of the distribution
    /// decision is expectation for regression, class likelihoods for
    /// classification
    /// (default: calls frameDecision() for each frame of #decision#)
    virtual void decision(Sequence* decision);

    /// Returns the decision of a frame of a sequence
    /// (default: does nothing)
    virtual void frameDecision(int t, real *decision);

    virtual ~Distribution();
};

}

#endif
torch3-3.1.orig/distributions/EMTrainer.cc0000644000175000017500000001477010106445236020745 0ustar kalfakalfa00000000000000// Copyright (C) 2003--2004 Samy Bengio (bengio@idiap.ch)
//
// This file is part of Torch 3.1.
//
// All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions
// are met:
// 1. Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
// 2. Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
// 3. The name of the author may not be used to endorse or promote products
// derived from this software without specific prior written permission.
// // THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR // IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES // OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. // IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, // INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT // NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF // THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #include "EMTrainer.h" #include "log_add.h" namespace Torch { EMTrainer::EMTrainer(Distribution *distribution_) : Trainer(distribution_) { distribution = distribution_; addROption("end accuracy", &end_accuracy, 0.0001,"end accuracy"); addIOption("max iter", &max_iter, 100, "maximum number of iterations"); addBOption("viterbi", &viterbi, false, "Viterbi training"); } void EMTrainer::train(DataSet* data, MeasurerList *measurers) { int iter = 0; int n_train = data->n_examples; real prev_nll = INF; real nll = INF; DataSet **datas; Measurer ***mes; int *n_mes; int n_datas; machine->setDataSet(data); message("EMTrainer: training"); if(measurers) { for(int i = 0; i < measurers->n_nodes; i++) measurers->nodes[i]->reset(); } Allocator *allocator_ = extractMeasurers(measurers, data, &datas, &mes, &n_mes, &n_datas); while (1) { distribution->eMIterInitialize(); nll = 0; int tot_n_frames = 0; for (int t=0;tsetExample(t); if (viterbi) { distribution->viterbiForward(data->inputs); nll -= distribution->log_probability; distribution->viterbiAccPosteriors(data->inputs,LOG_ONE); } else { distribution->eMForward(data->inputs); nll -= distribution->log_probability; distribution->eMAccPosteriors(data->inputs,LOG_ONE); } tot_n_frames += data->inputs->n_frames; 
for(int i = 0; i < n_mes[0]; i++) mes[0][i]->measureExample(); } nll /= tot_n_frames; distribution->eMUpdate(); for(int i = 0; i < n_mes[0]; i++) mes[0][i]->measureIteration(); // for each supplementary dataset given, simply compute // test llr (not used for training) for(int julie = 1; julie < n_datas; julie++) { DataSet *dataset = datas[julie]; for(int t=0;tn_examples;t++) { dataset->setExample(t); if (viterbi) { distribution->viterbiForward(dataset->inputs); } else { distribution->eMForward(dataset->inputs); } for(int i = 0; i < n_mes[julie]; i++) mes[julie][i]->measureExample(); } for(int i = 0; i < n_mes[julie]; i++) mes[julie][i]->measureIteration(); } // stopping criterion if ((prev_nll == nll) || fabs((prev_nll - nll)/prev_nll) < end_accuracy) { print("\n"); break; } prev_nll = nll; print("."); iter++; if ((iter >= max_iter) && (max_iter > 0)) { print("\n"); warning("EMTrainer: you have reached the maximum number of iterations"); break; } } for(int i=0;imeasureEnd(); } delete allocator_; } void EMTrainer::test(MeasurerList *measurers) { DataSet **datas; Measurer ***mes; int *n_mes; int n_datas; print("# EMTrainer: testing ["); //message("emtrainer: testing"); Allocator *allocator_ = extractMeasurers(measurers, NULL, &datas, &mes, &n_mes, &n_datas); //// int n_ex = 0; for(int andrea = 0; andrea < n_datas; andrea++) n_ex += datas[andrea]->n_examples; real n_ex_mod = ((n_ex == 0)? 0. 
: 10.1/((real)n_ex)); real ex_curr = 0; real n_dots = 0; //// for(int andrea = 0; andrea < n_datas; andrea++) { DataSet *dataset = datas[andrea]; for(int i = 0; i < n_mes[andrea]; i++) mes[andrea][i]->reset(); distribution->eMIterInitialize(); for(int t = 0; t < dataset->n_examples; t++) { dataset->setExample(t); if (viterbi) { distribution->viterbiForward(dataset->inputs); } else { distribution->eMForward(dataset->inputs); } for(int i = 0; i < n_mes[andrea]; i++) mes[andrea][i]->measureExample(); if(++ex_curr * n_ex_mod >= (n_dots+1)) { if(n_ex < 10) print("_"); else print("."); n_dots++; } } for(int i = 0; i < n_mes[andrea]; i++) mes[andrea][i]->measureIteration(); for(int i = 0; i < n_mes[andrea]; i++) mes[andrea][i]->measureEnd(); } print("]\n"); delete allocator_; } void EMTrainer::decode(MeasurerList *measurers) { DataSet **datas; Measurer ***mes; int *n_mes; int n_datas; message("emtrainer: decoding"); Allocator *allocator_ = extractMeasurers(measurers, NULL, &datas, &mes, &n_mes, &n_datas); for(int andrea = 0; andrea < n_datas; andrea++) { DataSet *dataset = datas[andrea]; distribution->setDataSet(dataset); for(int i = 0; i < n_mes[andrea]; i++) mes[andrea][i]->reset(); distribution->eMIterInitialize(); for(int t = 0; t < dataset->n_examples; t++) { dataset->setExample(t); distribution->decode(dataset->inputs); for(int i = 0; i < n_mes[andrea]; i++) mes[andrea][i]->measureExample(); } for(int i = 0; i < n_mes[andrea]; i++) mes[andrea][i]->measureIteration(); for(int i = 0; i < n_mes[andrea]; i++) mes[andrea][i]->measureEnd(); } delete allocator_; } EMTrainer::~EMTrainer() { } } torch3-3.1.orig/distributions/EMTrainer.h0000644000175000017500000000507210106445236020602 0ustar kalfakalfa00000000000000// Copyright (C) 2003--2004 Samy Bengio (bengio@idiap.ch) // // This file is part of Torch 3.1. // // All rights reserved. 
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions
// are met:
// 1. Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
// 2. Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
// 3. The name of the author may not be used to endorse or promote products
// derived from this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
// IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
// OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
// IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
// INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
// NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
// THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

#ifndef EMTRAINER_INC
#define EMTRAINER_INC

#include "Trainer.h"
#include "Distribution.h"

namespace Torch {

/** This class is used to train any distribution using the EM algorithm.
    It can also train using the Viterbi training algorithm.

    The constructor registers three options: "end accuracy"
    (default 0.0001), "max iter" (default 100) and "viterbi"
    (default false), bound to the members of the same meaning below.

    @author Samy Bengio (bengio@idiap.ch)
*/
class EMTrainer : public Trainer
{
  public:
    /// the distribution to train
    Distribution *distribution;

    /// the stopping criterion regarding the accuracy for EM:
    /// training stops when the relative change of the per-frame negative
    /// log likelihood between two iterations falls below this value
    /// (or when the value does not change at all)
    real end_accuracy;

    /// the stopping criterion regarding the number of iterations for EM;
    /// the limit is only enforced when max_iter > 0
    int max_iter;

    /// when viterbi is true, use Viterbi training instead of EM training
    bool viterbi;

    /// #distribution_# is stored in #distribution# and also passed to the
    /// Trainer base class as the machine to train.
    EMTrainer(Distribution *distribution_);

    /// Iterates over #data# until one of the stopping criteria above is
    /// met, calling the given measurers along the way.
    virtual void train(DataSet* data, MeasurerList *measurers);

    /// For every data set attached to #measurers#: resets its measurers,
    /// runs viterbiForward() or eMForward() (depending on #viterbi#) on
    /// each example and calls the measurers. A progress bar is printed.
    virtual void test(MeasurerList *measurers);

    /** this method computes the most likely path into the distribution.
        mainly used for sequential distribution such as HMMs.
        For every data set attached to #measurers#, calls
        distribution->decode() on each example and then the measurers.
    */
    virtual void decode(MeasurerList *measurers);

    virtual ~EMTrainer();
};

}

#endif
torch3-3.1.orig/distributions/HMM.cc0000644000175000017500000004443210106445236017536 0ustar kalfakalfa00000000000000// Copyright (C) 2003--2004 Samy Bengio (bengio@idiap.ch)
//
// This file is part of Torch 3.1.
//
// All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions
// are met:
// 1. Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
// 2. Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
// 3. The name of the author may not be used to endorse or promote products
// derived from this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
// IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
// OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
// IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, // INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT // NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF // THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #include "HMM.h" #include "FrameSelectorDataSet.h" #include "Random.h" #include "log_add.h" #include "XFile.h" namespace Torch { HMM::HMM(int n_states_, Distribution **states_, real** transitions_, int n_shared_states_,Distribution **shared_states_) : Distribution(states_[1]->n_inputs,n_states_*n_states_) { n_states = n_states_; states = states_; transitions = transitions_; // if given, these represent the "real" states, while "states" are // only pointers to shared_states. note that shared_states should all // exist (not like states which is null for states[0] and states[n_states-1]) shared_states = shared_states_; n_shared_states = n_shared_states_; addBOption("initialize", &initialize , true, "initialize the model before training"); addBOption("linear segmentation", &linear_segmentation, false, "linear segmentation to initialize the states"); addROption("prior transitions", &prior_transitions , 1e-3, "minimum weights for each gaussians"); if (n_states > 0) { log_transitions = (real**)allocator->alloc(sizeof(real*)*n_states); dlog_transitions = (real**)allocator->alloc(sizeof(real*)*n_states); transitions_acc = (real**)allocator->alloc(sizeof(real*)*n_states); for (int i=1;iadd(states[i]->params); der_params->add(states[i]->der_params); } for (int i=0;ialloc(sizeof(real)*n_states);; log_transitions[i] = ((real*)params->data[0]) + i*n_states; dlog_transitions[i] = ((real*)der_params->data[0]) + i*n_states; } log_probabilities_s = new 
(allocator)Sequence(1,n_states); log_alpha = new (allocator)Sequence(1,n_states); log_beta = new (allocator)Sequence(1,n_states); arg_viterbi = new (allocator)Sequence(1,n_states); viterbi_sequence = new (allocator)Sequence(1,1); } } void HMM::loadXFile(XFile *file) { // first the transitions file->taggedRead(params->data[0], sizeof(real), n_states*n_states,"HMM"); for (int i=0;idata[0] + i*n_states; // then the emissions for (int i=1;iloadXFile(file); } } void HMM::saveXFile(XFile *file) { // first the transitions file->taggedWrite(params->data[0], sizeof(real), n_states*n_states,"HMM"); // then the emissions for (int i=1;isaveXFile(file); } } void HMM::setDataSet(DataSet* data_) { if (initialize) { // the emission distributions int* selected_frames = (int*)allocator->alloc(sizeof(int)*1); for (int i=1;in_examples; if (linear_segmentation) { for (int j=0;jsetExample(j); int n_frames = data_->inputs->n_frames; real n_frames_per_state = (real)n_frames/(n_states-2); int from = (int)((i-1)*n_frames_per_state); int to = (i == n_states-2 ? 
n_frames : (int)(i*n_frames_per_state)); int n_selected_frames = to - from; selected_frames = (int*)allocator->realloc(selected_frames,sizeof(int)*n_selected_frames); int k = 0; for (int l=from;lsetExample(j); int n_frames = data_->inputs->n_frames; int n_selected_frames = n_frames; selected_frames = (int*)allocator->realloc(selected_frames,sizeof(int)*n_selected_frames); for (int l=0;lsetDataSet(&frame_sel); } // for the transitions, re-initialize to initial values given in constructor for (int i=0;i 0) *lp = log(*p); else *lp = LOG_ZERO; } } } else { // we still need to set the dataset of the emission distributions for (int i=1;isetDataSet(data_); } } void HMM::printTransitions(bool real_values, bool transitions_only) { print("transitions: %d x %d\n",n_states,n_states); for (int i=0;i %d = %f\n",i,j,exp(log_transitions[j][i])); } } else if (real_values) { print("%f ",exp(log_transitions[j][i])); } else { print("%d ",(log_transitions[j][i] != LOG_ZERO)); } } print("\n"); } } void HMM::logAlpha(Sequence* inputs) { // first, initialize everything to LOG_ZERO for (int f=0;fn_frames;f++) { for (int i=1;iframes[f][i] = LOG_ZERO; } } // case for first frame for (int i=1;iframes[0][i] = log_probabilities_s->frames[0][i] + log_transitions[i][0]; } // other cases for (int f=1;fn_frames;f++) { for (int i=1;iframes[f][i] = logAdd(log_alpha->frames[f][i], log_transitions[i][j] + log_probabilities_s->frames[f][i] + log_alpha->frames[f-1][j]); } } } // last case log_probability = LOG_ZERO; int f = inputs->n_frames-1; int i = n_states-1; for (int j=1;jframes[f][j]+log_transitions[i][j]); } } void HMM::logBeta(Sequence* inputs) { // first, initialize everything to LOG_ZERO for (int f=0;fn_frames;f++) { for (int i=1;iframes[f][i] = LOG_ZERO; } } // case for last frame int f_final = inputs->n_frames-1; for (int i=1;iframes[f_final][i] = log_transitions[n_states-1][i]; } // other cases for (int f=inputs->n_frames-2;f>=0;f--) { for (int i=1;iframes[f][j] = 
logAdd(log_beta->frames[f][j], log_transitions[i][j] + log_probabilities_s->frames[f+1][i] + log_beta->frames[f+1][i]); } } } } void HMM::logViterbi(Sequence* inputs) { // first, initialize everything to LOG_ZERO for (int f=0;fn_frames;f++) { for (int i=1;iframes[f][i] = LOG_ZERO; } } // case for first frame for (int i=1;iframes[0][i] + log_transitions[i][0]; if (v > log_alpha->frames[0][i]) { log_alpha->frames[0][i] = v; arg_viterbi->frames[0][i] = 0.0; } } // other cases for (int f=1;fn_frames;f++) { for (int i=1;iframes[f][i] + log_alpha->frames[f-1][j]; if (v > log_alpha->frames[f][i]) { log_alpha->frames[f][i] = v; arg_viterbi->frames[f][i] = (real)j; } } } } // last case log_probability = LOG_ZERO; int f = inputs->n_frames-1; int i = n_states-1; for (int j=1;jframes[f][j]+log_transitions[i][j]; if (v > log_probability) { log_probability = v; last_arg_viterbi = j; } } // now recall the state sequence if (log_probability > LOG_ZERO) { viterbi_sequence->frames[inputs->n_frames-1][0] = last_arg_viterbi; for (int f=inputs->n_frames-2;f>=0;f--) { viterbi_sequence->frames[f][0] = (real)(arg_viterbi->frames[f+1][(int)(viterbi_sequence->frames[f+1][0])]); } } else { warning("sequence impossible to train: probably too short for target"); for (int f=0;fn_frames;f++) viterbi_sequence->frames[f][0] = -1; log_probability = 0; } } void HMM::logProbabilities(Sequence *inputs) { if (n_shared_states == 0) { for (int f=0;fn_frames;f++) { for (int i=1;iframes[f][i] = states[i]->frameLogProbability(f, inputs->frames[f]); } } } else { for (int f=0;fn_frames;f++) { for (int i=0;iframeLogProbability(f, inputs->frames[f]); } for (int i=1;iframes[f][i] = states[i]->log_probabilities->frames[f][0]; } } } } real HMM::logProbability(Sequence *inputs) { logProbabilities(inputs); logAlpha(inputs); log_probabilities->frames[0][0] = log_probability; return log_probability; } real HMM::viterbiLogProbability(Sequence *inputs) { logProbabilities(inputs); logViterbi(inputs); 
log_probabilities->frames[0][0] = log_probability; return log_probability; } void HMM::eMSequenceInitialize(Sequence* inputs) { log_probabilities_s->resize(inputs->n_frames); log_alpha->resize(inputs->n_frames); log_beta->resize(inputs->n_frames); arg_viterbi->resize(inputs->n_frames); viterbi_sequence->resize(inputs->n_frames); for (int i=1;ieMSequenceInitialize(inputs); } void HMM::sequenceInitialize(Sequence* inputs) { log_probabilities_s->resize(inputs->n_frames); log_alpha->resize(inputs->n_frames); log_beta->resize(inputs->n_frames); arg_viterbi->resize(inputs->n_frames); viterbi_sequence->resize(inputs->n_frames); for (int i=1;isequenceInitialize(inputs); } void HMM::eMIterInitialize() { for (int i=1;ieMIterInitialize(); for (int i=0;iiterInitialize(); for (int i=0;in_frames;f++) { for (int i=1;iframes[f][i] != LOG_ZERO && log_beta->frames[f][i] != LOG_ZERO) { real log_posterior_i_f = log_posterior + log_alpha->frames[f][i] + log_beta->frames[f][i] - log_probability; states[i]->frameEMAccPosteriors(f, inputs->frames[f],log_posterior_i_f); } } } for (int f=1;fn_frames;f++) { for (int i=1;iframes[f][i]; for (int j=1;jframes[f-1][j] != LOG_ZERO && log_beta->frames[f][i] != LOG_ZERO && log_emit_i != LOG_ZERO) transitions_acc[i][j] += exp(log_posterior + log_alpha->frames[f-1][j] + log_transitions[i][j] + log_emit_i + log_beta->frames[f][i] - log_probability); } } } // particular case of transitions from initial state for (int j=1;jframes[0][j] != LOG_ZERO && log_probabilities_s->frames[0][j] != LOG_ZERO) transitions_acc[j][0] += exp(log_posterior + log_beta->frames[0][j] + log_probabilities_s->frames[0][j] + log_transitions[j][0] - log_probability); } // particular case of transitions to last state int f = inputs->n_frames-1; int i = n_states-1; for (int j=1;jframes[f][j] != LOG_ZERO) transitions_acc[i][j] += exp(log_posterior + log_alpha->frames[f][j] + log_transitions[i][j] - log_probability); } } void HMM::viterbiAccPosteriors(Sequence *inputs, real 
log_posterior) { // accumulate the emission and transition posteriors real p = exp(log_posterior); for (int f=0;fn_frames;f++) { int i = (int)(viterbi_sequence->frames[f][0]); if (i>=0) { states[i]->frameEMAccPosteriors(f, inputs->frames[f],log_posterior); int j = (int)arg_viterbi->frames[f][i]; if (j>0) { transitions_acc[i][j] += p; } } } // attention, il me manque le premier et le dernier arg_viterbi... } void HMM::eMUpdate() { // first the states for (int i=1;ieMUpdate(); } // then the transitions; for (int i=0;iupdate(); } // then the transitions; for (int i=0;in_frames;f++) { for (int i=1;iframes[f][i] != LOG_ZERO && log_beta->frames[f][i] != LOG_ZERO) { real posterior_i_f = *alpha->frames[0] * exp(log_alpha->frames[f][i] + log_beta->frames[f][i] - log_probability); states[i]->frameBackward(f,inputs->frames[f],NULL,NULL,&posterior_i_f); } } } // accumulate the transition posteriors for (int f=1;fn_frames;f++) { for (int i=1;iframes[f][i]; for (int j=1;jframes[f-1][j] == LOG_ZERO || log_emit_i == LOG_ZERO || log_beta->frames[f][i] == LOG_ZERO) continue; real posterior_i_j_f = *alpha->frames[0] * exp(log_alpha->frames[f-1][j] + log_transitions[i][j] + log_emit_i + log_beta->frames[f][i] - log_probability); dlog_transitions[i][j] += posterior_i_j_f; for (int k=1;kframes[0][j] == LOG_ZERO || log_probabilities_s->frames[0][j] == LOG_ZERO) continue; real posterior_i_j_f = *alpha->frames[0] * exp(log_beta->frames[0][j] + log_probabilities_s->frames[0][j] + log_transitions[j][0] - log_probability); dlog_transitions[j][0] += posterior_i_j_f; for (int k=1;kn_frames-1; int i = n_states-1; for (int j=1;jframes[f][j] == LOG_ZERO) continue; real posterior_i_j_f = *alpha->frames[0] * exp(log_alpha->frames[f][j] + log_transitions[i][j] - log_probability); dlog_transitions[i][j] += posterior_i_j_f; for (int k=1;kn_frames;f++) { int i = (int)viterbi_sequence->frames[f][0]; if (fn_frames) { states[i]->frameBackward(f, inputs->frames[f], NULL, NULL, alpha->frames[0]); } int j = 
(int)arg_viterbi->frames[f][i]; dlog_transitions[i][j] -= *alpha->frames[0]; for (int k=1;kframes[0] *exp(log_transitions[k][j]); } } } void HMM::decode(Sequence* inputs) { eMSequenceInitialize(inputs); logProbabilities(inputs); logViterbi(inputs); } HMM::~HMM() { } } torch3-3.1.orig/distributions/HMM.h0000644000175000017500000001234410106445236017375 0ustar kalfakalfa00000000000000// Copyright (C) 2003--2004 Samy Bengio (bengio@idiap.ch) // // This file is part of Torch 3.1. // // All rights reserved. // // Redistribution and use in source and binary forms, with or without // modification, are permitted provided that the following conditions // are met: // 1. Redistributions of source code must retain the above copyright // notice, this list of conditions and the following disclaimer. // 2. Redistributions in binary form must reproduce the above copyright // notice, this list of conditions and the following disclaimer in the // documentation and/or other materials provided with the distribution. // 3. The name of the author may not be used to endorse or promote products // derived from this software without specific prior written permission. // // THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR // IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES // OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. // IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, // INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT // NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF // THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
#ifndef HMM_INC
#define HMM_INC

#include "Distribution.h"
#include "Trainer.h"

namespace Torch {

/** This class implements a Hidden Markov Model distribution. It can be trained
    either by EM, Viterbi, or Gradient Descent.

    Note that this kind of HMM always contains one initial state and one final
    state. Both are non-emitting.

    Note that the log_probabilities is the average over all frames of the
    log_probability of the example.

    @author Samy Bengio (bengio@idiap.ch)
*/
class HMM : public Distribution
{
  public:
    /** The number of states of the HMM.
        the first model is the initial state,
        the last model is the final (absorbing) state,
        (neither of them is emitting).
        hence, n_states > 2
    */
    int n_states;

    /// a prior on the transition probabilities (option "prior transitions", default 1e-3)
    real prior_transitions;

    /// keep the emission distributions (states[0] and states[n_states-1] are null: they do not emit)
    Distribution** states;

    /** if the states are in fact shared in some way or another, the original
        ones are in shared_states; #states# then only holds pointers to them.
        Unlike #states#, every entry of shared_states must exist.
    */
    Distribution** shared_states;

    /// number of entries in #shared_states# (0 when the states are not shared)
    int n_shared_states;

    /// option "linear segmentation" (default false): initialize each state from its own linear slice of every example
    bool linear_segmentation;

    /// the initial transitions between states are kept as a matrix
    real** transitions;

    /** in fact, we keep the transitions in log.
        Indexed as log_transitions[destination][source]: the forward pass
        combines log_transitions[i][j] with log_alpha at the previous frame
        for state j. Each row points into the parameter vector of the HMM.
    */
    real** log_transitions;

    /// the derivative of the log transitions for gradient descent
    real** dlog_transitions;

    /// the accumulators of the transitions for EM
    real** transitions_acc;

    /// accumulator used in the forward phase to compute log likelihood
    Sequence* log_alpha;

    /// accumulator used in the backward phase to compute log likelihood
    Sequence* log_beta;

    /// for each state, for each time step, keep the best predecessor
    Sequence* arg_viterbi;

    /// arg_viterbi of the finishing state
    int last_arg_viterbi;

    /// for each time step, keep the best state (-1 on every frame when no valid path was found)
    Sequence* viterbi_sequence;

    /// keep for each time step and each model its emission log probability
    Sequence* log_probabilities_s;

    /// do we need to initialize the model? (option "initialize", default true)
    bool initialize;

    /** Builds an HMM over #n_states_# states.
        #states_# are the emission distributions and #transitions_# the initial
        transition matrix; both pointers are kept, not copied. The input size
        is taken from states_[1].
        The last two arguments are only needed when the states are shared
        (see #shared_states#).
    */
    HMM(int n_states_, Distribution **states_, real** transitions_, int n_shared_states = 0, Distribution **shared_states_ = NULL);

    /** When #initialize# is set, initializes the emission distributions from
        frames selected in #data_# (a linear slice per state when
        #linear_segmentation# is set, all frames otherwise) and re-initializes
        the log transitions from #transitions#. Otherwise it only passes
        #data_# on to the emission distributions.
    */
    virtual void setDataSet(DataSet* data_);

    /// reads the transitions first (tagged "HMM"), then the emission distributions
    virtual void loadXFile(XFile *file);

    /// writes the transitions first (tagged "HMM"), then the emission distributions
    virtual void saveXFile(XFile *file);

    /// this method can be used for debugging purpose to see the transitions
    virtual void printTransitions(bool real_values=false,bool transitions_only=false);

    /// computes the log_alpha during forward phase of EM (also sets log_probability)
    virtual void logAlpha(Sequence* inputs);

    /// computes the log_beta during backward phase of EM
    virtual void logBeta(Sequence* inputs);

    /** computes the log_viterbi during forward phase of Viterbi and fills
        #viterbi_sequence#. When no path has a probability above LOG_ZERO a
        warning is issued, #viterbi_sequence# is set to -1 on every frame and
        log_probability is set to 0.
    */
    virtual void logViterbi(Sequence* inputs);

    /** computes the state sequence associated to the input. Nothing is
        returned: the result is left in #viterbi_sequence#.
        Runs eMSequenceInitialize, logProbabilities and logViterbi.
    */
    virtual void decode(Sequence* input);

    /** computes for each state and each time step of the sequence #inputs#
        its associated emission probability.
    */
    virtual void logProbabilities(Sequence *inputs);

    /// forward (logAlpha) log probability of #inputs#; also stored in log_probabilities->frames[0][0]
    virtual real logProbability(Sequence *inputs);

    /// Viterbi (logViterbi) log probability of #inputs#; also stored in log_probabilities->frames[0][0]
    virtual real viterbiLogProbability(Sequence *inputs);

    virtual void iterInitialize();
    virtual void eMIterInitialize();

    /// resizes the per-frame work sequences to the number of frames of #inputs#
    virtual void eMSequenceInitialize(Sequence* inputs);

    /// resizes the per-frame work sequences to the number of frames of #inputs#
    virtual void sequenceInitialize(Sequence* inputs);

    /// accumulates emission and transition posteriors (into #transitions_acc#) for EM
    virtual void eMAccPosteriors(Sequence *inputs, real log_posterior);

    /// same accumulation, but along the single path stored in #viterbi_sequence#
    virtual void viterbiAccPosteriors(Sequence *inputs, real log_posterior);

    virtual void eMUpdate();
    virtual void update();

    /// gradient pass; the first value of alpha->frames[0] scales every posterior
    virtual void backward(Sequence *inputs, Sequence *alpha);

    /// gradient pass along the Viterbi path
    virtual void viterbiBackward(Sequence *inputs, Sequence *alpha);

    virtual ~HMM();
};

}

#endif
torch3-3.1.orig/distributions/KMeans.cc0000644000175000017500000001371610106445236020274 0ustar kalfakalfa00000000000000// Copyright (C) 2003--2004 Samy Bengio (bengio@idiap.ch) // // This file is part of Torch 3.1. // // All rights reserved. // // Redistribution and use in source and binary forms, with or without // modification, are permitted provided that the following conditions // are met: // 1.
Redistributions of source code must retain the above copyright // notice, this list of conditions and the following disclaimer. // 2. Redistributions in binary form must reproduce the above copyright // notice, this list of conditions and the following disclaimer in the // documentation and/or other materials provided with the distribution. // 3. The name of the author may not be used to endorse or promote products // derived from this software without specific prior written permission. // // THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR // IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES // OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. // IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, // INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT // NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF // THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
#include "KMeans.h" #include "log_add.h" #include "Random.h" namespace Torch { KMeans::KMeans(int n_inputs_, int n_gaussians_) : DiagonalGMM(n_inputs_, n_gaussians_) { min_cluster = new (allocator)Sequence(1,1); addBOption("intitialize parameters", &initialize_parameters , true, "initialize the kmeans parameters from the data"); } void KMeans::setDataSet(DataSet* data_) { // initialize the parameters using some examples in the dataset randomly int tot_n_frames = 0; int* example_size = (int*) allocator->alloc(sizeof(int)*data_->n_examples); int* ex_s = example_size; for(int i=0; in_examples; i++){ data_->getNumberOfFrames(i, ex_s, NULL); tot_n_frames += *ex_s++; } /* if(tot_n_frames < n_gaussians) error("The number of frame: %d is smaller than the number of gaussians: %d",tot_n_frames, n_gaussians); */ int n_part = (int)(tot_n_frames/(real)n_gaussians); int sum = 0; int ex = 0; for (int i=0;isetExample(ex); real *x = data_->inputs->frames[index - sum]; real *means_i = means[i]; real *var_i = var[i]; real *thresh = var_threshold; for(int j = 0; j < n_inputs; j++) { *means_i++ = *x++; *var_i++ = *thresh++; } log_weights[i] = log(1./n_gaussians); } allocator->free(example_size); } void KMeans::eMIterInitialize() { // initialize the accumulators to 0 for (int i=0;iresize(inputs->n_frames); DiagonalGMM::eMSequenceInitialize(inputs); } real KMeans::frameLogProbability(int t, real *inputs) { real min_dist = INF; int min_i = -1; for (int i=0;iframes[t][0] = -min_dist; min_cluster->frames[t][0] = (real)min_i; return -min_dist; } real KMeans::frameLogProbabilityOneGaussian(int g, real *inputs) { real dist = 0; real* means_g = means[g]; real *x = inputs; for(int j = 0; j < n_inputs; j++) { real diff = *x++ - *means_g++; dist += diff*diff; } return dist; } void KMeans::frameEMAccPosteriors(int t, real *inputs, real log_posterior) { int min_i = (int)min_cluster->frames[t][0]; real* means_acc_i = means_acc[min_i]; real* var_acc_i = var_acc[min_i]; real *x = inputs; for(int j = 0; 
j < n_inputs; j++) { *var_acc_i++ += *x * *x; *means_acc_i++ += *x++; } weights_acc[min_i] ++; } void KMeans::frameBackward(int t, real *f_inputs, real *beta_, real *f_outputs, real *alpha_) { int min_i = (int)min_cluster->frames[t][0]; real* min_means = means[min_i]; real* min_dmeans = dmeans[min_i]; for (int i=0;i= var_threshold[j] ? v : var_threshold[j]; } } } // then the weights real sum_weights_acc = 0; p_weights_acc = weights_acc; for (int i=0;idata[0]; gamma = params->data[0] + n_inputs*n_outputs; der_mu = der_params->data[0]; der_gamma = der_params->data[0] + n_inputs*n_outputs; } void LogRBF::setDataSet(DataSet* data_) { if(initial_kmeans_trainer) { initial_kmeans_trainer->train(data_, NULL); KMeans *kmeans = (KMeans *)initial_kmeans_trainer->distribution; for(int i = 0; i < n_outputs; i++) { real *src = kmeans->means[i]; real *dest = mu + i*n_inputs; for(int j = 0; j < n_inputs; j++) dest[j] = src[j]; } } else { for(int i = 0; i < n_inputs*n_outputs; i++) mu[i] = Random::uniform(); } for(int i = 0; i < n_inputs*n_outputs; i++) gamma[i] = 1./sqrt((real)n_inputs); } void LogRBF::frameForward(int t, real *f_inputs, real *f_outputs) { real *mu_ = mu; real *gamma_ = gamma; for(int i = 0; i < n_outputs; i++) { real out = 0; for(int j = 0; j < n_inputs; j++) { real z = (f_inputs[j] - mu_[j]) * gamma_[j]; out += z*z; } f_outputs[i] = -0.5*out; mu_ += n_inputs; gamma_ += n_inputs; } } void LogRBF::frameBackward(int t, real *f_inputs, real *beta_, real *f_outputs, real *alpha_) { for(int i = 0; i < n_inputs; i++) beta_[i] = 0; real *mu_ = mu; real *gamma_ = gamma; real *der_mu_ = der_mu; real *der_gamma_ = der_gamma; for(int i = 0; i < n_outputs; i++) { real z = alpha_[i]; for(int j = 0; j < n_inputs; j++) { real gamma__ = gamma_[j]; real diff = f_inputs[j] - mu_[j]; real zz = z * diff * gamma__ * gamma__; der_mu_[j] += zz; beta_[j] -= zz; der_gamma_[j] -= z * diff*diff * gamma__; } mu_ += n_inputs; gamma_ += n_inputs; der_mu_ += n_inputs; der_gamma_ += n_inputs; } 
} LogRBF::~LogRBF() { } } torch3-3.1.orig/distributions/LogRBF.h0000644000175000017500000000517010106445236020026 0ustar kalfakalfa00000000000000// Copyright (C) 2003--2004 Ronan Collobert (collober@idiap.ch)
//
// This file is part of Torch 3.1.
//
// All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions
// are met:
// 1. Redistributions of source code must retain the above copyright
//    notice, this list of conditions and the following disclaimer.
// 2. Redistributions in binary form must reproduce the above copyright
//    notice, this list of conditions and the following disclaimer in the
//    documentation and/or other materials provided with the distribution.
// 3. The name of the author may not be used to endorse or promote products
//    derived from this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
// IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
// OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
// IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
// INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
// NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
// THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

#ifndef LOG_RBF_INC
#define LOG_RBF_INC

#include "GradientMachine.h"
#include "EMTrainer.h"

namespace Torch {

/** LogRBF layer for #GradientMachine#.

    Formally speaking,
    $outputs[i] = -0.5 \sum_j gamma_ij^2 * (inputs[j] - mu_ij)^2$.\\

    $mu_ij$ and $gamma_ij$ are in #params#, stored as two consecutive blocks
    of n_inputs*n_outputs values each: the gamma block starts
    n_inputs*n_outputs values after the beginning of #params# (the mu block
    is expected to come first -- TODO confirm in the constructor). Inside a
    block the values of output i are contiguous and start at i*n_inputs.
    The derivatives use the same layout.\\

    For a better initialization, one can provide a #EMTrainer# using a
    #Kmeans# distribution: its means are copied into mu. Without a trainer,
    mu is drawn with Random::uniform(). In both cases every gamma is set to
    1/sqrt(n_inputs).

    @author Ronan Collobert (collober@idiap.ch)
*/
class LogRBF : public GradientMachine
{
  public:
    /// scale factors gamma_ij (see the layout in the class comment)
    real *gamma;

    /// centers mu_ij, addressed as mu + i*n_inputs for output i
    real *mu;

    /// derivative of the criterion with respect to #gamma#
    real *der_gamma;

    /// derivative of the criterion with respect to #mu#
    real *der_mu;

    /// optional initialization using a Kmeans
    EMTrainer* initial_kmeans_trainer;

    /// #kmeans_trainer# is optional; when given, setDataSet trains it and copies its means into #mu#.
    LogRBF(int n_inputs_, int n_outputs_, EMTrainer* kmeans_trainer=NULL);

    //-----

    /// initializes #mu# (k-means means or uniform random values) and #gamma# (1/sqrt(n_inputs))
    virtual void setDataSet(DataSet* data_);

    /// writes -0.5 * sum_j ((f_inputs[j] - mu_ij) * gamma_ij)^2 into f_outputs[i]; #t# is not used
    virtual void frameForward(int t, real *f_inputs, real *f_outputs);

    /// resets #beta_# to 0, then accumulates into #beta_#, #der_mu# and #der_gamma# from #alpha_#
    virtual void frameBackward(int t, real *f_inputs, real *beta_, real *f_outputs, real *alpha_);

    virtual ~LogRBF();
};

}

#endif
torch3-3.1.orig/distributions/Makefile0000644000175000017500000000172710106445236020246 0ustar kalfakalfa00000000000000# get user and architecture specific options OS := $(shell uname -s) TORCHDIR := $(shell cd ..; pwd) include ../Makefile_options_$(OS) CC_FILES := $(wildcard *.cc) OBJS := $(foreach f,$(CC_FILES),$(OBJS_DIR)/$(patsubst %.cc,%.o,$(f))) all: $(LIBTORCH) $(LIBTORCH): $(OBJS) @echo "Archiving..." @$(AR) $(LIBTORCH) $(OBJS) $(OBJS_DIR)/%.o: %.cc @echo $< @$(CC) $(CFLAGS_$(MODE)) $(INCS) -o $@ -c $< distclean: @\rm -f .deps_* clean: @echo "Remove objects file and dependencies..." @\rm -Rf $(OBJS) $(LIBTORCH) @\rm -f .deps_$(VERSION_KEY) depend: @echo "Tracking dependencies..."
@\rm -f .deps_$(VERSION_KEY) @for file in *.cc ; do printf "$(OBJS_DIR)/" >> .deps_$(VERSION_KEY); $(DEP) $(CFLAGS_$(MODE)) $(INCS) $$file >> .deps_$(VERSION_KEY); done .deps_$(VERSION_KEY): @echo ">>> Please do a 'make depend' <<<" exit 10 ifneq ($(MAKECMDGOALS),distclean) ifneq ($(MAKECMDGOALS),clean) ifneq ($(MAKECMDGOALS),depend) include .deps_$(VERSION_KEY) endif endif endif torch3-3.1.orig/distributions/Multinomial.cc0000644000175000017500000001155210106445236021404 0ustar kalfakalfa00000000000000// Copyright (C) 2003--2004 Samy Bengio (bengio@idiap.ch) // // This file is part of Torch 3.1. // // All rights reserved. // // Redistribution and use in source and binary forms, with or without // modification, are permitted provided that the following conditions // are met: // 1. Redistributions of source code must retain the above copyright // notice, this list of conditions and the following disclaimer. // 2. Redistributions in binary form must reproduce the above copyright // notice, this list of conditions and the following disclaimer in the // documentation and/or other materials provided with the distribution. // 3. The name of the author may not be used to endorse or promote products // derived from this software without specific prior written permission. // // THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR // IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES // OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
// IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, // INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT // NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF // THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #include "Multinomial.h" #include "log_add.h" #include "Random.h" namespace Torch { Multinomial::Multinomial(int n_values_) : Distribution(1,n_values_) { n_values = n_values_; addROption("prior weights", &prior_weights , 1e-3, "minimum weights for each gaussians"); addBOption("equal initialization", &equal_initialization , false, "equal initialization"); log_weights = (real*)params->data[0]; dlog_weights = (real*)der_params->data[0]; weights_acc = (real*)allocator->alloc(sizeof(real)*n_values); } void Multinomial::setDataSet(DataSet* data_) { // here, initialize the parameters somehow... 
real sum = 0.; if (equal_initialization) { // initialize the weights with equal values for (int i=0;iresize(inputs->n_frames); } void Multinomial::sequenceInitialize(Sequence* inputs) { eMSequenceInitialize(inputs); } real Multinomial::frameLogProbability(int t, real *inputs) { int obs = (int)inputs[0]; if (obs < 0 || obs >= n_values) error("Multinomial::frameLogProbability observed an non-realistic value: %d\n",obs); real log_prob = log_weights[obs]; log_probabilities->frames[t][0] = log_prob; return log_prob; } void Multinomial::frameEMAccPosteriors(int t, real *inputs, real log_posterior) { int obs = (int)inputs[0]; if (obs < 0 || obs >= n_values) error("Multinomial::frameEMAccPosteriors observed an non-realistic value: %d\n",obs); weights_acc[obs] += exp(log_posterior); } void Multinomial::eMUpdate() { real* p_weights_acc = weights_acc; real sum_weights_acc = 0; for (int i=0;i= n_values) error("Multinomial::frameBackward observed an non-realistic value: %d\n",obs); dlog_weights[obs] += *alpha_; for (int i=0;iresize(n_outputs); outputs->resize(n_outputs); } void NLLCriterion::reset() { } void NLLCriterion::forward(Sequence *inputs) { outputs->frames[0][0] = 0; for (int i=0;in_frames;i++) outputs->frames[0][0] -= inputs->frames[i][0]; } void NLLCriterion::backward(Sequence *inputs, Sequence *alpha) { beta->frames[0][0] = -1; } NLLCriterion::~NLLCriterion() { } } torch3-3.1.orig/distributions/NLLCriterion.h0000644000175000017500000000440010106445236021252 0ustar kalfakalfa00000000000000// Copyright (C) 2003--2004 Samy Bengio (bengio@idiap.ch) // // This file is part of Torch 3.1. // // All rights reserved. // // Redistribution and use in source and binary forms, with or without // modification, are permitted provided that the following conditions // are met: // 1. Redistributions of source code must retain the above copyright // notice, this list of conditions and the following disclaimer. // 2. 
//    Redistributions in binary form must reproduce the above copyright
//    notice, this list of conditions and the following disclaimer in the
//    documentation and/or other materials provided with the distribution.
// 3. The name of the author may not be used to endorse or promote products
//    derived from this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
// IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
// OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
// IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
// INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
// NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
// THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

// nll criterion.
// simply returns the negative log likelihood

#ifndef NLL_CRITERION_INC
#define NLL_CRITERION_INC

#include "Criterion.h"

namespace Torch {

/** This criterion can be used to train #Distribution# object
    using the #GMTrainer# trainer. It then maximizes the
    log likelihood of the data.

    The #forward# method stores in outputs->frames[0][0] the negated sum of
    inputs->frames[i][0] over the input frames, i.e. the negative log
    likelihood when the inputs are log likelihoods, while the #backward#
    method sets the gradient beta->frames[0][0] to -1.

    @author Samy Bengio (bengio@idiap.ch)
*/
class NLLCriterion : public Criterion
{
  public:
    ///
    NLLCriterion();

    /// does nothing for this criterion
    virtual void reset();

    /// outputs->frames[0][0] = - sum_i inputs->frames[i][0]
    virtual void forward(Sequence *inputs);

    /// sets beta->frames[0][0] to -1; #inputs# and #alpha# are not read
    virtual void backward(Sequence *inputs, Sequence *alpha);

    virtual ~NLLCriterion();
};

}

#endif
torch3-3.1.orig/distributions/NLLMeasurer.cc0000644000175000017500000000505510106445236021244 0ustar kalfakalfa00000000000000// Copyright (C) 2003--2004 Johnny Mariethoz (Johnny.Mariethoz@idiap.ch) // // This file is part of Torch 3.1. // // All rights reserved. // // Redistribution and use in source and binary forms, with or without // modification, are permitted provided that the following conditions // are met: // 1. Redistributions of source code must retain the above copyright // notice, this list of conditions and the following disclaimer. // 2. Redistributions in binary form must reproduce the above copyright // notice, this list of conditions and the following disclaimer in the // documentation and/or other materials provided with the distribution. // 3. The name of the author may not be used to endorse or promote products // derived from this software without specific prior written permission. // // THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR // IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES // OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. // IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, // INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT // NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF // THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#include "NLLMeasurer.h"

namespace Torch {

/** Build a measurer reading log-likelihood values from #inputs_#.
    Registers the two averaging switches (both enabled by default) and
    starts with an empty accumulator. */
NLLMeasurer::NLLMeasurer(Sequence *inputs_, DataSet *data_, XFile *file_)
  : Measurer(data_, file_)
{
  inputs = inputs_;

  addBOption("average examples", &average_examples, true, "divided by the number of examples");
  addBOption("average frames", &average_frames, true, "divided by the number of frames");

  // same effect as reset()
  internal_error = 0;
}

/** Accumulate the negated first component of every frame of #inputs#
    for the current example. */
void NLLMeasurer::measureExample()
{
  real example_nll = .0;

  int frame = 0;
  while(frame < inputs->n_frames)
  {
    example_nll -= inputs->frames[frame][0];
    frame++;
  }

  // The normalisation uses the number of input frames of the data set,
  // not the number of frames of the measured sequence.
  if(average_frames)
    example_nll /= data->inputs->n_frames;

  internal_error += example_nll;
}

/** Write the accumulated value (optionally averaged over the examples)
    to the file, flush it and clear the accumulator. */
void NLLMeasurer::measureIteration()
{
  if(average_examples)
    internal_error /= data->n_examples;

  if(!binary_mode)
    file->printf("%g\n", internal_error);
  else
    file->write(&internal_error, sizeof(real), 1);

  file->flush();
  reset();
}

/// Clear the accumulator.
void NLLMeasurer::reset()
{
  internal_error = 0;
}

NLLMeasurer::~NLLMeasurer()
{
}

}
torch3-3.1.orig/distributions/NLLMeasurer.h0000644000175000017500000000453610106445236021111 0ustar kalfakalfa00000000000000// Copyright (C) 2003--2004 Johnny Mariethoz (Johnny.Mariethoz@idiap.ch) // // This file is part of Torch 3.1. // // All rights reserved. // // Redistribution and use in source and binary forms, with or without // modification, are permitted provided that the following conditions // are met: // 1. Redistributions of source code must retain the above copyright // notice, this list of conditions and the following disclaimer. // 2. Redistributions in binary form must reproduce the above copyright // notice, this list of conditions and the following disclaimer in the // documentation and/or other materials provided with the distribution. // 3. The name of the author may not be used to endorse or promote products // derived from this software without specific prior written permission. 
// // THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR // IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES // OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. // IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, // INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT // NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF // THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #ifndef NLL_MEASURER_INC #define NLL_MEASURER_INC #include "Measurer.h" #include "DataSet.h" namespace Torch { /** This class measures the negative log likelihood. In fact, it supposes that the input given is the positive log likelihood. It can then normalized by the total number of frames and/or the number of total of examples. @author Johnny Mariethoz (Johnny.Mariethoz@idiap.ch) */ class NLLMeasurer : public Measurer { public: bool average_examples; bool average_frames; real internal_error; /// contains a pointer to the negative log likelihood to measure Sequence *inputs; /// NLLMeasurer(Sequence *inputs_, DataSet* data_, XFile *file_); virtual void reset(); virtual void measureExample(); virtual void measureIteration(); virtual ~NLLMeasurer(); }; } #endif torch3-3.1.orig/distributions/ParzenDistribution.cc0000644000175000017500000000715510106445236022755 0ustar kalfakalfa00000000000000// Copyright (C) 2003--2004 Samy Bengio (bengio@idiap.ch) // // This file is part of Torch 3.1. // // All rights reserved. // // Redistribution and use in source and binary forms, with or without // modification, are permitted provided that the following conditions // are met: // 1. 
Redistributions of source code must retain the above copyright // notice, this list of conditions and the following disclaimer. // 2. Redistributions in binary form must reproduce the above copyright // notice, this list of conditions and the following disclaimer in the // documentation and/or other materials provided with the distribution. // 3. The name of the author may not be used to endorse or promote products // derived from this software without specific prior written permission. // // THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR // IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES // OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. // IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, // INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT // NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF // THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
#include "ParzenDistribution.h"
#include "log_add.h"
#include "DataSet.h"

namespace Torch {

/** Create an estimator over #n_inputs_#-dimensional frames with an
    isotropic Gaussian kernel of variance #var_#. No data set is attached
    yet: call #setDataSet()# before asking for probabilities. */
ParzenDistribution::ParzenDistribution(int n_inputs_, real var_) : Distribution(n_inputs_,0)
{
  data = NULL;
  setVar(var_);
  n_train_examples_index = 0;
  train_examples_index = NULL;
}

/** Set the kernel variance and refresh the two cached constants used by
    #frameLogProbabilityOneFrame()#. */
void ParzenDistribution::setVar(real var_)
{
  var = var_;
  // log of the Gaussian normalisation term: -d/2 * (log(2 pi) + log(var))
  sum_log_var_plus_n_obs_log_2_pi = -0.5 * n_inputs*(LOG_2_PI + log(var));
  minus_half_over_var = -0.5 / var;
}

/** Attach the training set and remember the real indices of its currently
    selected examples, so that they can be revisited later with
    #setRealExample()#. */
void ParzenDistribution::setDataSet(DataSet* dataset_)
{
  data = dataset_;
  n_train_examples_index = data->n_examples;
  train_examples_index = (int*)allocator->realloc(train_examples_index,n_train_examples_index*sizeof(int));
  for (int i=0;i<data->n_examples;i++) {
    train_examples_index[i] = data->selected_examples[i];
  }
}

/// Resize the log-probability buffer to the length of #inputs# (no-op on NULL).
void ParzenDistribution::eMSequenceInitialize(Sequence* inputs)
{
  if (!inputs)
    return;
  log_probabilities->resize(inputs->n_frames);
}

/// Same initialisation as #eMSequenceInitialize()#.
void ParzenDistribution::sequenceInitialize(Sequence* inputs)
{
  eMSequenceInitialize(inputs);
}

/** Log-density of frame #t# of the data set's current example:
    log( 1/N * sum_i K(x, x_i) ), where the sum runs over every frame of
    every stored training example and N is the total number of such frames.

    The kernel log-densities are combined with a running log-sum-exp.
    Adding them directly would give the log of the \emph{product} of the
    kernels, which is not the Parzen estimator.

    The frame is read from the data set's current input sequence; the
    #inputs# argument is not used. The result is also stored in
    #log_probabilities->frames[t][0]#.
*/
real ParzenDistribution::frameLogProbability(int t, real *inputs)
{
  // first keep the current pointers...
  Sequence *current_seq = data->inputs;
  data->pushExample();

  // then compute the likelihood...
  real lp = 0;
  bool have_term = false;
  int tot_n_frames = 0;
  int *i_ptr = train_examples_index;
  for (int i=0;i<n_train_examples_index;i++) {
    data->setRealExample(*i_ptr++);
    Sequence* seq = data->inputs;
    tot_n_frames += seq->n_frames;
    for (int j=0;j<seq->n_frames;j++) {
      real lp_ij = frameLogProbabilityOneFrame(seq->frames[j],current_seq->frames[t]);
      // lp = log(exp(lp) + exp(lp_ij)), factoring out the larger term so
      // that the exponential never overflows
      if (!have_term) {
        lp = lp_ij;
        have_term = true;
      } else if (lp_ij > lp) {
        lp = lp_ij + log(1. + exp(lp - lp_ij));
      } else {
        lp += log(1. + exp(lp_ij - lp));
      }
    }
  }
  // NOTE(review): with no training frame at all this is log(0), exactly as
  // before this change; callers are expected to attach a non-empty set.
  lp -= log((real)tot_n_frames);
  log_probabilities->frames[t][0] = lp;

  // restore the dataset status
  data->popExample();

  return lp;
}

/** Log-density of #inputs# under one isotropic Gaussian kernel of variance
    #var# centred on #mean#. */
real ParzenDistribution::frameLogProbabilityOneFrame(real *inputs, real *mean)
{
  real sum_xmu = 0.;
  real *x = inputs;
  real *m = mean;
  for(int j = 0; j < n_inputs; j++) {
    real xmu = (*x++ - *m++);
    sum_xmu += xmu*xmu;
  }
  real lp = sum_xmu*minus_half_over_var + sum_log_var_plus_n_obs_log_2_pi;
  return lp;
}

ParzenDistribution::~ParzenDistribution()
{
}

}
torch3-3.1.orig/distributions/ParzenDistribution.h0000644000175000017500000000546110106445236022615 0ustar kalfakalfa00000000000000// Copyright (C) 2003--2004 Samy Bengio (bengio@idiap.ch) // // This file is part of Torch 3.1. // // All rights reserved. // // Redistribution and use in source and binary forms, with or without // modification, are permitted provided that the following conditions // are met: // 1. Redistributions of source code must retain the above copyright // notice, this list of conditions and the following disclaimer. // 2. Redistributions in binary form must reproduce the above copyright // notice, this list of conditions and the following disclaimer in the // documentation and/or other materials provided with the distribution. // 3. The name of the author may not be used to endorse or promote products // derived from this software without specific prior written permission. // // THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR // IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES // OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
// IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, // INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT // NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF // THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #ifndef PARZEN_DISTRIBUTION_INC #define PARZEN_DISTRIBUTION_INC #include "Distribution.h" namespace Torch { /** This class can be used to model a Parzen density estimator with a Gaussian kernel: $ p(x) = \frac{1}{N}\sum_i \frac{1}{(2 \Pi var)^{d/2}} \exp(- \frac{||x - x_i||^2}{2 var})$ where the sum is done on the whole training set. @author Samy Bengio (bengio@idiap.ch) */ class ParzenDistribution : public Distribution { public: /// the variance used real var; /// the dataset DataSet* data; /// the indices of the training examples int *train_examples_index; int n_train_examples_index; /** in order to faster the computation, we can do some "pre-computation" pre-computed sum_log_var + n_obs * log_2_pi */ real sum_log_var_plus_n_obs_log_2_pi; /// pre-computed -0.5 / var real minus_half_over_var; ParzenDistribution(int n_inputs_, real var_); virtual void setDataSet(DataSet* dataset_); virtual void setVar(real var_); virtual real frameLogProbability(int t, real *inputs); virtual real frameLogProbabilityOneFrame(real *inputs, real *mean); virtual void eMSequenceInitialize(Sequence* inputs); virtual void sequenceInitialize(Sequence* inputs); virtual ~ParzenDistribution(); }; } #endif torch3-3.1.orig/distributions/TableLookupDistribution.cc0000644000175000017500000000410710106445236023731 0ustar kalfakalfa00000000000000// Copyright (C) 2003--2004 Samy Bengio (bengio@idiap.ch) // // This file is part of Torch 3.1. // // All rights reserved. 
// // Redistribution and use in source and binary forms, with or without // modification, are permitted provided that the following conditions // are met: // 1. Redistributions of source code must retain the above copyright // notice, this list of conditions and the following disclaimer. // 2. Redistributions in binary form must reproduce the above copyright // notice, this list of conditions and the following disclaimer in the // documentation and/or other materials provided with the distribution. // 3. The name of the author may not be used to endorse or promote products // derived from this software without specific prior written permission. // // THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR // IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES // OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. // IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, // INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT // NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF // THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
#include "TableLookupDistribution.h"

namespace Torch {

/** Read the (log-)probability from component #column_# of each frame.
    The prior is kept as given when a log is applied to the observation,
    and kept in the log domain otherwise. */
TableLookupDistribution::TableLookupDistribution(int column_, bool apply_log_, real prior_)
  : Distribution(column_+1,0)
{
  column = column_;
  apply_log = apply_log_;

  if (apply_log)
    prior = prior_;
  else
    prior = log(prior_);
}

/** Normalised log-probability of one frame; the value is also written to
    #log_probabilities->frames[t][0]#. */
real TableLookupDistribution::frameLogProbability(int t, real *inputs)
{
  real frame_lp;
  if (!apply_log) {
    // observation already in the log domain: subtract the log prior
    frame_lp = inputs[column] - prior;
  } else {
    // observation is a probability: divide by the prior, then take the log
    frame_lp = log(inputs[column] / prior);
  }

  log_probabilities->frames[t][0] = frame_lp;
  return frame_lp;
}

TableLookupDistribution::~TableLookupDistribution()
{
}

}
torch3-3.1.orig/distributions/TableLookupDistribution.h0000644000175000017500000000504110106445236023571 0ustar kalfakalfa00000000000000// Copyright (C) 2003--2004 Samy Bengio (bengio@idiap.ch) // // This file is part of Torch 3.1. // // All rights reserved. // // Redistribution and use in source and binary forms, with or without // modification, are permitted provided that the following conditions // are met: // 1. Redistributions of source code must retain the above copyright // notice, this list of conditions and the following disclaimer. // 2. Redistributions in binary form must reproduce the above copyright // notice, this list of conditions and the following disclaimer in the // documentation and/or other materials provided with the distribution. // 3. The name of the author may not be used to endorse or promote products // derived from this software without specific prior written permission. // // THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR // IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES // OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
// IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, // INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT // NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF // THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #ifndef TABLE_LOOKUP_DISTRIBUTION_INC #define TABLE_LOOKUP_DISTRIBUTION_INC #include "Distribution.h" namespace Torch { /** This class outputs one of the observations as the logProbability. It can eventually apply a log transformation and/or normalize by a given prior. It can therefore be used in conjunction with HMMs to implement the HMM/ANN hybrid model... @author Samy Bengio (bengio@idiap.ch) */ class TableLookupDistribution : public Distribution { public: /** The column in the observation vector that corresponds to the logProbability. */ int column; /// do we apply a log transformation bool apply_log; /// do we normalize by a given prior real prior; /** The column number corresponds to the logProbability which can be normalized by an eventual prior. */ TableLookupDistribution(int column_ = 0, bool apply_log_ = true, real prior_ = 1.); virtual real frameLogProbability(int t, real *inputs); virtual ~TableLookupDistribution(); }; } #endif torch3-3.1.orig/distributions/MAPDiagonalGMM.cc0000644000175000017500000001565110106445237021534 0ustar kalfakalfa00000000000000// Copyright (C) 2003--2004 Johnny Mariethoz (Johnny.Mariethoz@idiap.ch) // and Samy Bengio (bengio@idiap.ch) // // This file is part of Torch 3.1. // // All rights reserved. // // Redistribution and use in source and binary forms, with or without // modification, are permitted provided that the following conditions // are met: // 1. 
Redistributions of source code must retain the above copyright // notice, this list of conditions and the following disclaimer. // 2. Redistributions in binary form must reproduce the above copyright // notice, this list of conditions and the following disclaimer in the // documentation and/or other materials provided with the distribution. // 3. The name of the author may not be used to endorse or promote products // derived from this software without specific prior written permission. // // THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR // IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES // OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. // IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, // INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT // NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF // THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
#include "MAPDiagonalGMM.h"
#include "log_add.h"

namespace Torch {

/** Build a GMM with the same number of inputs and Gaussians as
    #prior_distribution_#, and register the MAP adaptation options.
    #prior_distribution_# must not be NULL: it is dereferenced to size
    the base class. */
MAPDiagonalGMM::MAPDiagonalGMM(DiagonalGMM* prior_distribution_) : DiagonalGMM(prior_distribution_->n_inputs, prior_distribution_->n_gaussians)
{
  prior_distribution = prior_distribution_;
  addROption("weight on prior", &weight_on_prior , 0.5, "weight for the prior distribution for MAP adaptation");
  addBOption("learn weights", &learn_weights, false, "learn the weights of gaussians");
  addBOption("learn variances", &learn_variances, false, "learn the variances of gaussians");
  // help text previously said "variances" (copy-paste slip)
  addBOption("learn means", &learn_means, false, "learn the means of gaussians");
}

/** Initialize the parameters from the prior distribution when there is
    one, otherwise fall back to the default #DiagonalGMM# reset.
    #data_# itself is not used. */
void MAPDiagonalGMM::setDataSet(DataSet* data_)
{
  // here, initialize the parameters to the parameters of the prior
  // distribution
  if (prior_distribution)
    params->copy(prior_distribution->params);
  else
    DiagonalGMM::reset();
}

void MAPDiagonalGMM::frameViterbiAccPosteriors(int t, real *inputs, real log_posterior) { if(learn_variances){ DiagonalGMM::frameViterbiAccPosteriors(t, inputs, log_posterior); return; } real *p_weights_acc = weights_acc; real *lp_i = log_probabilities_g->frames[t]; real *log_w_i = log_weights; real max_lpg = LOG_ZERO; int best_g = 0; //findmax for (int i=0;i max_lpg){ best_g = i; max_lpg = post_i; } } p_weights_acc[best_g] += 1; real* means_acc_i = means_acc[best_g]; real *x = inputs; for(int j = 0; j < n_inputs; j++) { *means_acc_i++ += *x++; } } void MAPDiagonalGMM::frameEMAccPosteriors(int t, real *inputs, real log_posterior) { if(learn_variances){ DiagonalGMM::frameEMAccPosteriors(t, inputs, log_posterior); return; } real log_prob = log_probabilities->frames[t][0]; real *p_weights_acc = weights_acc; real *lp_i = log_probabilities_g->frames[t]; real *log_w_i = log_weights; for (int i=0;imeans[i]; real* p_means_i = means[i]; for (int j=0;jmeans[i]; real* p_means_i = means[i]; real* p_means_acc_i = means_acc[i]; for (int j=0;jmeans[i]; real* p_var_prior_i = prior_distribution->var[i]; for (int j=0;j= var_threshold[j] ? 
var_map : var_threshold[j]; } } } if(learn_weights){ // then the weights real sum_weights_acc = 0; p_weights_acc = weights_acc; for (int i=0;ilog_weights; real log_sum = log(sum_weights_acc); p_weights_acc = weights_acc; for (int i=0;ieMUpdate(); } // then the transitions; for (int i=0;ilog_transitions[j][i], log_1_weight_on_prior + log(transitions_acc[j][i]) - log_sum); } } } MAPHMM::~MAPHMM() { } } torch3-3.1.orig/distributions/MAPHMM.h0000644000175000017500000000474610106445237017743 0ustar kalfakalfa00000000000000// Copyright (C) 2003--2004 Johnny Mariethoz (marietho@idiap.ch) // and Samy Bengio (bengio@idiap.ch) // // This file is part of Torch 3.1. // // All rights reserved. // // Redistribution and use in source and binary forms, with or without // modification, are permitted provided that the following conditions // are met: // 1. Redistributions of source code must retain the above copyright // notice, this list of conditions and the following disclaimer. // 2. Redistributions in binary form must reproduce the above copyright // notice, this list of conditions and the following disclaimer in the // documentation and/or other materials provided with the distribution. // 3. The name of the author may not be used to endorse or promote products // derived from this software without specific prior written permission. // // THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR // IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES // OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
// IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, // INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT // NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF // THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #ifndef MAP_HMM_INC #define MAP_HMM_INC #include "HMM.h" namespace Torch { /** This class is a special case of a HMM that implements the MAP algorithm for HMM transitions probabilities. @author Samy Bengio (bengio@idiap.ch) @author Johnny Mariethoz (marietho@idiap.ch) */ class MAPHMM : public HMM { public: /// The prior distribution used in MAP HMM* prior_distribution; /// The weight to give to the prior parameters during update real weight_on_prior; ///log(weight_on_prior) real log_weight_on_prior; ///log(1-weight_on_prior_ real log_1_weight_on_prior; /// MAPHMM(int n_states_, Distribution **states_, real** transitions_, HMM* prior_distribution_); void setWeightOnPrior(real weight_on_prior_); /// map adaptation method for transitions probabilities virtual void eMUpdate(); virtual ~MAPHMM(); }; } #endif torch3-3.1.orig/distributions/LICENSE0000644000175000017500000000271110106445237017606 0ustar kalfakalfa00000000000000 Copyright (c) 2003--2004 Ronan Collobert Copyright (c) 2003--2004 Samy Bengio Copyright (c) 2003--2004 Johnny Mariéthoz All rights reserved. Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: 1. Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. 2. 
Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. 3. The name of the author may not be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. torch3-3.1.orig/convolutions/0000755000175000017500000000000010106445235016436 5ustar kalfakalfa00000000000000torch3-3.1.orig/convolutions/Makefile0000644000175000017500000000172710106445235020105 0ustar kalfakalfa00000000000000# get user and architecture specific options OS := $(shell uname -s) TORCHDIR := $(shell cd ..; pwd) include ../Makefile_options_$(OS) CC_FILES := $(wildcard *.cc) OBJS := $(foreach f,$(CC_FILES),$(OBJS_DIR)/$(patsubst %.cc,%.o,$(f))) all: $(LIBTORCH) $(LIBTORCH): $(OBJS) @echo "Archiving..." @$(AR) $(LIBTORCH) $(OBJS) $(OBJS_DIR)/%.o: %.cc @echo $< @$(CC) $(CFLAGS_$(MODE)) $(INCS) -o $@ -c $< distclean: @\rm -f .deps_* clean: @echo "Remove objects file and dependencies..." @\rm -Rf $(OBJS) $(LIBTORCH) @\rm -f .deps_$(VERSION_KEY) depend: @echo "Tracking dependencies..." 
@\rm -f .deps_$(VERSION_KEY) @for file in *.cc ; do printf "$(OBJS_DIR)/" >> .deps_$(VERSION_KEY); $(DEP) $(CFLAGS_$(MODE)) $(INCS) $$file >> .deps_$(VERSION_KEY); done .deps_$(VERSION_KEY): @echo ">>> Please do a 'make depend' <<<" exit 10 ifneq ($(MAKECMDGOALS),distclean) ifneq ($(MAKECMDGOALS),clean) ifneq ($(MAKECMDGOALS),depend) include .deps_$(VERSION_KEY) endif endif endif torch3-3.1.orig/convolutions/SpatialConvolution.cc0000644000175000017500000001640210106445235022605 0ustar kalfakalfa00000000000000// Copyright (C) 2003--2004 Ronan Collobert (collober@idiap.ch) // // This file is part of Torch 3.1. // // All rights reserved. // // Redistribution and use in source and binary forms, with or without // modification, are permitted provided that the following conditions // are met: // 1. Redistributions of source code must retain the above copyright // notice, this list of conditions and the following disclaimer. // 2. Redistributions in binary form must reproduce the above copyright // notice, this list of conditions and the following disclaimer in the // documentation and/or other materials provided with the distribution. // 3. The name of the author may not be used to endorse or promote products // derived from this software without specific prior written permission. // // THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR // IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES // OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
// IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, // INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT // NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF // THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#include "SpatialConvolution.h"
#include "Random.h"

namespace Torch {

/* Author's note (translated from French):
   For the record, I first tried to write an ugly version that attempted to
   take the machine's cache into account. It took me half a day, plus a few
   hours of debugging, and the code was horrible.
   I then rewrote the whole thing in 10 minutes. It worked the first time.
   And it is faster! So: do not over-think it...
*/

/** Set up the layer geometry, allocate the outputs, the back-propagated
    gradient and the (derivatives of the) parameters, then draw the initial
    parameters with #reset_()#.

    Parameters are stored as one block: first the kernels, one group of
    #k_w*k_w*n_input_planes# values per output plane, then one bias per
    output plane. */
SpatialConvolution::SpatialConvolution(int n_input_planes_, int n_output_planes_, int width_, int height_, int k_w_, int d_x_, int d_y_) : GradientMachine(0, 0)
{
  n_input_planes = n_input_planes_;
  n_output_planes = n_output_planes_;
  input_width = width_;
  input_height = height_;
  k_w = k_w_;
  d_x = d_x_;
  d_y = d_y_;

  n_inputs = n_input_planes * input_height * input_width;
  // integer division: trailing rows/columns that do not fit a full kernel
  // application are dropped
  output_height = (input_height - k_w) / d_y + 1;
  output_width = (input_width - k_w) / d_x + 1;
  n_outputs = n_output_planes * output_height * output_width;

  if(input_height < k_w)
    error("SpatialConvolution: input image height is too small (height = %d < k_w = %d) ", input_height, k_w);
  if(input_width < k_w)
    error("SpatialConvolution: input image width is too small (width = %d < k_w = %d) ", input_width, k_w);

  outputs = new(allocator) Sequence(1, n_outputs);
  beta = new(allocator) Sequence(1, n_inputs);

  int n_params_ = k_w*k_w*n_input_planes*n_output_planes+n_output_planes;
  params = new(allocator) Parameters(n_params_);
  der_params = new(allocator) Parameters(n_params_);

  // weights[i] points inside the parameter block, at the kernels of output plane i
  weights = (real **)allocator->alloc(sizeof(real *)*n_output_planes);
  for(int i = 0; i < n_output_planes; i++)
    weights[i] = params->data[0] + i*k_w*k_w*n_input_planes;
  // the biases follow all the kernels
  biases = params->data[0] + k_w*k_w*n_input_planes*n_output_planes;

  // same layout for the derivatives
  der_weights = (real **)allocator->alloc(sizeof(real *)*n_output_planes);
  for(int i = 0; i < n_output_planes; i++)
    der_weights[i] = der_params->data[0] + i*k_w*k_w*n_input_planes;
  der_biases = der_params->data[0] + k_w*k_w*n_input_planes*n_output_planes;

  message("SpatialConvolution: output image is <%d x %d>", output_width, output_height);

  reset_();
}

/** Draw every parameter (kernels and biases) uniformly in [-bound, bound],
    with bound = 1/sqrt(k_w*k_w*n_input_planes). */
void SpatialConvolution::reset_()
{
  real bound = 1./sqrt((real)(k_w*k_w*n_input_planes));

  real *params_ = params->data[0];
  for(int i = 0; i < params->n_params; i++)
    params_[i] = Random::boundedUniform(-bound, bound);
}

/// Virtual entry point; forwards to #reset_()#.
void SpatialConvolution::reset()
{
  reset_();
}

/** Compute every output plane of one frame: bias plus, for each input
    plane, the 2-D dot product between the kernel and the input window.
    #t# is not used. */
void SpatialConvolution::frameForward(int t, real *f_inputs, real *f_outputs)
{
  for(int k = 0; k < n_output_planes; k++)
  {
    // Initialize to the bias
    real z = biases[k];
    for(int i = 0; i < output_width*output_height; i++)
      f_outputs[i] = z;

    // Accumulate the contribution of every input plane
    for(int i = 0; i < n_input_planes; i++)
    {
      // Kernel for the pair (k,i) (k: output plane, i: input plane)
      real *ptr_w = weights[k]+i*k_w*k_w;

      // Input image of plane i
      real *ptr_img_in = f_inputs+i*input_width*input_height;

      // For all output pixels...
      real *outputs_ = f_outputs;
      for(int yy = 0; yy < output_height; yy++)
      {
        for(int xx = 0; xx < output_width; xx++)
        {
          // Dot product in two dimensions... (between input image and the mask)
          real *ptr_img_in_ = ptr_img_in+yy*d_y*input_width+xx*d_x;
          real *ptr_w_ = ptr_w;
          real sum = 0;
          for(int ky = 0; ky < k_w; ky++)
          {
            for(int kx = 0; kx < k_w; kx++)
              sum += ptr_img_in_[kx]*ptr_w_[kx];
            ptr_img_in_ += input_width; // next input line
            ptr_w_ += k_w; // next mask line
          }

          // Update output
          *outputs_++ += sum;
        }
      }
    }

    // Next output plane
    f_outputs += output_width*output_height;
  }
}

/** Accumulate the parameter derivatives for one frame from the output
    gradient #alpha_#, then (unless #partial_backprop# is set) compute the
    gradient with respect to the inputs into #beta_#.
    #t# and #f_outputs# are not used. */
void SpatialConvolution::frameBackward(int t, real *f_inputs, real *beta_, real *f_outputs, real *alpha_)
{
  // NOTE: this loop is *required* even with "partial backprop"
  real *alpha__ = alpha_;
  for(int k = 0; k < n_output_planes; k++)
  {
    // bias derivative: sum of the output gradient over the plane
    real sum = 0;
    for(int i = 0; i < output_width*output_height; i++)
      sum += alpha__[i];
    der_biases[k] += sum;

    for(int i = 0; i < n_input_planes; i++)
    {
      real *der_ptr_w = der_weights[k] + i*k_w*k_w;
      real *ptr_img_in = f_inputs+i*input_width*input_height;
      real *alpha___ = alpha__;
      for(int yy = 0; yy < output_height; yy++)
      {
        for(int xx = 0; xx < output_width; xx++)
        {
          real *ptr_img_in_ = ptr_img_in+yy*d_y*input_width+xx*d_x;
          real *der_ptr_w_ = der_ptr_w;
          real z = *alpha___++;
          for(int ky = 0; ky < k_w; ky++)
          {
            for(int kx = 0; kx < k_w; kx++)
              der_ptr_w_[kx] += z * ptr_img_in_[kx];
            ptr_img_in_ += input_width;
            der_ptr_w_ += k_w;
          }
        }
      }
    }
    alpha__ += output_width*output_height;
  }

  if(partial_backprop)
    return;

  // NOTE: this loop is *not needed* with "partial backprop"
  for(int k = 0; k < n_inputs; k++)
    beta_[k] = 0;

  alpha__ = alpha_;
  for(int k = 0; k < n_output_planes; k++)
  {
    for(int i = 0; i < n_input_planes; i++)
    {
      real *ptr_w = weights[k]+i*k_w*k_w;
      real *beta__ = beta_+i*input_width*input_height;
      real *alpha___ = alpha__;
      for(int yy = 0; yy < output_height; yy++)
      {
        for(int xx = 0; xx < output_width; xx++)
        {
          real *beta___ = beta__+yy*d_y*input_width+xx*d_x;
          real *ptr_w_ = ptr_w;
          real z = *alpha___++;
          for(int ky = 0; ky < k_w; ky++)
          {
            for(int kx = 0; kx < k_w; kx++)
              beta___[kx] += z * ptr_w_[kx];
            beta___ += input_width;
            ptr_w_ += k_w;
          }
        }
      }
    }
    alpha__ += output_width*output_height;
  }
}

SpatialConvolution::~SpatialConvolution()
{
}

}
torch3-3.1.orig/convolutions/SpatialConvolution.h0000644000175000017500000000771710106445235022460 0ustar kalfakalfa00000000000000// Copyright (C) 2003--2004 Ronan Collobert (collober@idiap.ch) // // This file is part of Torch 3.1. // // All rights reserved. // // Redistribution and use in source and binary forms, with or without // modification, are permitted provided that the following conditions // are met: // 1. Redistributions of source code must retain the above copyright // notice, this list of conditions and the following disclaimer. // 2. Redistributions in binary form must reproduce the above copyright // notice, this list of conditions and the following disclaimer in the // documentation and/or other materials provided with the distribution. // 3. The name of the author may not be used to endorse or promote products // derived from this software without specific prior written permission. // // THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR // IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES // OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. // IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, // INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT // NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF // THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #ifndef SPATIAL_CONVOLUTION_INC #define SPATIAL_CONVOLUTION_INC #include "GradientMachine.h" namespace Torch { /** Class for doing convolution over images. Suppose you put #n_input_planes# images in each input frame. 
The images are stored one after the other in one big vector, so each
    input frame has a size of #n_input_planes*input_height*input_width#.
    Thus, #n_inputs = n_input_planes*input_height*input_width#.

    Then, for each output plane, it computes the convolution of \emph{all}
    input image planes with a kernel of size #k_w*k_w*n_input_planes#.

    The output image size is computed in the constructor and
    put in #output_height# and #output_width#.
    #n_outputs = n_output_planes*output_height*output_width#.

    Note that, depending on the size of your kernel, several (last) columns
    or rows of the input image could be lost.

    Note also that \emph{no} non-linearity is applied in this layer.

    @author Ronan Collobert (collober@idiap.ch)
*/
class SpatialConvolution : public GradientMachine
{
  public:
    /// Kernel size (height and width).
    int k_w;

    /// 'x' translation \emph{in the input image} after each application of the kernel.
    int d_x;

    /// 'y' translation \emph{in the input image} after each application of the kernel.
    int d_y;

    /// Number of input images.
    int n_input_planes;

    /// Number of output images.
    int n_output_planes;

    /// Height of each input image.
    int input_height;

    /// Width of each input image.
    int input_width;

    /// Height of each output image.
    int output_height;

    /// Width of each output image.
    int output_width;

    /** #weights[i]# means kernel-weights for output plane #i#.
        #weights[i]# contains #n_input_planes# times #k_w*k_w# weights.
    */
    real **weights;

    /// Derivatives associated to #weights#.
    real **der_weights;

    /// #biases[i]# is the bias for output plane #i#.
    real *biases;

    /// Derivatives associated to #biases#.
    real *der_biases;

    /// Create a convolution layer. Note the argument order: width before
    /// height. The kernel is #k_w_# x #k_w_# and is moved by #d_x_# / #d_y_#
    /// pixels between two applications.
    SpatialConvolution(int n_input_planes_, int n_output_planes_, int width_, int height_, int k_w_=5, int d_x_=1, int d_y_=1);

    //-----

    /// Non-virtual worker behind #reset()#.
    void reset_();

    /// Re-initialize the parameters.
    virtual void reset();

    /// Forward pass for one frame.
    virtual void frameForward(int t, real *f_inputs, real *f_outputs);

    /// Backward pass for one frame: #alpha_# is the gradient with respect
    /// to the outputs, #beta_# receives the gradient with respect to the inputs.
    virtual void frameBackward(int t, real *f_inputs, real *beta_, real *f_outputs, real *alpha_);

    virtual ~SpatialConvolution();
};

}

#endif
torch3-3.1.orig/convolutions/SpatialSubSampling.cc0000644000175000017500000001350610106445235022514 0ustar kalfakalfa00000000000000// Copyright (C) 2003--2004 Ronan Collobert (collober@idiap.ch) // // This file is part of Torch 3.1. // // All rights reserved. // // Redistribution and use in source and binary forms, with or without // modification, are permitted provided that the following conditions // are met: // 1. Redistributions of source code must retain the above copyright // notice, this list of conditions and the following disclaimer. // 2. Redistributions in binary form must reproduce the above copyright // notice, this list of conditions and the following disclaimer in the // documentation and/or other materials provided with the distribution. // 3. The name of the author may not be used to endorse or promote products // derived from this software without specific prior written permission. // // THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR // IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES // OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
// IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, // INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT // NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF // THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #include "SpatialSubSampling.h" #include "Random.h" namespace Torch { SpatialSubSampling::SpatialSubSampling(int n_input_planes_, int width_, int height_, int k_w_, int d_x_, int d_y_) : GradientMachine(0, 0) { n_input_planes = n_input_planes_; input_width = width_; input_height = height_; k_w = k_w_; d_x = d_x_; d_y = d_y_; n_inputs = n_input_planes * input_height * input_width; output_height = (input_height - k_w) / d_y + 1; output_width = (input_width - k_w) / d_x + 1; n_outputs = n_input_planes * output_height * output_width; if(input_height < k_w) error("SpatialConvolution: input image height is too small (height = %d < k_w = %d) ", input_height, k_w); if(input_width < k_w) error("SpatialConvolution: input image width is too small (width = %d < k_w = %d) ", input_width, k_w); outputs = new(allocator) Sequence(1, n_outputs); beta = new(allocator) Sequence(1, n_inputs); int n_params_ = 2*n_input_planes; params = new(allocator) Parameters(n_params_); der_params = new(allocator) Parameters(n_params_); weights = params->data[0]; biases = params->data[0] + n_input_planes; der_weights = der_params->data[0]; der_biases = der_params->data[0] + n_input_planes; message("SpatialSubSampling: output image is <%d x %d>", output_width, output_height); reset_(); } void SpatialSubSampling::reset_() { real bound = 1./sqrt((real)(k_w*k_w)); real *params_ = params->data[0]; for(int i = 0; i < params->n_params; i++) params_[i] = Random::boundedUniform(-bound, bound); } void 
SpatialSubSampling::reset() { reset_(); } void SpatialSubSampling::frameForward(int t, real *f_inputs, real *f_outputs) { for(int k = 0; k < n_input_planes; k++) { // Initialize to the bias real z = biases[k]; for(int i = 0; i < output_width*output_height; i++) f_outputs[i] = z; // Go! // Get the good mask for (k,i) (k out, i in) real the_weight = weights[k]; // For all output pixels... real *outputs_ = f_outputs; for(int yy = 0; yy < output_height; yy++) { for(int xx = 0; xx < output_width; xx++) { // Compute the mean of the input image... real *ptr_img_in = f_inputs+yy*d_y*input_width+xx*d_x; real sum = 0; for(int ky = 0; ky < k_w; ky++) { for(int kx = 0; kx < k_w; kx++) sum += ptr_img_in[kx]; ptr_img_in += input_width; // next input line } // Update output *outputs_++ += the_weight*sum; } } // Next input/output plane f_outputs += output_width*output_height; f_inputs += input_width*input_height; } } void SpatialSubSampling::frameBackward(int t, real *f_inputs, real *beta_, real *f_outputs, real *alpha_) { // NOTE: boucle *necessaire* avec "partial backprop" real *alpha__ = alpha_; for(int k = 0; k < n_input_planes; k++) { real sum = 0; for(int i = 0; i < output_width*output_height; i++) sum += alpha__[i]; der_biases[k] += sum; real *alpha___ = alpha__; sum = 0; for(int yy = 0; yy < output_height; yy++) { for(int xx = 0; xx < output_width; xx++) { real *ptr_img_in = f_inputs+yy*d_y*input_width+xx*d_x; real z = *alpha___++; for(int ky = 0; ky < k_w; ky++) { for(int kx = 0; kx < k_w; kx++) sum += z * ptr_img_in[kx]; ptr_img_in += input_width; } } } der_weights[k] += sum; alpha__ += output_width*output_height; f_inputs += input_width*input_height; } if(partial_backprop) return; // NOTE: boucle *non-necessaire* avec "partial backprop" for(int k = 0; k < n_inputs; k++) beta_[k] = 0; alpha__ = alpha_; for(int k = 0; k < n_input_planes; k++) { real the_weight = weights[k]; real *alpha___ = alpha__; for(int yy = 0; yy < output_height; yy++) { for(int xx = 0; xx < 
output_width; xx++) { real *beta__ = beta_+yy*d_y*input_width+xx*d_x; real z = *alpha___++ * the_weight; for(int ky = 0; ky < k_w; ky++) { for(int kx = 0; kx < k_w; kx++) beta__[kx] += z; beta__ += input_width; } } } alpha__ += output_width*output_height; beta_ += input_width*input_height; } } SpatialSubSampling::~SpatialSubSampling() { } } torch3-3.1.orig/convolutions/SpatialSubSampling.h0000644000175000017500000000771510106445235022363 0ustar kalfakalfa00000000000000// Copyright (C) 2003--2004 Ronan Collobert (collober@idiap.ch) // // This file is part of Torch 3.1. // // All rights reserved. // // Redistribution and use in source and binary forms, with or without // modification, are permitted provided that the following conditions // are met: // 1. Redistributions of source code must retain the above copyright // notice, this list of conditions and the following disclaimer. // 2. Redistributions in binary form must reproduce the above copyright // notice, this list of conditions and the following disclaimer in the // documentation and/or other materials provided with the distribution. // 3. The name of the author may not be used to endorse or promote products // derived from this software without specific prior written permission. // // THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR // IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES // OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. // IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, // INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT // NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF // THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
#ifndef SPATIAL_SUB_SAMPLING_INC
#define SPATIAL_SUB_SAMPLING_INC

#include "GradientMachine.h"

namespace Torch {

/** Class for doing sub-sampling over images.

    Suppose you put #n_input_planes# images in each input frame.
    The images are stored in one big vector, image after image, so each
    input frame has a size of #n_input_planes*input_height*input_width#.
    Thus, #n_inputs = n_input_planes*input_height*input_width#.

    Then, for each output plane, it takes its associated input plane
    and computes the convolution of the input image with a kernel
    of size #k_w*k_w# whose weights are all equal.

    The output image size is computed in the constructor and
    put in #output_height# and #output_width#.
    #n_outputs = n_input_planes*output_height*output_width#.

    Note that, depending on the size of your kernel, the last few
    columns or rows of the input image could be lost.

    Note also that \emph{no} non-linearity is applied in this layer.

    @author Ronan Collobert (collober@idiap.ch)
*/
class SpatialSubSampling : public GradientMachine
{
  public:
    /// Kernel size (height and width).
    int k_w;

    /// 'x' translation \emph{in the input image} after each application of the kernel.
    int d_x;

    /// 'y' translation \emph{in the input image} after each application of the kernel.
    int d_y;

    /// Number of input images. The number of output images in sub-sampling is the same.
    int n_input_planes;

    /// Height of each input image.
    int input_height;

    /// Width of each input image.
    int input_width;

    /// Height of each output image.
    int output_height;

    /// Width of each output image.
    int output_width;

    /** #weights[i]# is the kernel weight for output plane #i#.
        There is only one weight per plane.
    */
    real *weights;

    /// Derivatives associated to #weights#.
    real *der_weights;

    /// #biases[i]# is the bias for output plane #i#.
    real *biases;

    /// Derivatives associated to #biases#.
    real *der_biases;

    /** Create a sub-sampling layer.
        Note the argument order: the image \emph{width} comes before the
        image \emph{height}.

        @param n_input_planes_ number of images in each input frame
        @param width_ width of each input image
        @param height_ height of each input image
        @param k_w_ kernel size (the kernel is square)
        @param d_x_ 'x' translation between two applications of the kernel
        @param d_y_ 'y' translation between two applications of the kernel
    */
    SpatialSubSampling(int n_input_planes_, int width_, int height_, int k_w_=2, int d_x_=2, int d_y_=2);

    //-----

    /// Non-virtual parameter (re-)initialization.
    void reset_();

    /// Re-initialize the parameters of the layer.
    virtual void reset();

    /// Compute the outputs #f_outputs# of one frame, given its inputs #f_inputs#.
    virtual void frameForward(int t, real *f_inputs, real *f_outputs);

    /** Given the derivatives #alpha_# with respect to the outputs of one
        frame, update the parameter derivatives and compute the derivatives
        #beta_# with respect to the inputs.
    */
    virtual void frameBackward(int t, real *f_inputs, real *beta_, real *f_outputs, real *alpha_);

    virtual ~SpatialSubSampling();
};

}

#endif
torch3-3.1.orig/convolutions/TemporalConvolution.cc0000644000175000017500000001204610106445235022773 0ustar kalfakalfa00000000000000
// Copyright (C) 2003--2004 Ronan Collobert (collober@idiap.ch)
//
// This file is part of Torch 3.1.
//
// All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions
// are met:
// 1. Redistributions of source code must retain the above copyright
//    notice, this list of conditions and the following disclaimer.
// 2. Redistributions in binary form must reproduce the above copyright
//    notice, this list of conditions and the following disclaimer in the
//    documentation and/or other materials provided with the distribution.
// 3. The name of the author may not be used to endorse or promote products
//    derived from this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
// IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
// OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
// IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, // INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT // NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF // THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #include "TemporalConvolution.h" #include "Random.h" namespace Torch { TemporalConvolution::TemporalConvolution(int input_frame_size, int output_frame_size, int k_w_, int d_t_) : GradientMachine(input_frame_size, output_frame_size, (k_w_*input_frame_size+1)*output_frame_size) { k_w = k_w_; d_t = d_t_; weights = (real **)allocator->alloc(sizeof(real *)*n_outputs); for(int i = 0; i < n_outputs; i++) weights[i] = params->data[0] + i*k_w*n_inputs; biases = params->data[0] + k_w*n_inputs*n_outputs; der_weights = (real **)allocator->alloc(sizeof(real *)*n_outputs); for(int i = 0; i < n_outputs; i++) der_weights[i] = der_params->data[0] + i*k_w*n_inputs; der_biases = der_params->data[0] + k_w*n_inputs*n_outputs; reset_(); } void TemporalConvolution::reset_() { real bound = 1./sqrt((real)(k_w*n_inputs)); real *params_ = params->data[0]; for(int i = 0; i < params->n_params; i++) params_[i] = Random::boundedUniform(-bound, bound); } void TemporalConvolution::reset() { reset_(); } void TemporalConvolution::forward(Sequence *inputs) { if(inputs->n_frames < k_w) error("TemporalSubSampling: input sequence too small! (n_frames = %d < k_w = %d)", inputs->n_frames, k_w); int n_output_frames = (inputs->n_frames - k_w) / d_t + 1; outputs->resize(n_output_frames); int current_input_frame = 0; for(int i = 0; i < n_output_frames; i++) { real *output_frame_ = outputs->frames[i]; for(int j = 0; j < n_outputs; j++) output_frame_[j] = biases[j]; // Sur le noyau... 
for(int j = 0; j < k_w; j++) { // Sur tous les "neurones" de sorties for(int k = 0; k < n_outputs; k++) { real *weights_ = weights[k]+j*n_inputs; real *input_frame_ = inputs->frames[current_input_frame+j]; real sum = 0; for(int l = 0; l < n_inputs; l++) sum += weights_[l]*input_frame_[l]; output_frame_[k] += sum; } } current_input_frame += d_t; } } void TemporalConvolution::backward(Sequence *inputs, Sequence *alpha) { int n_output_frames = alpha->n_frames; // NOTE: boucle *necessaire* avec "partial backprop" int current_input_frame = 0; for(int i = 0; i < n_output_frames; i++) { real *alpha_frame_ = alpha->frames[i]; for(int j = 0; j < n_outputs; j++) der_biases[j] += alpha_frame_[j]; for(int j = 0; j < k_w; j++) { for(int k = 0; k < n_outputs; k++) { real *der_weights_ = der_weights[k]+j*n_inputs; real *input_frame_ = inputs->frames[current_input_frame+j]; real alpha_ = alpha_frame_[k]; for(int l = 0; l < n_inputs; l++) der_weights_[l] += alpha_*input_frame_[l]; } } current_input_frame += d_t; } if(partial_backprop) return; // NOTE: boucle *non-necessaire* avec "partial backprop" beta->resize(inputs->n_frames); for(int i = 0; i < beta->n_frames; i++) { real *beta_frame_ = beta->frames[i]; for(int j = 0; j < n_inputs; j++) beta_frame_[j] = 0; } int current_beta_frame = 0; for(int i = 0; i < n_output_frames; i++) { real *alpha_frame_ = alpha->frames[i]; for(int j = 0; j < k_w; j++) { for(int k = 0; k < n_outputs; k++) { real *weights_ = weights[k]+j*n_inputs; real *beta_frame_ = beta->frames[current_beta_frame+j]; real alpha_ = alpha_frame_[k]; for(int l = 0; l < n_inputs; l++) beta_frame_[l] += weights_[l]*alpha_; } } current_beta_frame += d_t; } } TemporalConvolution::~TemporalConvolution() { } } torch3-3.1.orig/convolutions/TemporalConvolution.h0000644000175000017500000000575110106445235022642 0ustar kalfakalfa00000000000000// Copyright (C) 2003--2004 Ronan Collobert (collober@idiap.ch) // // This file is part of Torch 3.1. // // All rights reserved. 
// // Redistribution and use in source and binary forms, with or without // modification, are permitted provided that the following conditions // are met: // 1. Redistributions of source code must retain the above copyright // notice, this list of conditions and the following disclaimer. // 2. Redistributions in binary form must reproduce the above copyright // notice, this list of conditions and the following disclaimer in the // documentation and/or other materials provided with the distribution. // 3. The name of the author may not be used to endorse or promote products // derived from this software without specific prior written permission. // // THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR // IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES // OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. // IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, // INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT // NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF // THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #ifndef TEMPORAL_CONVOLUTION_INC #define TEMPORAL_CONVOLUTION_INC #include "GradientMachine.h" namespace Torch { /** Class for doing a convolution over a sequence. For each component of output frames, it computes the convolution of the input sequence with a kernel of size #k_w# (over the time). Note that, depending of the size of your kernel, several (last) frames of the input sequence could be lost. Note also that \emph{no} non-linearity is applied in this layer. @author Ronan Collobert (collober@idiap.ch) */ class TemporalConvolution : public GradientMachine { public: /// Kernel size. 
int k_w; /// Time translation after one application of the kernel. int d_t; /** #weights[i]# means kernel-weights for the #i#-th component of output frames. #weights[i]# contains #input_frame_size# times #k_w# weights. */ real **weights; /// Derivatives associated to #weights#. real **der_weights; /// #biases[i]# is the bias for the #i#-th component of output frames. real *biases; /// Derivatives associated to #biases#. real *der_biases; /// Create a convolution layer... TemporalConvolution(int input_frame_size, int output_frame_size, int k_w_=5, int d_t_=1); //----- void reset_(); virtual void reset(); virtual void forward(Sequence *inputs); virtual void backward(Sequence *inputs, Sequence *alpha); virtual ~TemporalConvolution(); }; } #endif torch3-3.1.orig/convolutions/TemporalMean.cc0000644000175000017500000000503410106445235021333 0ustar kalfakalfa00000000000000// Copyright (C) 2003--2004 Ronan Collobert (collober@idiap.ch) // // This file is part of Torch 3.1. // // All rights reserved. // // Redistribution and use in source and binary forms, with or without // modification, are permitted provided that the following conditions // are met: // 1. Redistributions of source code must retain the above copyright // notice, this list of conditions and the following disclaimer. // 2. Redistributions in binary form must reproduce the above copyright // notice, this list of conditions and the following disclaimer in the // documentation and/or other materials provided with the distribution. // 3. The name of the author may not be used to endorse or promote products // derived from this software without specific prior written permission. // // THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR // IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES // OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
// IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, // INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT // NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF // THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #include "TemporalMean.h" #include "Random.h" namespace Torch { TemporalMean::TemporalMean(int input_frame_size) : GradientMachine(input_frame_size, input_frame_size) { outputs->resize(1); } void TemporalMean::forward(Sequence *inputs) { real *outputs_ = outputs->frames[0]; for(int i = 0; i < n_outputs; i++) outputs_[i] = 0; for(int i = 0; i < inputs->n_frames; i++) { real *input_frame_ = inputs->frames[i]; for(int j = 0; j < n_outputs; j++) outputs_[j] += input_frame_[j]; } real z = 1./((real)inputs->n_frames); for(int i = 0; i < n_outputs; i++) outputs_[i] *= z; } void TemporalMean::backward(Sequence *inputs, Sequence *alpha) { if(partial_backprop) return; beta->resize(inputs->n_frames); real *alpha_ = alpha->frames[0]; real z = 1./((real)inputs->n_frames); for(int i = 0; i < beta->n_frames; i++) { real *beta_frame_ = beta->frames[i]; for(int j = 0; j < n_outputs; j++) beta_frame_[j] = alpha_[j]*z; } } TemporalMean::~TemporalMean() { } } torch3-3.1.orig/convolutions/TemporalMean.h0000644000175000017500000000414410106445235021176 0ustar kalfakalfa00000000000000// Copyright (C) 2003--2004 Ronan Collobert (collober@idiap.ch) // // This file is part of Torch 3.1. // // All rights reserved. // // Redistribution and use in source and binary forms, with or without // modification, are permitted provided that the following conditions // are met: // 1. 
Redistributions of source code must retain the above copyright // notice, this list of conditions and the following disclaimer. // 2. Redistributions in binary form must reproduce the above copyright // notice, this list of conditions and the following disclaimer in the // documentation and/or other materials provided with the distribution. // 3. The name of the author may not be used to endorse or promote products // derived from this software without specific prior written permission. // // THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR // IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES // OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. // IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, // INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT // NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF // THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #ifndef TEMPORAL_MEAN_INC #define TEMPORAL_MEAN_INC #include "GradientMachine.h" namespace Torch { /** Given an input sequence, it does the mean over all input frames. Thus, the output sequence will have only one frame, which has the same size than input frames. 
@author Ronan Collobert (collober@idiap.ch) */ class TemporalMean : public GradientMachine { public: /// TemporalMean(int input_frame_size); //----- virtual void forward(Sequence *inputs); virtual void backward(Sequence *inputs, Sequence *alpha); virtual ~TemporalMean(); }; } #endif torch3-3.1.orig/convolutions/TemporalSubSampling.cc0000644000175000017500000001117010106445235022675 0ustar kalfakalfa00000000000000// Copyright (C) 2003--2004 Ronan Collobert (collober@idiap.ch) // // This file is part of Torch 3.1. // // All rights reserved. // // Redistribution and use in source and binary forms, with or without // modification, are permitted provided that the following conditions // are met: // 1. Redistributions of source code must retain the above copyright // notice, this list of conditions and the following disclaimer. // 2. Redistributions in binary form must reproduce the above copyright // notice, this list of conditions and the following disclaimer in the // documentation and/or other materials provided with the distribution. // 3. The name of the author may not be used to endorse or promote products // derived from this software without specific prior written permission. // // THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR // IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES // OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. // IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, // INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT // NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF // THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
#include "TemporalSubSampling.h"
#include "Random.h"

namespace Torch {

// Builds a sub-sampling over time with a kernel of #k_w_# frames moved by
// #d_t_# frames. Input and output frames have the same size; each component
// owns exactly one weight and one bias, stored in #params# (weights first,
// then biases).
TemporalSubSampling::TemporalSubSampling(int input_frame_size, int k_w_, int d_t_)
  : GradientMachine(input_frame_size, input_frame_size, 2*input_frame_size)
{
  k_w = k_w_;
  d_t = d_t_;

  // d_t is used as a divisor in forward(); k_w sizes the kernel and the
  // initialisation bound in reset_().
  if(k_w < 1 || d_t < 1)
    error("TemporalSubSampling: kernel size and time translation must be positive (k_w = %d, d_t = %d)", k_w, d_t);

  weights = params->data[0];
  biases = params->data[0] + n_inputs;

  der_weights = der_params->data[0];
  der_biases = der_params->data[0] + n_inputs;

  reset_();
}

// Draws every parameter (weights and biases) uniformly in [-bound, bound],
// with bound = 1/sqrt(k_w): each output component sees k_w input values.
void TemporalSubSampling::reset_()
{
  real bound = 1./sqrt((real)(k_w));
  real *params_ = params->data[0];
  for(int i = 0; i < params->n_params; i++)
    params_[i] = Random::boundedUniform(-bound, bound);
}

// Virtual entry point; simply forwards to reset_().
void TemporalSubSampling::reset()
{
  reset_();
}

// Produces (n_frames - k_w) / d_t + 1 output frames. For output frame i and
// component k:
//   output[i][k] = biases[k] + weights[k] * sum_{j < k_w} input[i*d_t + j][k]
//
// NOTE(review): the previous code summed *all* input components into every
// output component. That contradicted the class documentation ("takes its
// associated input component") and the 1/sqrt(k_w) bound used in reset_().
// Each output component now only reads its own input component.
void TemporalSubSampling::forward(Sequence *inputs)
{
  if(inputs->n_frames < k_w)
    error("TemporalSubSampling: input sequence too small! (n_frames = %d < k_w = %d)", inputs->n_frames, k_w);

  // Integer division: trailing frames that do not fit a whole kernel are ignored.
  int n_output_frames = (inputs->n_frames - k_w) / d_t + 1;
  outputs->resize(n_output_frames);

  int current_input_frame = 0;
  for(int i = 0; i < n_output_frames; i++)
  {
    real *output_frame_ = outputs->frames[i];
    for(int j = 0; j < n_outputs; j++)
      output_frame_[j] = biases[j];

    // Over the kernel...
    for(int j = 0; j < k_w; j++)
    {
      real *input_frame_ = inputs->frames[current_input_frame+j];

      // Over all the output "neurons" (n_outputs == n_inputs here)
      for(int k = 0; k < n_outputs; k++)
        output_frame_[k] += weights[k]*input_frame_[k];
    }

    current_input_frame += d_t;
  }
}

// Accumulates the parameter gradients into #der_biases# / #der_weights# and,
// unless #partial_backprop# is set, fills #beta# with the gradient with
// respect to the inputs. Gradients follow the per-component forward().
// The number of output frames is taken from #alpha#.
void TemporalSubSampling::backward(Sequence *inputs, Sequence *alpha)
{
  int n_output_frames = alpha->n_frames;

  // NOTE: this loop is *required*, even with "partial backprop"
  int current_input_frame = 0;
  for(int i = 0; i < n_output_frames; i++)
  {
    real *alpha_frame_ = alpha->frames[i];
    for(int j = 0; j < n_outputs; j++)
      der_biases[j] += alpha_frame_[j];

    for(int j = 0; j < k_w; j++)
    {
      real *input_frame_ = inputs->frames[current_input_frame+j];
      for(int k = 0; k < n_outputs; k++)
        der_weights[k] += alpha_frame_[k]*input_frame_[k];
    }

    current_input_frame += d_t;
  }

  if(partial_backprop)
    return;

  // NOTE: this loop is *not* needed with "partial backprop"
  beta->resize(inputs->n_frames);
  for(int i = 0; i < beta->n_frames; i++)
  {
    real *beta_frame_ = beta->frames[i];
    for(int j = 0; j < n_inputs; j++)
      beta_frame_[j] = 0;
  }

  int current_beta_frame = 0;
  for(int i = 0; i < n_output_frames; i++)
  {
    real *alpha_frame_ = alpha->frames[i];
    for(int j = 0; j < k_w; j++)
    {
      // Overlapping kernels accumulate into the same input frame.
      real *beta_frame_ = beta->frames[current_beta_frame+j];
      for(int k = 0; k < n_outputs; k++)
        beta_frame_[k] += alpha_frame_[k]*weights[k];
    }

    current_beta_frame += d_t;
  }
}

TemporalSubSampling::~TemporalSubSampling()
{
}

}
torch3-3.1.orig/convolutions/TemporalSubSampling.h0000644000175000017500000000603410106445235022542 0ustar kalfakalfa00000000000000
// Copyright (C) 2003--2004 Ronan Collobert (collober@idiap.ch)
//
// This file is part of Torch 3.1.
//
// All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions
// are met:
// 1. Redistributions of source code must retain the above copyright
//    notice, this list of conditions and the following disclaimer.
// 2. Redistributions in binary form must reproduce the above copyright
//    notice, this list of conditions and the following disclaimer in the
//    documentation and/or other materials provided with the distribution.
// 3. The name of the author may not be used to endorse or promote products
//    derived from this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
// IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
// OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
// IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
// INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
// NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
// THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

#ifndef TEMPORAL_SUB_SAMPLING_INC
#define TEMPORAL_SUB_SAMPLING_INC

#include "GradientMachine.h"

namespace Torch {

/** Class for doing sub-sampling over a sequence.

    For each component of the output frames, it takes its associated
    input component and computes the convolution of the input sequence
    with a kernel of size #k_w#, over time, where all the weights of the
    kernel are equal.

    Note that, depending on the size of your kernel, the last few
    frames of the input sequence could be lost.

    Note also that \emph{no} non-linearity is applied in this layer.

    @author Ronan Collobert (collober@idiap.ch)
*/
class TemporalSubSampling : public GradientMachine
{
  public:
    /// Kernel size.
    int k_w;

    /// Time translation after one application of the kernel.
    int d_t;

    /** #weights[i]# is the kernel weight for the #i#-th component of the output frames.
        There is only one weight per component.
    */
    real *weights;

    /// Derivatives associated to #weights#.
    real *der_weights;

    /// #biases[i]# is the bias for the #i#-th component of the output frames.
    real *biases;

    /// Derivatives associated to #biases#.
    real *der_biases;

    /** Create a sub-sampling layer.

        @param input_frame_size size of each input frame
        @param k_w_ kernel size, in frames
        @param d_t_ time translation between two applications of the kernel
    */
    TemporalSubSampling(int input_frame_size, int k_w_=2, int d_t_=2);

    //-----

    /// Non-virtual parameter (re-)initialization.
    void reset_();

    /// Re-initialize the parameters of the layer.
    virtual void reset();

    /// Compute the output sequence for the sequence #inputs#.
    virtual void forward(Sequence *inputs);

    /** Given the derivatives #alpha# with respect to the outputs, update
        the parameter derivatives and compute the derivatives with respect
        to #inputs#.
    */
    virtual void backward(Sequence *inputs, Sequence *alpha);

    virtual ~TemporalSubSampling();
};

}

#endif
torch3-3.1.orig/convolutions/LICENSE0000644000175000017500000000271110106445235017444 0ustar kalfakalfa00000000000000
                 Copyright (c) 2003--2004 Ronan Collobert
                 Copyright (c) 2003--2004 Samy Bengio
                 Copyright (c) 2003--2004 Johnny Mariéthoz

                         All rights reserved.

Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions
are met:
1. Redistributions of source code must retain the above copyright
   notice, this list of conditions and the following disclaimer.
2. Redistributions in binary form must reproduce the above copyright
   notice, this list of conditions and the following disclaimer in the
   documentation and/or other materials provided with the distribution.
3. The name of the author may not be used to endorse or promote products
   derived from this software without specific prior written permission.

THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. torch3-3.1.orig/datasets/0000755000175000017500000000000010106445235015504 5ustar kalfakalfa00000000000000torch3-3.1.orig/datasets/DiskHTKDataSet.cc0000644000175000017500000000726310106445235020532 0ustar kalfakalfa00000000000000// Copyright (C) 2003--2004 Johnny Mariethoz (Johnny.Mariethoz@idiap.ch) // // This file is part of Torch 3.1. // // All rights reserved. // // Redistribution and use in source and binary forms, with or without // modification, are permitted provided that the following conditions // are met: // 1. Redistributions of source code must retain the above copyright // notice, this list of conditions and the following disclaimer. // 2. Redistributions in binary form must reproduce the above copyright // notice, this list of conditions and the following disclaimer in the // documentation and/or other materials provided with the distribution. // 3. The name of the author may not be used to endorse or promote products // derived from this software without specific prior written permission. // // THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR // IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES // OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
// IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, // INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT // NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF // THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #include "DiskHTKDataSet.h" #include "IOHTK.h" #include "IOHTKTarget.h" #include "IOSub.h" #include "IOMulti.h" namespace Torch { DiskHTKDataSet::DiskHTKDataSet(const char *inputs_filename, bool one_file_is_one_sequence, int max_load, const char *targets_filename, LexiconInfo* lex_, bool words) { IOHTK* io_inputs_ptr = new(allocator) IOHTK(inputs_filename, one_file_is_one_sequence, max_load,true); io_inputs = io_inputs_ptr; io_targets = NULL; if(targets_filename) { n_per_frame = (int)(io_inputs_ptr->header->sample_period); io_targets = new(allocator) IOHTKTarget(targets_filename, lex_, n_per_frame, words); } DiskDataSet::init(io_inputs, io_targets); message("DiskHTKDataSet: %d examples loaded", n_examples); } DiskHTKDataSet::DiskHTKDataSet(char **inputs_filenames, char ** targets_filename, int n_files_, bool one_file_is_one_sequence, int max_load, LexiconInfo* lex_, bool words) { if(n_files_ <= 0) error("DiskHTKDataSet: check the number of files!"); //inputs int n_files = n_files_; IOHTK **io_files = (IOHTK **)allocator->alloc(sizeof(IOHTK *)*n_files_); if(max_load > 0) { int i = 0; while( (max_load > 0) && (i < n_files_) ) { io_files[i] = new(allocator) IOHTK(inputs_filenames[i], one_file_is_one_sequence, max_load, true); max_load -= io_files[i]->n_sequences; i++; } n_files = i; } else { for(int i = 0; i < n_files_; i++) io_files[i] = new(allocator) IOHTK(inputs_filenames[i], one_file_is_one_sequence, -1, true); } io_inputs = new(allocator) 
IOMulti((IOSequence**)io_files, n_files); //targets io_targets = NULL; if(targets_filename){ n_per_frame = (int)(io_files[0]->header->sample_period); IOHTKTarget **io_files_targets = (IOHTKTarget **)allocator->alloc(sizeof(IOHTKTarget *)*n_files_); for ( int i=0; i < n_files_;i++) io_files_targets[i] = new(allocator) IOHTKTarget(inputs_filenames[i], lex_, n_per_frame, words); io_targets = new(allocator) IOMulti((IOSequence**)io_files_targets, n_files_); } DiskDataSet::init(io_inputs, io_targets); message("DiskHTKDataSet: %d examples loaded", n_examples); } DiskHTKDataSet::~DiskHTKDataSet() { } } torch3-3.1.orig/datasets/LexiconInfo.cc0000644000175000017500000002441610106445235020237 0ustar kalfakalfa00000000000000// Copyright (C) 2003--2004 Darren Moore (moore@idiap.ch) // // This file is part of Torch 3.1. // // All rights reserved. // // Redistribution and use in source and binary forms, with or without // modification, are permitted provided that the following conditions // are met: // 1. Redistributions of source code must retain the above copyright // notice, this list of conditions and the following disclaimer. // 2. Redistributions in binary form must reproduce the above copyright // notice, this list of conditions and the following disclaimer in the // documentation and/or other materials provided with the distribution. // 3. The name of the author may not be used to endorse or promote products // derived from this software without specific prior written permission. // // THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR // IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES // OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
// IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, // INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT // NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF // THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #include "LexiconInfo.h" #include "Allocator.h" namespace Torch { LexiconInfo::LexiconInfo( const char *phones_fname , const char *sil_phone , const char *pause_phone , const char *lex_fname , const char *sent_start_word , const char *sent_end_word , const char *sil_word ) { char line[1000] , ch , curr_word[1000] , *curr_phone ; DiskXFile lex_fd( lex_fname , "r" ) ; real prior=1.0 ; int n_flds , index , vocab_index=0 , temp_phones[100] , temp , ind , total_n_entries ; LexiconInfoEntry *curr_entry ; n_entries = 0 ; entries = NULL ; sent_start_index = -1 ; sent_end_index = -1 ; sil_index = -1 ; phone_info = NULL ; vocabulary = NULL ; vocab_to_lex_map = NULL ; // Create the PhoneInfo and Vocabulary objects phone_info = new(allocator) PhoneInfo( phones_fname , sil_phone , pause_phone ) ; vocabulary = new(allocator) Vocabulary( lex_fname , sent_start_word , sent_end_word , sil_word ) ; // Allocate memory for mappings between vocab entries and dictionary entries. vocab_to_lex_map = (VocabToLexMapEntry *)allocator->alloc( vocabulary->n_words * sizeof(VocabToLexMapEntry) ) ; for ( int i=0 ; in_words ; i++ ) { vocab_to_lex_map[i].n_pronuns = 0 ; vocab_to_lex_map[i].pronuns = NULL ; } // Do a first pass off the file to determine the total number of pronuns. 
total_n_entries = 0 ; while ( lex_fd.gets( line , 1000 ) != NULL ) { if ( (line[0] == '#') || (strtok( line , "(\r\n\t " ) == NULL) ) continue ; total_n_entries++ ; } // Allocate some memory entries = (LexiconInfoEntry *)allocator->alloc( total_n_entries * sizeof(LexiconInfoEntry) ) ; lex_fd.seek( 0 , SEEK_SET ) ; // Now re-read the file and fill in the entries. n_entries = 0 ; while ( lex_fd.gets( line , 1000 ) != NULL ) { #ifdef USE_DOUBLE if ( (line[0] == '#') || ((n_flds = sscanf( line , "%[^( \t]%c%lf" , curr_word , &ch , &prior)) == 0) ) #else if ( (line[0] == '#') || ((n_flds = sscanf( line , "%[^( \t]%c%f" , curr_word , &ch , &prior)) == 0) ) #endif { continue ; } if ( n_flds < 3 ) prior = 1.0 ; if ( n_entries >= total_n_entries ) error("LexiconInfo::LexiconInfo - n_entries exceeded expected\n") ; // Find the vocab index of the new word vocab_index = vocabulary->getIndex( curr_word , vocab_index ) ; if ( vocab_index < 0 ) error("LexiconInfo::LexiconInfo - word %s not found in vocabulary\n",curr_word) ; // Allocate memory for the new lexicon entry. curr_entry = entries + n_entries ; initLexInfoEntry( curr_entry ) ; curr_entry->vocab_index = vocab_index ; curr_entry->log_prior = log( prior ) ; // read in the phones for the new pronunciation strtok( line , " \r\n\t" ) ; // get past the word while ( (curr_phone=strtok(NULL," \r\n\t")) != NULL ) { // find the index of the phone's model index = phone_info->getIndex( curr_phone ) ; // Add it to the list of models we are compiling or report error // if the phone name was not found. 
if ( (index < 0) || (index >= phone_info->n_phones) ) error("LexiconInfo::LexiconInfo - %s not found in phone list\n" , curr_phone) ; else temp_phones[(curr_entry->n_phones)++] = index ; } curr_entry->phones = (int *)allocator->alloc( curr_entry->n_phones*sizeof(int) ) ; memcpy( curr_entry->phones , temp_phones , curr_entry->n_phones*sizeof(int) ) ; if ( curr_entry->n_phones == 0 ) error("LexiconInfo::LexiconInfo - %s had no phones\n",curr_word) ; // Update the appropriate vocab_to_lex_map entry temp = ++(vocab_to_lex_map[vocab_index].n_pronuns) ; vocab_to_lex_map[vocab_index].pronuns = (int *)allocator->realloc( vocab_to_lex_map[vocab_index].pronuns , temp * sizeof(int) ) ; vocab_to_lex_map[vocab_index].pronuns[temp-1] = n_entries ; // Check if these are 'special' words if ( vocab_index == vocabulary->sent_start_index ) { if ( sent_start_index >= 0 ) error("LexiconInfo::LexiconInfo - cannot have >1 pronuns of the start word\n") ; sent_start_index = n_entries ; } if ( vocab_index == vocabulary->sent_end_index ) { if ( sent_end_index >= 0 ) error("LexiconInfo::LexiconInfo - cannot have >1 pronuns of the end word\n") ; sent_end_index = n_entries ; } if ( vocab_index == vocabulary->sil_index ) { if ( sil_index >= 0 ) error("LexiconInfo::LexiconInfo - cannot have >1 pronuns of the sil word\n") ; sil_index = n_entries ; } n_entries++ ; } if ( n_entries != total_n_entries ) error("LexiconInfo::LexiconInfo - unexpected n_entries\n") ; if ( (sent_end_index >= 0) && (sent_start_index == sent_end_index) ) { // Create a separate, identical entry for the sent_end_index // so that there will be a separate model for the sentence end word. 
entries = (LexiconInfoEntry *)allocator->realloc( entries , (n_entries+1)*sizeof(LexiconInfoEntry) ) ; curr_entry = entries + n_entries ; initLexInfoEntry( curr_entry ) ; curr_entry->vocab_index = entries[sent_start_index].vocab_index ; curr_entry->log_prior = entries[sent_start_index].log_prior ; curr_entry->n_phones = entries[sent_start_index].n_phones ; curr_entry->phones = (int *)allocator->alloc( curr_entry->n_phones*sizeof(int) ) ; memcpy( curr_entry->phones , entries[sent_start_index].phones , curr_entry->n_phones*sizeof(int) ) ; // Update the appropriate vocab_to_lex_map entry temp = ++(vocab_to_lex_map[curr_entry->vocab_index].n_pronuns) ; vocab_to_lex_map[curr_entry->vocab_index].pronuns = (int *)allocator->realloc( vocab_to_lex_map[curr_entry->vocab_index].pronuns , temp*sizeof(int) ) ; vocab_to_lex_map[curr_entry->vocab_index].pronuns[temp-1] = n_entries ; sent_end_index = n_entries++ ; } if ( (sil_index >= 0) && ((sil_index==sent_start_index) || (sil_index==sent_end_index)) ) { if ( sil_index == sent_end_index ) ind = sent_end_index ; else ind = sent_start_index ; // Create a separate, identical entry for the sil_index // so that there will be a separate model for the silence word. 
entries = (LexiconInfoEntry *)allocator->realloc( entries , (n_entries+1)*sizeof(LexiconInfoEntry) ) ; curr_entry = entries + n_entries ; initLexInfoEntry( curr_entry ) ; curr_entry->vocab_index = entries[ind].vocab_index ; curr_entry->log_prior = entries[ind].log_prior ; curr_entry->n_phones = entries[ind].n_phones ; curr_entry->phones = (int *)allocator->alloc( curr_entry->n_phones*sizeof(int) ) ; memcpy( curr_entry->phones , entries[ind].phones , curr_entry->n_phones*sizeof(int) ) ; // Update the appropriate vocab_to_lex_map entry temp = ++(vocab_to_lex_map[curr_entry->vocab_index].n_pronuns) ; vocab_to_lex_map[curr_entry->vocab_index].pronuns = (int *)allocator->realloc( vocab_to_lex_map[curr_entry->vocab_index].pronuns , temp*sizeof(int) ) ; vocab_to_lex_map[curr_entry->vocab_index].pronuns[temp-1] = n_entries ; sil_index = n_entries++ ; } } LexiconInfo::~LexiconInfo() { } void LexiconInfo::initLexInfoEntry( LexiconInfoEntry *entry ) { entry->n_phones = 0 ; entry->phones = NULL ; entry->log_prior = 0.0 ; entry->vocab_index = -1 ; } #ifdef DEBUG void LexiconInfo::outputText() { printf("n_entries=%d start_ind=%d end_ind=%d sil_ind=%d\n",n_entries,sent_start_index, sent_end_index,sil_index) ; for ( int i=0 ; igetWord( entries[i].vocab_index ),entries[i].log_prior) ; for ( int j=0 ; jgetPhone( entries[i].phones[j] )) ; printf("\n") ; } } #endif } torch3-3.1.orig/datasets/DiskHTKDataSet.h0000644000175000017500000000503110106445235020363 0ustar kalfakalfa00000000000000// Copyright (C) 2003--2004 Johnny Mariethoz (Johnny.Mariethoz@idiap.ch) // // This file is part of Torch 3.1. // // All rights reserved. // // Redistribution and use in source and binary forms, with or without // modification, are permitted provided that the following conditions // are met: // 1. Redistributions of source code must retain the above copyright // notice, this list of conditions and the following disclaimer. // 2. 
Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
// 3. The name of the author may not be used to endorse or promote products
// derived from this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
// IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
// OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
// IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
// INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
// NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
// THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

#ifndef DISK_HTK_DATA_SET_INC
#define DISK_HTK_DATA_SET_INC

#include "DiskDataSet.h"
#include "MeanVarNorm.h"
#include "IOHTK.h"
#include "LexiconInfo.h"

namespace Torch {

/** Provides an interface to manipulate HTK data which are kept on disk,
    and not fully loaded in memory.
    It uses #IOSequence#.
    Useful for large databases.

    @see DiskMatDataSet
    @see IOSequence
    @see IOHTK
    @see IOHTKTarget
    @author Johnny Mariethoz (Johnny.Mariethoz@idiap.ch)
*/
class DiskHTKDataSet : public DiskDataSet
{
  private:
    // NOTE(review): declared here, but no definition is visible in the
    // accompanying DiskHTKDataSet.cc -- confirm it is still needed.
    void init_(IOSequence *io_file, int n_inputs_, int n_targets_);

  public:
    /// Copied from the #sample_period# field of the (first) HTK input file
    /// header when targets are given, and passed on to #IOHTKTarget#.
    /// Not assigned by the constructors when no targets are given.
    int n_per_frame;

    /** Dataset built from a single HTK feature file.
        #targets_filename#, when not NULL, names the label file read through
        #IOHTKTarget# using #lex_# and #words#.
    */
    DiskHTKDataSet(const char *inputs_filenames, bool one_file_is_one_sequence, int max_load, const char * targets_filename = NULL, LexiconInfo* lex_=NULL, bool words=true);

    /** Dataset built from #n_files_# HTK feature files.
        When #max_load# is positive, files are opened in order only until
        that many sequences have been accounted for.
        #targets_filename# may be NULL, in which case no targets are attached.
    */
    DiskHTKDataSet(char **inputs_filenames, char ** targets_filename, int n_files_, bool one_file_is_one_sequence, int max_load, LexiconInfo* lex_=NULL, bool words=true);

    virtual ~DiskHTKDataSet();
};

}

#endif
torch3-3.1.orig/datasets/HTKDataSet.cc0000644000175000017500000000742510106445235017717 0ustar kalfakalfa00000000000000// Copyright (C) 2003--2004 Johnny Mariethoz (Johnny.Mariethoz@idiap.ch)
//
// This file is part of Torch 3.1.
//
// All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions
// are met:
// 1. Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
// 2. Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
// 3. The name of the author may not be used to endorse or promote products
// derived from this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
// IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
// OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
// IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, // INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT // NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF // THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #include "HTKDataSet.h" #include "IOHTK.h" #include "IOHTKTarget.h" #include "IOSub.h" #include "IOMulti.h" namespace Torch { HTKDataSet::HTKDataSet(const char *inputs_filename, bool one_file_is_one_sequence, int max_load, const char *targets_filename, LexiconInfo* lex_, bool words) { io_allocator = new Allocator; IOHTK* io_inputs = new(io_allocator) IOHTK(inputs_filename, one_file_is_one_sequence, max_load,true); IOHTKTarget* io_targets = NULL; if(targets_filename) { n_per_frame = (int)(io_inputs->header->sample_period); io_targets = new(io_allocator) IOHTKTarget(targets_filename, lex_, n_per_frame, words); } MemoryDataSet::init(io_inputs, io_targets); message("HTKDataSet: %d examples loaded", n_examples); delete io_allocator; } HTKDataSet::HTKDataSet(char **inputs_filenames, char ** targets_filenames, int n_files_, bool one_file_is_one_sequence, int max_load, LexiconInfo* lex_, bool words) { io_allocator = new Allocator; if(n_files_ <= 0) error("HTKDataSet: check the number of files!"); //inputs int n_files = n_files_; IOHTK **io_files = (IOHTK **)io_allocator->alloc(sizeof(IOHTK *)*n_files_); if(max_load > 0) { int i = 0; while( (max_load > 0) && (i < n_files_) ) { io_files[i] = new(io_allocator) IOHTK(inputs_filenames[i], one_file_is_one_sequence, max_load, true); max_load -= io_files[i]->n_sequences; i++; } n_files = i; } else { for(int i = 0; i < n_files_; i++) io_files[i] = new(io_allocator) IOHTK(inputs_filenames[i], one_file_is_one_sequence, -1, true); } 
IOMulti* io_inputs = new(io_allocator) IOMulti((IOSequence**)io_files, n_files); //targets IOMulti* io_targets = NULL; if(targets_filenames){ n_per_frame = (int)(io_files[0]->header->sample_period); IOHTKTarget **io_files_targets = (IOHTKTarget **)io_allocator->alloc(sizeof(IOHTKTarget *)*n_files); for ( int i=0; i < n_files;i++) io_files_targets[i] = new(io_allocator) IOHTKTarget(targets_filenames[i], lex_, n_per_frame, words); io_targets = new(io_allocator) IOMulti((IOSequence**)io_files_targets, n_files); } MemoryDataSet::init(io_inputs, io_targets); message("HTKDataSet: %d examples loaded", n_examples); delete io_allocator; } HTKDataSet::~HTKDataSet() { } } torch3-3.1.orig/datasets/LexiconInfo.h0000644000175000017500000000631010106445235020072 0ustar kalfakalfa00000000000000// Copyright (C) 2003--2004 Darren Moore (moore@idiap.ch) // // This file is part of Torch 3.1. // // All rights reserved. // // Redistribution and use in source and binary forms, with or without // modification, are permitted provided that the following conditions // are met: // 1. Redistributions of source code must retain the above copyright // notice, this list of conditions and the following disclaimer. // 2. Redistributions in binary form must reproduce the above copyright // notice, this list of conditions and the following disclaimer in the // documentation and/or other materials provided with the distribution. // 3. The name of the author may not be used to endorse or promote products // derived from this software without specific prior written permission. // // THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR // IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES // OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
// IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
// INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
// NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
// THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

#ifndef LEXICONINFO_INC
#define LEXICONINFO_INC

#include "general.h"
#include "Object.h"
#include "Vocabulary.h"
#include "PhoneInfo.h"

namespace Torch {

/// One pronunciation of a vocabulary word.
typedef struct
{
    int n_phones ;      // number of elements in 'phones'
    int *phones ;       // indices into the PhoneInfo phone list
    real log_prior ;    // log of the prior read from the lexicon file (log(1.0) when absent)
    int vocab_index ;   // index of the word in the Vocabulary (-1 after initLexInfoEntry)
} LexiconInfoEntry ;

/// Pronunciations available for one vocabulary word.
typedef struct
{
    int n_pronuns ;     // number of elements in 'pronuns'
    int *pronuns ;      // indices into LexiconInfo::entries
} VocabToLexMapEntry ;

/** This class stores information about how phonemes are assembled
    into pronunciations. For each pronunciation, a list of indices into
    a PhoneInfo instance is stored, along with a prior and an index into
    a Vocabulary instance. Information is also stored to map Vocabulary
    entries to one or more pronunciations.

    @author Darren Moore (moore@idiap.ch)
*/
class LexiconInfo : public Object
{
public:
    /// Number of elements in 'entries'.
    int n_entries ;
    /// All pronunciations, in the order they were read from the lexicon file.
    LexiconInfoEntry *entries ;
    /// Index in 'entries' of the sentence start word's pronunciation (-1 if none).
    int sent_start_index ;
    /// Index in 'entries' of the sentence end word's pronunciation (-1 if none).
    int sent_end_index ;
    /// Index in 'entries' of the silence word's pronunciation (-1 if none).
    int sil_index ;
    PhoneInfo *phone_info ;
    Vocabulary *vocabulary ;
    /// One element per vocabulary word, listing that word's pronunciations.
    VocabToLexMapEntry *vocab_to_lex_map ;

    /// Creates a LexiconInfo instance. 'phones_fname' is used to create a
    /// PhoneInfo instance (see PhoneInfo header). 'lex_fname' is used to
    /// create a Vocabulary instance and then to create pronunciation
    /// entries and the mapping between Vocabulary entries and pronunciation
    /// entries.
    LexiconInfo( const char *phones_fname , const char *sil_phone , const char *pause_phone ,
                 const char *lex_fname , const char *sent_start_word=NULL ,
                 const char *sent_end_word=NULL , const char *sil_word=NULL ) ;
    virtual ~LexiconInfo() ;

    /// Resets 'entry' to an empty pronunciation.
    void initLexInfoEntry( LexiconInfoEntry *entry ) ;

#ifdef DEBUG
    /// Prints the content of the lexicon with printf (debug builds only).
    void outputText() ;
#endif
};

}

#endif
torch3-3.1.orig/datasets/HTKDataSet.h0000644000175000017500000000466210106445235017561 0ustar kalfakalfa00000000000000// Copyright (C) 2003--2004 Johnny Mariethoz (Johnny.Mariethoz@idiap.ch)
//
// This file is part of Torch 3.1.
//
// All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions
// are met:
// 1. Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
// 2. Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
// 3. The name of the author may not be used to endorse or promote products
// derived from this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
// IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
// OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
// IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
// INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
// NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
// THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#ifndef HTK_DATA_SET_INC
#define HTK_DATA_SET_INC

#include "MemoryDataSet.h"
#include "MeanVarNorm.h"
#include "IOHTK.h"
#include "LexiconInfo.h"

namespace Torch {

/** This dataset can deal with the HTK format for features and targets.

    @see MatDataSet
    @see IOHTK
    @see IOHTKTarget
    @author Johnny Mariethoz (Johnny.Mariethoz@idiap.ch)
*/
class HTKDataSet : public MemoryDataSet
{
  private:
    // NOTE(review): declared here, but no definition is visible in the
    // accompanying HTKDataSet.cc -- confirm it is still needed.
    void init_(IOSequence *io_file, int n_inputs_, int n_targets_);

    /// Temporary allocator owning the IO readers; the constructors create
    /// it and delete it again before returning.
    Allocator *io_allocator;

  public:
    /// Copied from the #sample_period# field of the (first) HTK input file
    /// header when targets are given, and passed on to #IOHTKTarget#.
    /// Not assigned by the constructors when no targets are given.
    int n_per_frame;

    /// Dataset built from a single HTK feature file and, optionally, one
    /// label file (#targets_filename#).
    HTKDataSet(const char *inputs_filenames, bool one_file_is_one_sequence, int max_load, const char * targets_filename = NULL, LexiconInfo* lex_=NULL, bool words=true);

    /// Dataset built from #n_files_# HTK feature files and, optionally, the
    /// matching label files (#targets_filename# may be NULL).
    HTKDataSet(char **inputs_filenames, char ** targets_filename, int n_files_, bool one_file_is_one_sequence, int max_load, LexiconInfo* lex_=NULL, bool words=true);

    virtual ~HTKDataSet();
};

}

#endif
torch3-3.1.orig/datasets/PhoneInfo.cc0000644000175000017500000002363510106445235017711 0ustar kalfakalfa00000000000000// Copyright (C) 2003--2004 Darren Moore (moore@idiap.ch)
//
// This file is part of Torch 3.1.
//
// All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions
// are met:
// 1. Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
// 2. Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
// 3. The name of the author may not be used to endorse or promote products
// derived from this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
// IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
// OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
// IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, // INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT // NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF // THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #include "PhoneInfo.h" namespace Torch { PhoneInfo::PhoneInfo() { n_phones = 0 ; phone_names = NULL ; sil_index = -1 ; pause_index = -1 ; } PhoneInfo::PhoneInfo( const char *phones_fname , const char *sil_name , const char *pause_name ) { DiskXFile phones_fd( phones_fname , "r" ) ; char *str , line[1000] ; int total_n_phones ; if ( (phones_fname == NULL) || (strcmp(phones_fname,"")==0) ) error("PhoneInfo::PhoneInfo(2) - phones_fname undefined\n") ; n_phones = 0 ; phone_names = NULL ; sil_index = -1 ; pause_index = -1 ; // read the first line of the file and use it to determine the file type phones_fd.gets( line , 1000 ) ; if ( strstr( line , "PHONE" ) ) { // This is a NOWAY format phone models file readPhonesFromNoway( &phones_fd , sil_name , pause_name ) ; } else if ( strstr( line , "~o" ) ) { // This is a HTK model definition file readPhonesFromHTK( &phones_fd , sil_name , pause_name ) ; } else { // Assume that the file contains just a list of phone names // with 1 phone name per line. // Do a first pass to determine the number of phones total_n_phones = 0 ; do { if ( (line[0] == '#') || ((str = strtok( line , " \r\n\t" )) == NULL) ) continue ; total_n_phones++ ; } while ( phones_fd.gets( line , 1000 ) != NULL ) ; // Allocate some memory for the list of phone names. 
phone_names = (char **)allocator->alloc( total_n_phones * sizeof(char *) ) ; phones_fd.seek( 0 , SEEK_SET ) ; while ( phones_fd.gets( line , 1000 ) != NULL ) { if ( (line[0] == '#') || ((str = strtok( line , " \r\n\t" )) == NULL) ) continue ; if ( n_phones >= total_n_phones ) error("PhoneInfo::PhoneInfo - n_phones exceeds expected\n") ; phone_names[n_phones] = (char *)allocator->alloc( (strlen(str)+1) * sizeof(char) ) ; strcpy( phone_names[n_phones] , str ) ; if ( (sil_name != NULL) && (strcmp(sil_name,str)==0) ) { if ( sil_index >= 0 ) error("PhoneInfo::PhoneInfo(2) - sil_index already defined\n") ; sil_index = n_phones ; } if ( (pause_name != NULL) && (strcmp(pause_name,str)==0) ) { if ( pause_index >= 0 ) error("PhoneInfo::PhoneInfo(2) - pause_index already defined\n") ; pause_index = n_phones ; } n_phones++ ; } if ( n_phones != total_n_phones ) error("PhoneInfo::PhoneInfo(2) - unexpected n_phones\n") ; } if ( (sil_name != NULL) && (strcmp(sil_name,"")!=0) && (sil_index<0) ) error("PhoneInfo::PhoneInfo(2) - silence phone not found\n") ; if ( (pause_name != NULL) && (strcmp(pause_name,"")!=0) && (pause_index<0) ) error("PhoneInfo::PhoneInfo(2) - pause phone not found\n") ; } PhoneInfo::~PhoneInfo() { } void PhoneInfo::addPhone( char *phone_name , bool is_sil , bool is_pause ) { if ( phone_name == NULL ) return ; phone_names = (char **)allocator->realloc( phone_names , (n_phones+1)*sizeof(char *) ) ; phone_names[n_phones] = (char *)allocator->alloc( (strlen(phone_name)+1)*sizeof(char) ) ; strcpy( phone_names[n_phones] , phone_name ) ; if ( is_sil == true ) { if ( sil_index >= 0 ) error("PhoneInfo::addPhone - silence phone already defined\n") ; sil_index = n_phones ; } if ( is_pause == true ) { if ( pause_index >= 0 ) error("PhoneInfo::addPhone - pause phone already defined\n") ; pause_index = n_phones ; } n_phones++ ; } char *PhoneInfo::getPhone( int index ) { if ( (index < 0) || (index >= n_phones) ) error("PhoneInfo::getPhone - index out of range\n") ; return 
phone_names[index] ; } int PhoneInfo::getIndex( char *phone_name ) { if ( phone_name == NULL ) error("PhoneInfo::getIndex - phone_name is NULL\n") ; // Just do a linear search. for ( int i=0 ; igets( line , 1000 ) ; if ( sscanf( line , "%d" , &n_phones ) != 1 ) error("PhoneInfo::readPhonesFromNoway - error reading n_phones\n") ; phone_names = (char **)allocator->alloc( n_phones * sizeof(char *) ) ; while ( phones_fd->gets( line , 1000 ) != NULL ) { // interpret the line containing the index, n_states, name fields if ( sscanf( line , "%d %d %s" , &index , &n_states , str ) != 3 ) error("PhoneInfo::readPhonesFromNoway - error reading index,n_st,name line\n") ; if ( index != (cnt+1) ) error("PhoneInfo::readPhonesFromNoway - phone index mismatch\n") ; // add the phone to our list phone_names[cnt] = (char *)allocator->alloc( (strlen(str)+1)*sizeof(char) ) ; strcpy( phone_names[cnt] , str ) ; if ( (sil_name != NULL) && (strcmp(sil_name,str)==0) ) { if ( sil_index >= 0 ) error("PhoneInfo::readPhonesFromNoway - sil_index already defined\n") ; sil_index = cnt ; } if ( (pause_name != NULL) && (strcmp(pause_name,str)==0) ) { if ( pause_index >= 0 ) error("PhoneInfo::readPhonesFromNoway - pause_index already defined\n") ; pause_index = cnt ; } // There are (n_states+1) lines before the next line containing a phone name. // Read and discard. for ( int i=0 ; i<(n_states+1) ; i++ ) phones_fd->gets( line , 1000 ) ; cnt++ ; } if ( cnt != n_phones ) error("PhoneInfo::readPhonesFromNoway - n_phones mismatch\n") ; } void PhoneInfo::readPhonesFromHTK( DiskXFile *phones_fd , const char *sil_name , const char *pause_name ) { char line[1000] , *str ; int total_n_phones=0 ; // Assume the first line of the file has already been read. // Do a first pass of the file to determine the number of phones. 
while ( phones_fd->gets( line , 1000 ) != NULL ) { if ( strstr( line , "~h" ) != NULL ) total_n_phones++ ; } // Allocate memory phone_names = (char **)allocator->alloc( total_n_phones * sizeof(char *) ) ; phones_fd->seek( 0 , SEEK_SET ) ; n_phones = 0 ; while ( phones_fd->gets( line , 1000 ) != NULL ) { if ( strstr( line , "~h" ) != NULL ) { strtok( line , "\"" ) ; // get past the ~h if ( (str = strtok( NULL , "\"" )) == NULL ) error("PhoneInfo::readPhonesFromHTK - could not locate phone name\n") ; if ( n_phones >= total_n_phones ) error("PhoneInfo::readPhonesFromHTK - n_phones exceeds expected\n") ; phone_names[n_phones] = (char *)allocator->alloc( (strlen(str)+1)*sizeof(char) ) ; strcpy( phone_names[n_phones] , str ) ; if ( (sil_name != NULL) && (strcmp(sil_name,str)==0) ) { if ( sil_index >= 0 ) error("PhoneInfo::readPhonesFromHTK - sil_index already defined\n") ; sil_index = n_phones ; } if ( (pause_name != NULL) && (strcmp(pause_name,str)==0) ) { if ( pause_index >= 0 ) error("PhoneInfo::readPhonesFromHTK - pause_index already defined\n") ; pause_index = n_phones ; } n_phones++ ; } } if ( total_n_phones != n_phones ) error("PhoneInfo::readPhonesFromHTK - unexpected n_phones\n") ; } #ifdef DEBUG void PhoneInfo::outputText() { printf("PhoneInfo: n_phones=%d sil_index=%d pause_index=%d\n",n_phones,sil_index,pause_index) ; for ( int i=0 ; ialloc(strlen(filename_)+1); header = (HTKHeader *)allocator->alloc(sizeof(HTKHeader)); strcpy(filename, filename_); // Read the file... 
file = new(allocator) DiskXFile(filename, "r"); // Read the header readHeader(file); //vector size and number of frames n_total_frames = header->n_samples; frame_size = header->sample_size/4; //some error check if(!(n_total_frames >= 0)||!(frame_size>0)) error("IOHTK: file %s is probably not an HTK file format\n",filename); if( (max_load > 0) && (max_load < n_total_frames) && (!one_file_is_one_sequence) ) { n_total_frames = max_load; message("IOHTK: loading only %d rows", n_total_frames); } // Prepare the sequence buffer... if(one_file_is_one_sequence) n_sequences = 1; else n_sequences = n_total_frames; current_frame_index = -1; allocator->free(file); } void IOHTK::readHeader(XFile* file_){ file_->read(&header->n_samples,sizeof(long),1); file_->read(&header->sample_period,sizeof(long),1); file_->read(&header->sample_size,sizeof(short),1); file_->read(&header->sample_kind,sizeof(short),1); } void IOHTK::getSequence(int t, Sequence* sequence) { // Cas simple: on lit tout le bordel if(one_file_is_one_sequence) { file = new(allocator) DiskXFile(filename, "r"); readHeader(file); #ifdef USE_DOUBLE float* temp = (float*)allocator->alloc(sizeof(float)*frame_size); for(int i = 0; i < n_total_frames; i++){ file->read(temp, sizeof(float),frame_size); for(int j = 0; j < frame_size; j++) sequence->frames[i][j] = temp[j]; } allocator->free(temp); #else for(int i = 0; i < n_total_frames; i++) file->read(sequence->frames[i], sizeof(real), frame_size); #endif allocator->free(file); } else { // Sequentiel ? if(is_sequential) { if(t != current_frame_index+1) error("IOBin: sorry, data are accessible only in a sequential way"); // Doit-on ouvrir le putain de fichier ? 
if(current_frame_index < 0) { file = new(allocator) DiskXFile(filename, "r"); readHeader(file); } } else { file = new(allocator) DiskXFile(filename, "r"); if(file->seek(t*frame_size*sizeof(real)+2*sizeof(long)+2*sizeof(short), SEEK_CUR) != 0) error("IOBin: cannot seek in your file!"); } // Lis la frame mec #ifdef USE_DOUBLE float* temp = (float*)allocator->alloc(sizeof(float)*frame_size); file->read(temp, sizeof(float),frame_size); for(int j = 0; j < frame_size; j++) sequence->frames[0][j] = temp[j]; allocator->free(temp); #else file->read(sequence->frames[0], sizeof(real), frame_size); #endif if(is_sequential) { // Si je suis a la fin du fichier, je le zigouille. current_frame_index++; if(current_frame_index == n_total_frames-1) { allocator->free(file); current_frame_index = -1; } } else allocator->free(file); } } int IOHTK::getNumberOfFrames(int t) { if(one_file_is_one_sequence) return n_total_frames; else return 1; } int IOHTK::getTotalNumberOfFrames() { return n_total_frames; } void IOHTK::saveSequence(XFile *file, Sequence* sequence, HTKHeader* header_) { file->write(&header_->n_samples,sizeof(long),1); file->write(&header_->sample_period,sizeof(long),1); file->write(&header_->sample_size,sizeof(short),1); file->write(&header_->sample_kind,sizeof(short),1); #ifdef USE_DOUBLE float* temp = (float*)Allocator::sysAlloc(sizeof(float)*sequence->frame_size); for(int i = 0; i < sequence->n_frames; i++){ for(int j = 0; j < sequence->frame_size; j++) temp[j] = (float)sequence->frames[i][j]; file->write(temp, sizeof(float), sequence->frame_size); } free(temp); #else for(int i = 0; i < sequence->n_frames; i++) file->write(sequence->frames[i], sizeof(real), sequence->frame_size); #endif } IOHTK::~IOHTK() { } /******************* HTK source code **********************/ char* IOHTK::parmKind2Str(ParmKind the_kind, char* buf) { strcpy(buf,pmkmap[baseParmKind(the_kind)]); if (hasEnergy(the_kind)) strcat(buf,"_E"); if (hasDelta(the_kind)) strcat(buf,"_D"); if 
(hasNulle(the_kind)) strcat(buf,"_N"); if (hasAccs(the_kind)) strcat(buf,"_A"); if (hasCompx(the_kind)) strcat(buf,"_C"); if (hasCrcc(the_kind)) strcat(buf,"_K"); if (hasZerom(the_kind)) strcat(buf,"_Z"); if (hasZeroc(the_kind)) strcat(buf,"_0"); if (hasVQ(the_kind)) strcat(buf,"_V"); return buf; } ParmKind IOHTK::str2ParmKind(char *str) { ParmKind i = -1; char *s,buf[255]; bool hasE,hasD,hasN,hasA,hasC,hasK,hasZ,has0,hasV,found; int len; hasV=hasE=hasD=hasN=hasA=hasC=hasK=hasZ=has0=false; strcpy(buf,str);len=strlen(buf); s=buf+len-2; while (len>2 && *s=='_') { switch(*(s+1)){ case 'E': hasE = true;break; case 'D': hasD = true;break; case 'N': hasN = true;break; case 'A': hasA = true;break; case 'C': hasC = true;break; case 'K': hasK = true;break; case 'Z': hasZ = true;break; case '0': has0 = true;break; case 'V': hasV = true;break; default: error("str2ParmKind: unknown ParmKind qualifier %s",str); exit (-1); } *s = '\0';len -= 2;s -= 2; } found = false; do { s=(char*)pmkmap[++i]; if (strcmp(buf,s) == 0) { found = true; break; } } while (strcmp("ANON",s)!=0); if (!found) return ANON; if (i == LPDELCEP) /* for backward compatibility with V1.2 */ i = LPCEPSTRA | HASDELTA; if (hasE) i |= HASENERGY; if (hasD) i |= HASDELTA; if (hasN) i |= HASNULLE; if (hasA) i |= HASACCS; if (hasK) i |= HASCRCC; if (hasC) i |= HASCOMPX; if (hasZ) i |= HASZEROM; if (has0) i |= HASZEROC; if (hasV) i |= HASVQ; return i; } ParmKind IOHTK::baseParmKind(ParmKind k) { return k & BASEMASK;} /* EXPORT->HasXXXX: returns true if XXXX included in ParmKind */ bool IOHTK::hasEnergy(ParmKind k){return (k & HASENERGY) != 0;} bool IOHTK::hasDelta(ParmKind k) {return (k & HASDELTA) != 0;} bool IOHTK::hasAccs(ParmKind k) {return (k & HASACCS) != 0;} bool IOHTK::hasNulle(ParmKind k) {return (k & HASNULLE) != 0;} bool IOHTK::hasCompx(ParmKind k) {return (k & HASCOMPX) != 0;} bool IOHTK::hasCrcc(ParmKind k) {return (k & HASCRCC) != 0;} bool IOHTK::hasZerom(ParmKind k) {return (k & HASZEROM) != 0;} bool 
IOHTK::hasZeroc(ParmKind k) {return (k & HASZEROC) != 0;} bool IOHTK::hasVQ(ParmKind k) {return (k & HASVQ) != 0;} } torch3-3.1.orig/datasets/IOHTK.h0000644000175000017500000001222610106445235016536 0ustar kalfakalfa00000000000000// Copyright (C) 2003--2004 Johnny Mariethoz (Johnny.Mariethoz@idiap.ch) // // This file is part of Torch 3.1. // // All rights reserved. // // Redistribution and use in source and binary forms, with or without // modification, are permitted provided that the following conditions // are met: // 1. Redistributions of source code must retain the above copyright // notice, this list of conditions and the following disclaimer. // 2. Redistributions in binary form must reproduce the above copyright // notice, this list of conditions and the following disclaimer in the // documentation and/or other materials provided with the distribution. // 3. The name of the author may not be used to endorse or promote products // derived from this software without specific prior written permission. // // THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR // IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES // OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. // IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, // INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT // NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF // THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
#ifndef IO_HTK_INC #define IO_HTK_INC #include "IOSequence.h" #include "DiskXFile.h" namespace Torch { // HTK File Header struct HTKHeader { long n_samples; long sample_period; short sample_size; short sample_kind; }; #define BASEMASK 077 /* Mask to remove qualifiers */ #define HASENERGY 0100 /* _E log energy included */ #define HASNULLE 0200 /* _N absolute energy suppressed */ #define HASDELTA 0400 /* _D delta coef appended */ #define HASACCS 01000 /* _A acceleration coefs appended */ #define HASCOMPX 02000 /* _C is compressed */ #define HASZEROM 04000 /* _Z zero meaned */ #define HASCRCC 010000 /* _K has CRC check */ #define HASZEROC 020000 /* _0 0'th Cepstra included */ #define HASVQ 040000 /* _V has VQ index attached */ enum _BaseParmKind{ WAVEFORM, /* Raw speech waveform (handled by HWave) */ LPC,LPREFC,LPCEPSTRA,LPDELCEP, /* LP-based Coefficients */ IREFC, /* Ref Coef in 16 bit form */ MFCC, /* Mel-Freq Cepstra */ FBANK, /* Log Filter Bank */ MELSPEC, /* Mel-Freq Spectrum (Linear) */ USER, /* Arbitrary user specified data */ DISCRETE, /* Discrete VQ symbols (shorts) */ ANON }; typedef short ParmKind; /* BaseParmKind + Qualifiers */ /** Handles the standard binary sequence format in HTK. @see IOBin @author Johnny Mariethoz (Johnny.Mariethoz@idiap.ch) */ class IOHTK : public IOSequence { private: DiskXFile *file; int current_frame_index; void readHeader(XFile* file); public: bool one_file_is_one_sequence; int n_total_frames; char *filename; int max_load; bool is_sequential; /// Contain the htk header informations HTKHeader* header; /** Reads the sequence contained in #filename#. If #one_file_is_one_sequence# is false, #getSequence()# will return one sequence with one frame at each call. (If calling #getSequence(t, foo)#, it will put in the sequence #foo# the frame corresponding to the line #t# of the file). Note also that if #one_file_is_one_sequence# is false, the access to the IO must be sequential when calling #getSequence()# if #is_sequential# is true. 
(Sequential mode is faster). If #max_load_# is positive, it loads only the first #max_load_# frames, if #one_file_is_one_sequence# is false. The file will be opened when reading the first sequence, and closed when reading the last one if #is_sequential# is true. Otherwise, the file will be opened and closed each time you call #getSequence()#. */ IOHTK(const char *filename_, bool one_file_is_one_sequence_=false, int max_load_=-1, bool is_sequential_=false); /// Saves #sequence# in #file# using the HTK format. static void saveSequence(XFile *file, Sequence *sequence, HTKHeader* header_); virtual void getSequence(int t, Sequence *sequence); virtual int getNumberOfFrames(int t); virtual int getTotalNumberOfFrames(); virtual ~IOHTK(); /// HTK source code char* parmKind2Str(ParmKind kind, char *buf); ParmKind str2ParmKind(char *str); ParmKind baseParmKind(ParmKind kind); bool hasEnergy(ParmKind kind); bool hasDelta(ParmKind kind) ; bool hasAccs(ParmKind kind) ; bool hasNulle(ParmKind kind) ; bool hasCompx(ParmKind kind) ; bool hasCrcc(ParmKind kind) ; bool hasZerom(ParmKind kind) ; bool hasZeroc(ParmKind kind) ; bool hasVQ(ParmKind kind) ; }; } #endif torch3-3.1.orig/datasets/Vocabulary.cc0000644000175000017500000001774010106445235020133 0ustar kalfakalfa00000000000000// Copyright (C) 2003--2004 Darren Moore (moore@idiap.ch) // // This file is part of Torch 3.1. // // All rights reserved. // // Redistribution and use in source and binary forms, with or without // modification, are permitted provided that the following conditions // are met: // 1. Redistributions of source code must retain the above copyright // notice, this list of conditions and the following disclaimer. // 2. Redistributions in binary form must reproduce the above copyright // notice, this list of conditions and the following disclaimer in the // documentation and/or other materials provided with the distribution. // 3. 
The name of the author may not be used to endorse or promote products // derived from this software without specific prior written permission. // // THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR // IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES // OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. // IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, // INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT // NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF // THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #include "Vocabulary.h" #include "ctype.h" #include "log_add.h" #include "DiskXFile.h" namespace Torch { Vocabulary::Vocabulary( const char *lex_fname , const char *sent_start_word , const char *sent_end_word , const char *sil_word ) { DiskXFile lex_fd( lex_fname , "r" ) ; char line1[1000] , line2[1000] , *line , *prev_line ; int total_n_words ; if ( (lex_fname == NULL) || (strcmp(lex_fname,"")==0) ) error("Vocabulary::Vocabulary - lexicon filename undefined\n") ; n_words = 0 ; words = NULL ; sent_start_index = -1 ; sent_end_index = -1 ; sil_index = -1 ; // Do a first-pass of the file to determine memory requirements. // We assume that multiple pronunciations will be grouped together. line = line1 ; prev_line = line2 ; prev_line[0] = '\0' ; total_n_words = 0 ; while ( lex_fd.gets( line , 1000 ) != NULL ) { if ( (line[0] == '#') || (strtok( line , "(\r\n\t " ) == NULL) ) continue ; // Is this a new word ? 
if ( strcmp( line , prev_line ) != 0 ) total_n_words++ ; if ( line == line1 ) { line = line2 ; prev_line = line1 ; } else { line = line1 ; prev_line = line2 ; } } // Allocate the 'words' array words = (char **)allocator->alloc( total_n_words * sizeof(char *) ) ; // Return to the start of the file. lex_fd.seek( 0 , SEEK_SET ) ; // Add words to the vocabulary. // Do not add duplicates. // Maintain alphabetical order. while ( lex_fd.gets( line , 1000 ) != NULL ) { if ( (line[0] == '#') || (strtok( line , "(\r\n\t " ) == NULL) ) continue ; // add it to the vocabulary addWord( line ) ; } if ( n_words > total_n_words ) error("Vocabulary::Vocabulary - n_words exceeds expected.\n") ; sent_start_index = -1 ; if ( (sent_start_word != NULL) && (strcmp(sent_start_word,"") != 0) ) { for ( int i=0 ; i= 0 ) error("Vocabulary::Vocabulary - duplicate start words\n") ; sent_start_index = i ; } } } sent_end_index = -1 ; if ( (sent_end_word != NULL) && (strcmp(sent_end_word,"") != 0) ) { for ( int i=0 ; i= 0 ) error("Vocabulary::Vocabulary - duplicate end words\n") ; sent_end_index = i ; } } } sil_index = -1 ; if ( (sil_word != NULL) && (strcmp(sil_word,"") != 0) ) { for ( int i=0 ; i= 0 ) error("Vocabulary::Vocabulary - duplicate end words\n") ; sil_index = i ; } } } if ( n_words != total_n_words ) error("Vocabulary::Vocabulary - did not get expected n_words\n") ; } Vocabulary::~Vocabulary() { } void Vocabulary::addWord( char *word ) { int cmp_result=0 ; if ( word[0] == '#' ) { // The string is a comment so don't add to vocabulary return ; } if ( n_words > 0 ) cmp_result = strcmp( words[n_words-1] , word ) ; if ( (cmp_result < 0) || (n_words == 0) ) { // The new word belongs at the end of the list // Allocate memory in the list of words for the new word words[n_words] = (char *)allocator->alloc( (strlen(word)+1) * sizeof(char) ) ; strcpy( words[n_words] , word ) ; n_words++ ; return ; } else if ( cmp_result > 0 ) { // Find the place in the list of words where we want to insert the 
new word for ( int i=0 ; i 0 ) { // Shuffle down all words from i onwards and place the // new word in position i. // Allocate memory in the list of words for the new word for ( int j=n_words ; j>i ; j-- ) words[j] = words[j-1] ; words[i] = (char *)allocator->alloc( (strlen(word)+1) * sizeof(char) ) ; strcpy( words[i] , word ) ; n_words++ ; return ; } else if ( cmp_result == 0 ) { // the word is already in our vocab - don't duplicate return ; } } } else { // The word is already at the end of the list - don't duplicate return ; } // If we make it here there is a problem return ; } char *Vocabulary::getWord( int index ) { if ( (index<0) || (index>=n_words) ) error("Vocabulary::getWord - index out of range\n") ; else return words[index] ; return NULL ; } int Vocabulary::getIndex( char *word , int guess ) { // We assume that the list of words is in ascending order so // that we can do a binary search. int min=0 , max=(n_words-1) , curr_pos=0 ; int cmp_result=0 ; // If guess is valid, do a quick check to see if the word is where // the caller expects it to be - either at guess or at guess+1 if ( (guess >= 0) && (guess 0 ) min = curr_pos+1 ; else return curr_pos ; if ( min > max ) return -1 ; } return -1 ; } #ifdef DEBUG void Vocabulary::outputText() { printf("** START VOCABULARY - n_words=%d start_index=%d end_index=%d sil_index=%d**\n" , n_words , sent_start_index , sent_end_index , sil_index ) ; for ( int i=0 ; ialloc(strlen(filename_)+1); strcpy(filename, filename_); // Read the file... 
file = new(allocator) DiskXFile(filename, "r"); char buffer[80]; file->gets(buffer,80); char* elements[10]; elements[0] = strtok(buffer," \t"); int n_element; for (n_element=1;(elements[n_element]=strtok(NULL," \t"));n_element++); if(n_element == 1){ frame_size = 1; }else if (n_element == 3){ frame_size = 2; }else error("IOHTKTarget: this target file contains more nor 1 or 3 columns (%d)...",n_element); n_total_frames = 1; while(file->gets(buffer,80)){ n_total_frames++; } n_sequences = 1; allocator->free(file); } int IOHTKTarget::getNumberOfFrames(int t) { return n_total_frames; } int IOHTKTarget::getTotalNumberOfFrames() { return n_total_frames; } void IOHTKTarget::saveSequence(XFile *file_, Sequence* sequence, LexiconInfo* lex_, int n_per_frame_, bool words_) { for(int i = 0; i < sequence->n_frames; i++) { if (sequence->frame_size == 2) { int begin = i == 0 ? 0 : (int)(sequence->frames[i-1][1] * n_per_frame_); int end = (int)(sequence->frames[i][1] *n_per_frame_); file_->printf("%d",begin); file_->printf("%d",end); } if(words_) file_->printf("%s\n",lex_->vocabulary->words[(int)sequence->frames[i][0]]); else file_->printf("%s\n",lex_->phone_info->phone_names[(int)sequence->frames[i][0]]); } } void IOHTKTarget::getSequence(int t, Sequence* sequence) { // Read the file... char buffer[80]; int begin; int end; file = new(allocator) DiskXFile(filename, "r"); for(int i = 0; i < n_total_frames; i++) { real *dest_ = sequence->frames[i]; if(frame_size == 2){ file->scanf("%d",&begin); file->scanf("%d",&end); dest_[1] = (real)(end/n_per_frame); } file->scanf("%s",&buffer); if(words) dest_[0] = lexicon->vocabulary->getIndex(buffer); else dest_[0] = lexicon->phone_info->getIndex(buffer); if (dest_[0] < 0) error("IOHTKTarget::getSequence: \"%s\" not found in %s", buffer, words? 
"vocabulary" : "phone set"); } allocator->free(file); } IOHTKTarget::~IOHTKTarget() { } } torch3-3.1.orig/datasets/IOHTKTarget.h0000644000175000017500000000546410106445235017713 0ustar kalfakalfa00000000000000// Copyright (C) 2003--2004 Johnny Mariethoz (Johnny.Mariethoz@idiap.ch) // // This file is part of Torch 3.1. // // All rights reserved. // // Redistribution and use in source and binary forms, with or without // modification, are permitted provided that the following conditions // are met: // 1. Redistributions of source code must retain the above copyright // notice, this list of conditions and the following disclaimer. // 2. Redistributions in binary form must reproduce the above copyright // notice, this list of conditions and the following disclaimer in the // documentation and/or other materials provided with the distribution. // 3. The name of the author may not be used to endorse or promote products // derived from this software without specific prior written permission. // // THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR // IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES // OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. // IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, // INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT // NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF // THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #ifndef IO_HTK_TARGET_INC #define IO_HTK_TARGET_INC #include "IOSequence.h" #include "LexiconInfo.h" #include "DiskXFile.h" namespace Torch { /** Handles the standard Ascii HTK targets/labels format in Torch. 
There are two format: \begin{itemize} \item each line is a word/phoneme separated by a space). \item each line consiste in two integer (begin and end of the sequence) and a string containing the label (word/phoneme) \end{itemize} @see IOAscii @author Johnny Mariethoz (Johnny.Mariethoz@idiap.ch) */ class IOHTKTarget : public IOSequence { private: DiskXFile *file; int current_frame_index; public: bool one_file_is_one_sequence; int n_total_frames; char *filename; int max_load; int n_per_frame; LexiconInfo* lexicon; bool words; IOHTKTarget(const char *filename_, LexiconInfo* lex_, int n_per_frame_, bool words=true); /// Saves one #sequence# in #file# using the ascii format. /// static added by John Dines static void saveSequence(XFile *file, Sequence* sequence, LexiconInfo* lex_, int n_per_frame_, bool words_=true); virtual void getSequence(int t, Sequence *sequence); virtual int getNumberOfFrames(int t); virtual int getTotalNumberOfFrames(); virtual ~IOHTKTarget(); }; } #endif torch3-3.1.orig/datasets/Makefile0000644000175000017500000000172710106445235017153 0ustar kalfakalfa00000000000000# get user and architecture specific options OS := $(shell uname -s) TORCHDIR := $(shell cd ..; pwd) include ../Makefile_options_$(OS) CC_FILES := $(wildcard *.cc) OBJS := $(foreach f,$(CC_FILES),$(OBJS_DIR)/$(patsubst %.cc,%.o,$(f))) all: $(LIBTORCH) $(LIBTORCH): $(OBJS) @echo "Archiving..." @$(AR) $(LIBTORCH) $(OBJS) $(OBJS_DIR)/%.o: %.cc @echo $< @$(CC) $(CFLAGS_$(MODE)) $(INCS) -o $@ -c $< distclean: @\rm -f .deps_* clean: @echo "Remove objects file and dependencies..." @\rm -Rf $(OBJS) $(LIBTORCH) @\rm -f .deps_$(VERSION_KEY) depend: @echo "Tracking dependencies..." 
@\rm -f .deps_$(VERSION_KEY) @for file in *.cc ; do printf "$(OBJS_DIR)/" >> .deps_$(VERSION_KEY); $(DEP) $(CFLAGS_$(MODE)) $(INCS) $$file >> .deps_$(VERSION_KEY); done .deps_$(VERSION_KEY): @echo ">>> Please do a 'make depend' <<<" exit 10 ifneq ($(MAKECMDGOALS),distclean) ifneq ($(MAKECMDGOALS),clean) ifneq ($(MAKECMDGOALS),depend) include .deps_$(VERSION_KEY) endif endif endif torch3-3.1.orig/datasets/Vocabulary.h0000644000175000017500000000622110106445235017765 0ustar kalfakalfa00000000000000// Copyright (C) 2003--2004 Darren Moore (moore@idiap.ch) // // This file is part of Torch 3.1. // // All rights reserved. // // Redistribution and use in source and binary forms, with or without // modification, are permitted provided that the following conditions // are met: // 1. Redistributions of source code must retain the above copyright // notice, this list of conditions and the following disclaimer. // 2. Redistributions in binary form must reproduce the above copyright // notice, this list of conditions and the following disclaimer in the // documentation and/or other materials provided with the distribution. // 3. The name of the author may not be used to endorse or promote products // derived from this software without specific prior written permission. // // THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR // IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES // OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
// IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, // INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT // NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF // THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #ifndef VOCABULARY_INC #define VOCABULARY_INC #include "general.h" #include "Object.h" namespace Torch { /** This object contains the list of words we want our recogniser to recognise plus a few "special" words (eg. sentence markers, silence word). There are no duplicates in the list, and the list is sorted alphabetically. @author Darren Moore (moore@idiap.ch) */ class Vocabulary : public Object { public: int n_words ; char **words ; int sent_start_index ; int sent_end_index ; int sil_index ; /* Constructors / Destructor */ /// Creates the vocabulary. /// 'lex_fname' is the name of the lexicon file containing the pronunciations to be /// recognised. The format is the standard "word(prior) ph ph ph" format /// where the (prior) is optional. /// 'sent_start_word' and 'sent_end_word' are the words that will start and /// end every recognised utterance. Vocabulary( const char *lex_fname , const char *sent_start_word , const char *sent_end_word , const char *sil_word=NULL ) ; virtual ~Vocabulary() ; /* Methods */ /// Adds a word to the vocabulary. Maintains alphabetic order. Does not add /// duplicate entries. void addWord( char *word ) ; /// Returns the word given the index into the vocabulary char *getWord( int index ) ; /// Returns the index of a given word. If 'guess' is defined, then the /// words at indices of 'guess' and 'guess+1' are checked for a match /// before the rest of the vocab is searched. 
int getIndex( char *word , int guess=-1 ) ; #ifdef DEBUG void outputText() ; #endif }; } #endif torch3-3.1.orig/datasets/ExampleFrameSelectorDataSet.cc0000644000175000017500000001230410106445235023330 0ustar kalfakalfa00000000000000// Copyright (C) 2003--2004 Johnny Mariethoz (Johnny.Mariethoz@idiap.ch) // // This file is part of Torch 3.1. // // All rights reserved. // // Redistribution and use in source and binary forms, with or without // modification, are permitted provided that the following conditions // are met: // 1. Redistributions of source code must retain the above copyright // notice, this list of conditions and the following disclaimer. // 2. Redistributions in binary form must reproduce the above copyright // notice, this list of conditions and the following disclaimer in the // documentation and/or other materials provided with the distribution. // 3. The name of the author may not be used to endorse or promote products // derived from this software without specific prior written permission. // // THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR // IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES // OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. // IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, // INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT // NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF // THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
#include "ExampleFrameSelectorDataSet.h" namespace Torch { ExampleFrameSelectorDataSet::ExampleFrameSelectorDataSet(DataSet *data_) { data = data_; if(data->n_examples == 0) error("ExampleFrameSelectorDataSet: cannot handle DataSet with no examples"); internal = NULL; DataSet::init(data->n_examples, data->n_inputs, data->n_targets); n_max_internal = n_examples; selected_example_size = n_examples; data->setExample(0); if(n_inputs > 0) inputs = new(allocator) Sequence(0, n_inputs); else inputs = NULL; if(n_targets > 0) targets = new(allocator) Sequence(0, n_targets); else targets = NULL; n_pushed_examples = 0; n_max_pushed_examples = 0; pushed_examples = NULL; n_max_internal = 0; n_examples = 0; n_real_examples = n_examples; } void ExampleFrameSelectorDataSet::addExample(int t, int inputs_start_indices_, int n_inputs_frames_, int targets_start_indices_, int n_targets_frames_) { int index = n_examples++; n_real_examples = n_examples; if(n_examples > n_max_internal){ n_max_internal++; internal = (InternalAMoi *)allocator->realloc(internal, sizeof(InternalAMoi)*(n_max_internal)); } internal[index].data_index = t; internal[index].start_inputs_frame = inputs_start_indices_; internal[index].start_targets_frame = targets_start_indices_; internal[index].n_selected_inputs_frames = n_inputs_frames_; internal[index].n_selected_targets_frames = n_targets_frames_; if(n_examples >= selected_example_size){ selected_example_size *= 2; allocator->free(selected_examples); selected_examples = (int *)allocator->alloc(sizeof(int)*selected_example_size); for(int i = 0; i < selected_example_size; i++) selected_examples[i] = i; } } void ExampleFrameSelectorDataSet::preProcess(PreProcessing *pre_processing) { error("ExampleFrameSelectorDataSet: preProcess() not supported !!"); } void ExampleFrameSelectorDataSet::removeExample(int t){ InternalAMoi* ptr = internal+t; InternalAMoi* to_copy_ptr = internal+t+1; for(int i=t;isetExample(ptr->data_index, set_inputs, set_targets); if(set_inputs){ 
inputs->resize(ptr->n_selected_inputs_frames, false); for(int i = 0; i < ptr->n_selected_inputs_frames; i++) inputs->frames[i] = data->inputs->frames[ptr->start_inputs_frame+i]; } if(set_targets) { targets->resize(ptr->n_selected_targets_frames, false); for(int i = 0; i < ptr->n_selected_targets_frames; i++) targets->frames[i] = data->targets->frames[ptr->start_targets_frame+i]; } real_current_example_index = t; } void ExampleFrameSelectorDataSet::pushExample() { data->pushExample(); pushed_examples->push(&inputs, sizeof(Sequence *)); pushed_examples->push(&targets, sizeof(Sequence *)); pushed_examples->push(&real_current_example_index, sizeof(int)); if(n_inputs > 0) inputs = new(allocator) Sequence(0, n_inputs); if(n_targets > 0) targets = new(allocator) Sequence(0, n_targets); real_current_example_index = -1; } void ExampleFrameSelectorDataSet::popExample() { allocator->free(inputs); allocator->free(targets); pushed_examples->pop(); pushed_examples->pop(); pushed_examples->pop(); data->popExample(); } ExampleFrameSelectorDataSet::~ExampleFrameSelectorDataSet() { } } torch3-3.1.orig/datasets/ExampleFrameSelectorDataSet.h0000644000175000017500000000600010106445235023166 0ustar kalfakalfa00000000000000// Copyright (C) 2003--2004 Johnny Mariethoz (Johnny.Mariethoz@idiap.ch) // // This file is part of Torch 3.1. // // All rights reserved. // // Redistribution and use in source and binary forms, with or without // modification, are permitted provided that the following conditions // are met: // 1. Redistributions of source code must retain the above copyright // notice, this list of conditions and the following disclaimer. // 2. Redistributions in binary form must reproduce the above copyright // notice, this list of conditions and the following disclaimer in the // documentation and/or other materials provided with the distribution. // 3. 
The name of the author may not be used to endorse or promote products // derived from this software without specific prior written permission. // // THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR // IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES // OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. // IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, // INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT // NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF // THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #ifndef EXAMPLE_FRAME_SELECTOR_DATA_SET_INC #define EXAMPLE_FRAME_SELECTOR_DATA_SET_INC #include "DataSet.h" namespace Torch { struct InternalAMoi{ int data_index; int start_inputs_frame; int start_targets_frame; int n_selected_inputs_frames; int n_selected_targets_frames; }; /** This dataset is empty at the begining. 
Each subsequence of the original dataset can be added/remove by the methods: \begin{itemize} \item addExample \item removeExample \end{itemize} @see FrameSelectorDataSet @author Johnny Mariethoz (Johnny.Mariethoz@idiap.ch) */ class ExampleFrameSelectorDataSet : public DataSet { private: int n_pushed_examples; int n_max_pushed_examples; InternalAMoi* internal; int n_max_internal; int selected_example_size; public: DataSet *data; int *n_selected_input_frames; int *n_selected_target_frames; int **input_frames_indices; int **target_frames_indices; /// ExampleFrameSelectorDataSet(DataSet *data_); void addExample(int t, int inputs_start_indices_, int n_inputs_frames_, int targets_start_indices_, int n_targets_frames_); void removeExample(int t); virtual void getNumberOfFrames(int t_, int *n_input_frames_, int *n_target_frames_); virtual void setRealExample(int t, bool set_inputs, bool set_targets); virtual void pushExample(); virtual void popExample(); virtual void preProcess(PreProcessing *pre_processing); //----- virtual ~ExampleFrameSelectorDataSet(); }; } #endif torch3-3.1.orig/datasets/LICENSE0000644000175000017500000000271110106445235016512 0ustar kalfakalfa00000000000000 Copyright (c) 2003--2004 Ronan Collobert Copyright (c) 2003--2004 Samy Bengio Copyright (c) 2003--2004 Johnny Mariéthoz All rights reserved. Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: 1. Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. 2. Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. 3. The name of the author may not be used to endorse or promote products derived from this software without specific prior written permission. 
THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. torch3-3.1.orig/decoder/0000755000175000017500000000000010106445236015302 5ustar kalfakalfa00000000000000torch3-3.1.orig/decoder/BeamSearchDecoder.cc0000644000175000017500000007030410106445236021075 0ustar kalfakalfa00000000000000// Copyright (C) 2003--2004 Darren Moore (moore@idiap.ch) // // This file is part of Torch 3.1. // // All rights reserved. // // Redistribution and use in source and binary forms, with or without // modification, are permitted provided that the following conditions // are met: // 1. Redistributions of source code must retain the above copyright // notice, this list of conditions and the following disclaimer. // 2. Redistributions in binary form must reproduce the above copyright // notice, this list of conditions and the following disclaimer in the // documentation and/or other materials provided with the distribution. // 3. The name of the author may not be used to endorse or promote products // derived from this software without specific prior written permission. // // THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR // IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES // OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
// IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, // INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT // NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF // THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #include "Allocator.h" #include "BeamSearchDecoder.h" namespace Torch { BeamSearchDecoder::BeamSearchDecoder( LinearLexicon *lexicon_ , LanguageModel *lang_model_ , real log_word_entrance_penalty_ , real word_int_beam_ , real word_end_beam_ , bool delayed_lm_ , bool verbose_mode_ ) { if ( lexicon_ == NULL ) error("BeamSearchDecoder::BeamSearchDecoder - no lexicon defined\n") ; lexicon = lexicon_ ; vocabulary = lexicon->lex_info->vocabulary ; lang_model = lang_model_ ; phone_models = lexicon->phone_models ; n_frames = 0 ; log_word_entrance_penalty = log_word_entrance_penalty_ ; verbose_mode = verbose_mode_ ; delayed_lm = delayed_lm_ ; word_state_hyps_1 = (DecodingHypothesis ***)Allocator::sysAlloc( lexicon->n_models * sizeof(DecodingHypothesis **) ) ; word_state_hyps_2 = (DecodingHypothesis ***)Allocator::sysAlloc( lexicon->n_models * sizeof(DecodingHypothesis **) ) ; word_end_hyps_1 = (DecodingHypothesis **)Allocator::sysAlloc( lexicon->n_models * sizeof(DecodingHypothesis *) ) ; word_end_hyps_2 = (DecodingHypothesis **)Allocator::sysAlloc( lexicon->n_models * sizeof(DecodingHypothesis *) ) ; word_entry_hyps_1 = (DecodingHypothesis **)Allocator::sysAlloc( lexicon->n_models * sizeof(DecodingHypothesis *) ) ; word_entry_hyps_2 = (DecodingHypothesis **)Allocator::sysAlloc( lexicon->n_models * sizeof(DecodingHypothesis *) ) ; curr_word_hyps = word_state_hyps_1 ; prev_word_hyps = word_state_hyps_2 ; curr_word_end_hyps = word_end_hyps_1 ; prev_word_end_hyps = 
word_end_hyps_2 ; curr_word_entry_hyps = word_entry_hyps_1 ; prev_word_entry_hyps = word_entry_hyps_2 ; for ( int w=0 ; wn_models ; w++ ) { word_state_hyps_1[w] = (DecodingHypothesis **)Allocator::sysAlloc( lexicon->nStatesInModel(w) * sizeof(DecodingHypothesis *) ) ; word_state_hyps_2[w] = (DecodingHypothesis **)Allocator::sysAlloc( lexicon->nStatesInModel(w) * sizeof(DecodingHypothesis *) ) ; for ( int s=0 ; snStatesInModel(w) ; s++ ) { word_state_hyps_1[w][s] = new DecodingHypothesis() ; word_state_hyps_1[w][s]->initHyp( w , s ) ; word_state_hyps_2[w][s] = new DecodingHypothesis() ; word_state_hyps_2[w][s]->initHyp( w , s ) ; } word_end_hyps_1[w] = word_state_hyps_1[w][lexicon->nStatesInModel(w)-1] ; word_end_hyps_2[w] = word_state_hyps_2[w][lexicon->nStatesInModel(w)-1] ; word_entry_hyps_1[w] = word_state_hyps_1[w][0] ; word_entry_hyps_2[w] = word_state_hyps_2[w][0] ; } sent_start_index = lexicon->lex_info->sent_start_index ; sent_end_index = lexicon->lex_info->sent_end_index ; max_interior_score = LOG_ZERO ; best_word_end_hyp = NULL ; if ( word_int_beam_ > 0.0 ) word_int_beam = word_int_beam_ ; else word_int_beam = -LOG_ZERO ; if ( word_end_beam_ > 0.0 ) word_end_beam = word_end_beam_ ; else word_end_beam = -LOG_ZERO ; } BeamSearchDecoder::~BeamSearchDecoder() { resetHypotheses() ; if ( word_state_hyps_1 != NULL ) { for ( int w=0 ; wn_models ; w++ ) { for ( int s=0 ; snStatesInModel(w) ; s++ ) delete word_state_hyps_1[w][s] ; free( word_state_hyps_1[w] ) ; } free( word_state_hyps_1 ) ; } if ( word_state_hyps_2 != NULL ) { for ( int w=0 ; wn_models ; w++ ) { for ( int s=0 ; snStatesInModel(w) ; s++ ) delete word_state_hyps_2[w][s] ; free( word_state_hyps_2[w] ) ; } free( word_state_hyps_2 ) ; } if ( word_end_hyps_1 != NULL ) free( word_end_hyps_1 ) ; if ( word_end_hyps_2 != NULL ) free( word_end_hyps_2 ) ; if ( word_entry_hyps_1 != NULL ) free( word_entry_hyps_1 ) ; if ( word_entry_hyps_2 != NULL ) free( word_entry_hyps_2 ) ; } void BeamSearchDecoder::decode( 
real **input_data , int n_frames_ , int *num_result_words , int **result_words , int **result_words_times ) { int temp_words[5000] , temp_times[5000] ; DecodingHypothesis *curr_hyp ; real score ; #ifdef DEBUG if ( (num_result_words==NULL) || (result_words==NULL) || (result_words_times==NULL) ) error("BeamSearchDecoder::decode - Result variables are NULL\n") ; #endif n_frames = n_frames_ ; // Initialise the hypothesis buffers and queues. init() ; // process the inputs for ( int t=0 ; tsetInputVector( input_data[t] ) ; // Process the transitions between states inside words. processWordInteriorStates() ; if ( t == (n_frames-1) ) { // We've reached the end of the input data - no need to evaluate word transitions. if ( verbose_mode == true ) fprintf( stderr , "\r \r") ; break ; } // If there is a language model, then tune the word-end hypotheses (that remain // after pruning) using the language model. // After that, evaluate word transitions. if ( lang_model != NULL ) { if ( delayed_lm == false ) processWordTransitionsLM( t ) ; else { applyLMProbs() ; processWordTransitionsNoLM( t ) ; } } else processWordTransitionsNoLM( t ) ; // We now have hypotheses for the initial states of all possible next words. // These hypotheses cannot remain in the (non-emitting) initial states. // We have to consider transitions from each initial state to all possible // (emitting) successor states and see if the word entry hypothesis score is better // than the current hypothesis in the successor state. // If so, we update the hypothesis in the successor state using the word entry // hypothesis. processWordEntryHypotheses() ; } if ( sent_end_index >= 0 ) { // We look at the hypothesis that is in the final state of the sentence end pronunciation. curr_hyp = curr_word_end_hyps[sent_end_index] ; } else { if ( (sent_start_index >= 0) && (best_word_end_hyp==curr_word_end_hyps[sent_start_index]) ) { // Cannot have start word as end of sentence word. // Find the next best. 
score = LOG_ZERO ; curr_hyp = NULL ; for ( int i=0 ; in_models ; i++ ) { if ( i == sent_start_index ) continue ; if ( curr_word_end_hyps[i]->score > score ) { score = curr_word_end_hyps[i]->score ; curr_hyp = curr_word_end_hyps[i] ; } } } else curr_hyp = best_word_end_hyp ; } if ( (curr_hyp == NULL) || (curr_hyp->score <= LOG_ZERO) ) { // There is no hypothesis that is in the final state of the sentence end model. *num_result_words = 0 ; *result_words = NULL ; *result_words_times = NULL ; return ; } // Allocate memory for the result array (ie. array of indices corresponding to words // in the lexicon). WordChainElem *temp_elem = curr_hyp->word_level_info ; *num_result_words = 0 ; while ( temp_elem != NULL ) { temp_words[*num_result_words] = temp_elem->word ; temp_times[*num_result_words] = temp_elem->word_start_frame ; temp_elem = temp_elem->prev_elem ; (*num_result_words)++ ; } *result_words = (int *)Allocator::sysAlloc( (*num_result_words) * sizeof(int) ) ; *result_words_times = (int *)Allocator::sysAlloc( (*num_result_words) * sizeof(int) ) ; for ( int w=0 ; w<(*num_result_words) ; w++ ) { (*result_words)[w] = temp_words[(*num_result_words)-w-1] ; (*result_words_times)[w] = temp_times[(*num_result_words)-w-1] ; } } void BeamSearchDecoder::resetHypotheses() { // Reset the scores of the new state hypotheses buffers // If the decoder has already been used, only the curr_word_hyps will // contain active hypotheses that need to be deactivated. 
for ( int w=0 ; wn_models ; w++ ) { for ( int s=0 ; s<(lexicon->nStatesInModel(w)) ; s++ ) { curr_word_hyps[w][s]->deactivate() ; // word_state_hyps_1[w][s]->deactivate() ; // word_state_hyps_2[w][s]->deactivate() ; } } } void BeamSearchDecoder::swapHypBuffers() { // Swap buffers if ( curr_word_hyps == word_state_hyps_1 ) { curr_word_hyps = word_state_hyps_2 ; prev_word_hyps = word_state_hyps_1 ; curr_word_end_hyps = word_end_hyps_2 ; prev_word_end_hyps = word_end_hyps_1 ; curr_word_entry_hyps = word_entry_hyps_2 ; prev_word_entry_hyps = word_entry_hyps_1 ; } else { curr_word_hyps = word_state_hyps_1 ; prev_word_hyps = word_state_hyps_2 ; curr_word_end_hyps = word_end_hyps_1 ; prev_word_end_hyps = word_end_hyps_2 ; curr_word_entry_hyps = word_entry_hyps_1 ; prev_word_entry_hyps = word_entry_hyps_2 ; } if ( verbose_mode == true ) { fprintf( stderr , ": " ) ; fflush(stderr) ; } } void BeamSearchDecoder::processWordInteriorStates() { DecodingHypothesis *prev_hyp ; real emission_prob , new_score , *suc_log_trans_probs , int_prune_thresh ; real temp_int_prune_thresh , temp_end_prune_thresh , max_end_score ; int n_processed=0 , n_states_minus_one ; short n_sucs , *sucs ; // Process the interior state hypotheses for the "normal" lexicon words. int_prune_thresh = max_interior_score - word_int_beam ; max_interior_score = LOG_ZERO ; max_end_score = LOG_ZERO ; temp_int_prune_thresh = LOG_ZERO ; temp_end_prune_thresh = LOG_ZERO ; best_word_end_hyp = NULL ; for ( int w=0 ; wn_models ; w++ ) { n_states_minus_one = lexicon->nStatesInModel(w) - 1 ; for ( int s=1 ; sscore <= LOG_ZERO ) continue ; #ifdef DEBUG // We assume from this point on that the word/state field in the hypothesis // correspond to the indices in the nested loops (s & w). Check that this is so. 
if ( (prev_hyp->word != w) || (prev_hyp->state != s) ) error("BeamSearchDecoder::processWordIntStates - word-state index mismatch\n") ; #endif if ( w == sent_end_index ) { // We don't want to prune any of the sentence end hypotheses. emission_prob = lexicon->calcEmissionProb( w , s ) ; lexicon->getSuccessorInfo( w , s , &n_sucs , &sucs , &suc_log_trans_probs ) ; for ( int suc=0 ; sucscore + emission_prob + suc_log_trans_probs[suc] ; if ( new_score > curr_word_hyps[w][sucs[suc]]->score ) curr_word_hyps[w][sucs[suc]]->extendState( prev_hyp , new_score ) ; } } else if ( prev_hyp->score >= int_prune_thresh ) { n_processed++ ; // Retrieve/calculate the emission probability for the current state. emission_prob = lexicon->calcEmissionProb( w , s ) ; // The hypothesis we've just retrieved is for a particular word, w, // and state, sprev. // See if a path through (w,sprev) improves the current hypothesis for // every (next) state, s, of word w. lexicon->getSuccessorInfo( w , s , &n_sucs , &sucs , &suc_log_trans_probs ) ; for ( int suc=0 ; sucscore + emission_prob + suc_log_trans_probs[suc] ; if ( sucs[suc] == n_states_minus_one ) { // The final state is a special case. If we have a language model, // then we want to prune word end hyps before we apply LM probs. // If we don't have a language model, then we only need to keep // track of the most likely word end. 
if ( lang_model != NULL ) { if ( (new_score >= temp_end_prune_thresh) && (new_score > curr_word_hyps[w][n_states_minus_one]->score ) ) { if ( new_score > max_end_score ) { best_word_end_hyp = curr_word_hyps[w][n_states_minus_one] ; max_end_score = new_score ; temp_end_prune_thresh = new_score - word_end_beam ; } curr_word_hyps[w][n_states_minus_one]->extendState( prev_hyp , new_score ) ; } } else { if ( new_score > max_end_score ) { if ( best_word_end_hyp != NULL ) best_word_end_hyp->deactivate() ; best_word_end_hyp = curr_word_hyps[w][n_states_minus_one] ; max_end_score = new_score ; curr_word_hyps[w][n_states_minus_one]->extendState( prev_hyp , new_score ) ; } } } else { if ( new_score > curr_word_hyps[w][sucs[suc]]->score ) { if ( new_score >= temp_int_prune_thresh ) { if ( new_score > max_interior_score ) { max_interior_score = new_score ; temp_int_prune_thresh = new_score - word_int_beam ; } curr_word_hyps[w][sucs[suc]]->extendState( prev_hyp , new_score ); } } } } } // We've finished with this hypothesis, so deactivate it. 
prev_hyp->deactivate() ; } } if ( verbose_mode == true ) { fprintf( stderr , "%d," , n_processed ) ; fflush(stderr) ; } } void BeamSearchDecoder::applyLMProbs() { real temp_end_prune_thresh , best_score , score ; if ( best_word_end_hyp != NULL ) temp_end_prune_thresh = best_word_end_hyp->score - word_end_beam ; else temp_end_prune_thresh = LOG_ZERO ; best_word_end_hyp = NULL ; best_score = LOG_ZERO ; for ( int i=0 ; in_models ; i++ ) { if ( i == sent_end_index ) { curr_word_end_hyps[i]->deactivate() ; continue ; } score = curr_word_end_hyps[i]->score ; if ( score > LOG_ZERO ) { if ( score < temp_end_prune_thresh ) curr_word_end_hyps[i]->deactivate() ; else { score += lang_model->calcLMProb( curr_word_end_hyps[i] ) ; if ( score > best_score ) { curr_word_end_hyps[i]->score = score ; best_score = score ; if ( best_word_end_hyp != NULL ) best_word_end_hyp->deactivate() ; best_word_end_hyp = curr_word_end_hyps[i] ; } else curr_word_end_hyps[i]->deactivate() ; } } } } void BeamSearchDecoder::processWordTransitionsLM( int curr_frame ) { real prob ; int *pronuns , n_pronuns , n_processed ; WordChainElem *next_word_chain_elem ; real temp_end_prune_thresh ; if ( best_word_end_hyp != NULL ) temp_end_prune_thresh = best_word_end_hyp->score - word_end_beam ; else temp_end_prune_thresh = LOG_ZERO ; n_processed=0 ; for ( int i=0 ; in_models ; i++ ) { if ( i == sent_end_index ) { curr_word_end_hyps[i]->deactivate() ; continue ; } if ( curr_word_end_hyps[i]->score <= LOG_ZERO ) continue ; if ( curr_word_end_hyps[i]->score < temp_end_prune_thresh ) { curr_word_end_hyps[i]->deactivate() ; continue ; } n_processed++ ; for ( int w=0 ; wn_words ; w++ ) { if ( (w == vocabulary->sent_end_index) && (i == sent_start_index) ) continue ; prob = log_word_entrance_penalty + curr_word_end_hyps[i]->score + lang_model->calcLMProb( curr_word_end_hyps[i] , w ) ; pronuns = lexicon->lex_info->vocab_to_lex_map[w].pronuns ; n_pronuns = lexicon->lex_info->vocab_to_lex_map[w].n_pronuns ; 
next_word_chain_elem = DecodingHypothesis::word_chain_elem_pool.getElem( w , curr_word_end_hyps[i]->word_level_info , curr_frame ) ; for ( int p=0 ; p curr_word_entry_hyps[pronuns[p]]->score ) curr_word_entry_hyps[pronuns[p]]->extendWord( prob , next_word_chain_elem ) ; } if ( next_word_chain_elem->n_connected <= 0 ) DecodingHypothesis::word_chain_elem_pool.returnElem( next_word_chain_elem ) ; } curr_word_end_hyps[i]->deactivate() ; } if ( verbose_mode == true ) { fprintf( stderr , "%d " , n_processed ) ; fflush(stderr) ; } } void BeamSearchDecoder::processWordTransitionsNoLM( int curr_frame ) { // The best_word_end_hyp member points to the best word end hypothesis. int *pronuns , n_pronuns ; WordChainElem *next_word_chain_elem ; real score ; if ( verbose_mode == true ) { fprintf( stderr , ":" ) ; fflush(stderr) ; } // Now extend the best word end hypothesis to the initial states of all // words and the initial state of the sentence end word. // If the best word end hyp was the final state of the sentence end hypothesis // then we don't extend it to any other words. 
if ( best_word_end_hyp != NULL ) { score = best_word_end_hyp->score + log_word_entrance_penalty ; for ( int w=0 ; wn_words ; w++ ) { if ( (w == vocabulary->sent_end_index) && (sent_start_index >= 0) && (best_word_end_hyp == curr_word_end_hyps[sent_start_index]) ) continue ; // A start-to-end transition is invalid pronuns = lexicon->lex_info->vocab_to_lex_map[w].pronuns ; n_pronuns = lexicon->lex_info->vocab_to_lex_map[w].n_pronuns ; #ifdef DEBUG if ( n_pronuns == 0 ) error("BeamSearchDecoder::processWordTransNoLM - voc word %d has no pronuns\n",w); #endif next_word_chain_elem = DecodingHypothesis::word_chain_elem_pool.getElem( w , best_word_end_hyp->word_level_info , curr_frame) ; for ( int p=0 ; pscore > LOG_ZERO ) error("BeamSearchDecoder::processWordTransNoLM - word entry hyp not reset\n") ; #endif curr_word_entry_hyps[pronuns[p]]->extendWord( score , next_word_chain_elem ) ; } if ( next_word_chain_elem->n_connected <= 0 ) DecodingHypothesis::word_chain_elem_pool.returnElem( next_word_chain_elem ) ; } } // Deactivate the best word-end hypotheses and the sentence end word-end hypothesis. if ( best_word_end_hyp != NULL ) best_word_end_hyp->deactivate() ; best_word_end_hyp = NULL ; if ( sent_end_index >= 0 ) curr_word_end_hyps[sent_end_index]->deactivate() ; } void BeamSearchDecoder::processWordEntryHypotheses() { DecodingHypothesis *curr_hyp ; short n_sucs , *sucs ; real *suc_log_trans_probs , new_score , temp_prune_thresh ; temp_prune_thresh = max_interior_score - word_int_beam ; for ( int w=0 ; wn_models ; w++ ) { curr_hyp = curr_word_entry_hyps[w] ; if ( curr_hyp->score <= LOG_ZERO ) { curr_hyp->deactivate() ; continue ; } // For each successor state, s, for the initial state of word, w, is our hyposthesis // improved if we consider the best word boundary hypothesis ? // (ie. Is there a better path ending in state s that comes in through a word boundary?) 
lexicon->getSuccessorInfo( w , 0 , &n_sucs , &sucs , &suc_log_trans_probs ) ; for ( int s=0 ; sscore + suc_log_trans_probs[s] ; if ( new_score > curr_word_hyps[w][sucs[s]]->score ) { if ( w == sent_end_index ) { // We don't want to prune sentence end hypotheses. curr_word_hyps[w][sucs[s]]->extendState( curr_hyp , new_score ) ; } else if ( new_score >= temp_prune_thresh ) { if ( new_score > max_interior_score ) { max_interior_score = new_score ; temp_prune_thresh = max_interior_score - word_int_beam ; } curr_word_hyps[w][sucs[s]]->extendState( curr_hyp , new_score ) ; } } } curr_hyp->deactivate() ; } } void BeamSearchDecoder::init() { short n_sucs , *sucs ; int n_pronuns , *pronuns ; real *suc_log_trans_probs ; real new_score ; WordChainElem *next_word_chain_elem ; // Reset all hypotheses. resetHypotheses() ; max_interior_score = LOG_ZERO ; best_word_end_hyp = NULL ; // If there is a sentence start word defined, initialise just the initial state // of the sentence start pronun. if ( sent_start_index >= 0 ) { next_word_chain_elem = DecodingHypothesis::word_chain_elem_pool.getElem( vocabulary->sent_start_index , NULL , 0) ; curr_word_hyps[sent_start_index][0]->extendWord( 0.0 , next_word_chain_elem ) ; // Extend to the successor states of the initial state of the sentence start pronun. lexicon->getSuccessorInfo( sent_start_index , 0 , &n_sucs , &sucs , &suc_log_trans_probs ) ; for ( int s=0 ; sscore + suc_log_trans_probs[s] ; curr_word_hyps[sent_start_index][sucs[s]]->extendState( curr_word_hyps[sent_start_index][0] , new_score ) ; } curr_word_hyps[sent_start_index][0]->deactivate() ; } else { // There is no sentence start pronunciation defined. // Initialise hypotheses for the initial states of all models // in the lexicon (except the sent end word if defined). 
for ( int w=0 ; wn_words ; w++ ) { next_word_chain_elem = DecodingHypothesis::word_chain_elem_pool.getElem( w, NULL, 0 ) ; n_pronuns = lexicon->lex_info->vocab_to_lex_map[w].n_pronuns ; pronuns = lexicon->lex_info->vocab_to_lex_map[w].pronuns ; for ( int p=0 ; pextendWord( 0.0 , next_word_chain_elem ) ; } if ( next_word_chain_elem->n_connected <= 0 ) DecodingHypothesis::word_chain_elem_pool.returnElem( next_word_chain_elem ) ; } // Now go through all models and extend the intial state hypotheses. for ( int m=0 ; mn_models ; m++ ) { if ( m == sent_end_index ) continue ; lexicon->getSuccessorInfo( m , 0 , &n_sucs , &sucs , &suc_log_trans_probs ) ; for ( int s=0 ; sscore + suc_log_trans_probs[s] ; curr_word_hyps[m][sucs[s]]->extendState( curr_word_hyps[m][0] , new_score ) ; } curr_word_hyps[m][0]->deactivate() ; } } } } torch3-3.1.orig/decoder/BeamSearchDecoder.h0000644000175000017500000001104610106445236020735 0ustar kalfakalfa00000000000000// Copyright (C) 2003--2004 Darren Moore (moore@idiap.ch) // // This file is part of Torch 3.1. // // All rights reserved. // // Redistribution and use in source and binary forms, with or without // modification, are permitted provided that the following conditions // are met: // 1. Redistributions of source code must retain the above copyright // notice, this list of conditions and the following disclaimer. // 2. Redistributions in binary form must reproduce the above copyright // notice, this list of conditions and the following disclaimer in the // documentation and/or other materials provided with the distribution. // 3. The name of the author may not be used to endorse or promote products // derived from this software without specific prior written permission. // // THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR // IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES // OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
// IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
// INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
// NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
// THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

#ifndef BEAMSEARCHDECODER_INC
#define BEAMSEARCHDECODER_INC

#include "general.h"
#include "DecodingHypothesis.h"
#include "PhoneModels.h"
#include "LinearLexicon.h"
#include "LanguageModel.h"
#include "Vocabulary.h"
#include "log_add.h"

namespace Torch {

/** This class implements a Viterbi decoder with beam search capabilities.
    A Lexicon and LanguageModel are required at creation time (the
    LanguageModel is optional). By default, no pruning occurs. Two levels
    of pruning can be configured - word interior hypothesis pruning and
    word end hypothesis pruning. The application of language model
    probabilities can be delayed or performed normally.

    @author Darren Moore (moore@idiap.ch)
*/
class BeamSearchDecoder
{
public:
    /// Phone models, taken from the lexicon in the constructor.
    PhoneModels *phone_models ;
    /// Lexicon given to the constructor (never NULL after construction).
    LinearLexicon *lexicon ;
    /// Language model given to the constructor; NULL means "no LM".
    LanguageModel *lang_model ;
    /// Vocabulary, taken from the lexicon's lex_info in the constructor.
    Vocabulary *vocabulary ;

    /// Number of frames passed to the most recent decode() call.
    int n_frames ;
    /// Value added to a hypothesis score whenever a new word is entered.
    real log_word_entrance_penalty ;
    /// Word-interior beam. The constructor stores -LOG_ZERO here when its
    /// argument is not strictly positive.
    real word_int_beam ;
    /// Word-end beam. The constructor stores -LOG_ZERO here when its
    /// argument is not strictly positive.
    real word_end_beam ;

    /// The two hypothesis buffers, indexed [model][state]. The constructor
    /// allocates one DecodingHypothesis per state of each model in each buffer.
    DecodingHypothesis ***word_state_hyps_1 ;
    DecodingHypothesis ***word_state_hyps_2 ;
    /// Per-model aliases of the last state of each buffer (not owning).
    DecodingHypothesis **word_end_hyps_1 ;
    DecodingHypothesis **word_end_hyps_2 ;
    /// Per-model aliases of state 0 of each buffer (not owning).
    DecodingHypothesis **word_entry_hyps_1 ;
    DecodingHypothesis **word_entry_hyps_2 ;

    /// "Current" and "previous" views of the two buffers above;
    /// swapHypBuffers() exchanges which buffer each one points to.
    DecodingHypothesis ***curr_word_hyps ;
    DecodingHypothesis ***prev_word_hyps ;
    DecodingHypothesis **curr_word_entry_hyps ;
    DecodingHypothesis **prev_word_entry_hyps ;
    DecodingHypothesis **curr_word_end_hyps ;
    DecodingHypothesis **prev_word_end_hyps ;

    /// Model indices of the sentence start / sentence end pronunciations,
    /// copied from the lexicon's lex_info. The code tests them with ">= 0",
    /// so a negative value is treated as "not defined".
    int sent_start_index ;
    int sent_end_index ;
    /// When true, progress information is written to stderr.
    bool verbose_mode ;

    /// Best word-interior score seen so far for the frame being processed;
    /// the interior pruning threshold is derived from it.
    real max_interior_score ;
    /// Best word-end hypothesis of the current frame, or NULL.
    DecodingHypothesis *best_word_end_hyp ;
    /// With a language model present: true makes decode() call applyLMProbs()
    /// followed by processWordTransitionsNoLM(); false makes it call
    /// processWordTransitionsLM().
    bool delayed_lm ;

    /* Constructors/destructor */
    /// Allocates both hypothesis buffers for 'lexicon_'. A beam argument that
    /// is not strictly positive (including the default) selects -LOG_ZERO.
    BeamSearchDecoder( LinearLexicon *lexicon_ , LanguageModel *lang_model_ ,
                       real log_word_entrance_penalty_=0.0 ,
                       real word_int_beam_=LOG_ZERO , real word_end_beam_=LOG_ZERO ,
                       bool delayed_lm_=true , bool verbose_mode_=false ) ;
    virtual ~BeamSearchDecoder() ;

    /* Methods */
    /// Decodes using the input data vectors in 'input_data'.
    /// 'n_frames_' is the number of vectors of input data.
    /// After this function returns, 'num_result_words' contains the number of words
    /// recognised, 'result_words' contains the vocabulary indices of the recognised
    /// words and 'result_words_times' contains the start frame of each of them.
    /// Both arrays are allocated inside this function. When no valid final
    /// hypothesis is found the count is 0 and both arrays are NULL.
    void decode( real **input_data , int n_frames_ , int *num_result_words ,
                 int **result_words , int **result_words_times ) ;

    /// Deactivates every hypothesis of the current buffer.
    void resetHypotheses() ;
    /// Exchanges the current and previous hypothesis buffers.
    void swapHypBuffers() ;
    /// Extends the previous buffer's interior-state hypotheses into the
    /// current buffer, applying the word-interior beam.
    void processWordInteriorStates() ;
    /// Adds language model probabilities to the surviving word-end
    /// hypotheses and keeps only the best one.
    void applyLMProbs() ;
    /// Extends all surviving word-end hypotheses to word-entry hypotheses
    /// using the language model. 'curr_frame' is recorded in the word chain.
    void processWordTransitionsLM( int curr_frame ) ;
    /// Extends only the best word-end hypothesis to word-entry hypotheses,
    /// without the language model. 'curr_frame' is recorded in the word chain.
    void processWordTransitionsNoLM( int curr_frame ) ;
    /// Moves word-entry hypotheses from the initial state of each model into
    /// that state's successor states.
    void processWordEntryHypotheses() ;
    /// Resets the decoder and seeds the hypotheses for a new utterance.
    void init() ;
} ;


}

#endif
torch3-3.1.orig/decoder/DecoderBatchTest.cc0000644000175000017500000004773610106445236021001 0ustar kalfakalfa00000000000000// Copyright (C) 2003--2004 Darren Moore (moore@idiap.ch)
//
// This file is part of Torch 3.1.
//
// All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions
// are met:
// 1. Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
// 2. Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
// 3. The name of the author may not be used to endorse or promote products
// derived from this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
// IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
// OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
// IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, // INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT // NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF // THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #include "Allocator.h" #include "DecoderBatchTest.h" #include "EditDistance.h" #include "time.h" #include "string_stuff.h" #include "DiskXFile.h" namespace Torch { DecoderBatchTest::DecoderBatchTest( char *datafiles_filename , DSTDataFileFormat datafiles_format , char *expected_results_file , BeamSearchDecoder *decoder_ , bool remove_sil , bool output_res , char *out_fname , bool output_ctm , real frame_msec_step_size ) { clock_t start_time , end_time ; if ( decoder_ == NULL ) error("DBT::DBT - decoder_ is NULL\n") ; if ( (datafiles_filename == NULL) || (strcmp(datafiles_filename,"")==0) ) error("DBT::DBT - datafiles_filename is undefined\n") ; total_time = 0.0 ; start_time = clock() ; decoder = decoder_ ; vocabulary = decoder->vocabulary ; n_tests = 0 ; tests = NULL ; archive_fd = NULL ; if ( (expected_results_file == NULL) || (strcmp(expected_results_file,"")==0) ) have_expected_results = false ; else have_expected_results = true ; // Open the results file if ( (out_fname != NULL) && (strcmp(out_fname,"")!=0) ) { if ( (output_fd=fopen( out_fname , "w" )) == NULL ) error("DBT::DBT - error opening output file\n") ; } else output_fd = NULL ; if ( (datafiles_format == DST_FEATS_PFILE_ARCHIVE) || (datafiles_format == DST_PROBS_LNA8BIT_ARCHIVE) || (datafiles_format == DST_FEATS_ONLINE_FTRS_ARCHIVE) ) { // 'datafiles_filename' is the actual feature or prob archive file for all test // sentences. 
'expected_results_file' (if specified) contains the ground truth // transcriptions in "raw" format - 1 sentence per line with the order of // sentences matching the order in the archive file. configureWithArchiveInput( datafiles_filename , datafiles_format , expected_results_file , remove_sil , output_res , output_ctm , frame_msec_step_size ) ; } else { // We have a separate input file for each test (eg. separate lna file for // each sentence). 'datafiles_filename' should contain a list of absolute // pathnames of the input files for all tests (1 abs filename per line). // 'expected_results_file' is the ground truth transcriptions for each test // in HTK MLF format (see below) (filenames must be absolute and must have // a 3 letter extension (the extension gets ignored)). configureWithIndividualInputs( datafiles_filename , datafiles_format , expected_results_file , remove_sil , output_res , output_ctm , frame_msec_step_size ) ; } end_time = clock() ; total_time = (real)(end_time-start_time) / CLOCKS_PER_SEC ; } DecoderBatchTest::~DecoderBatchTest() { if ( archive_fd != NULL ) fclose( archive_fd ) ; if ( output_fd != NULL ) fclose( output_fd ) ; if ( tests != NULL ) { for ( int i=0 ; idecode_time ; if ( tests[i]->actual_words != NULL ) { singleRes.distance( tests[i]->actual_words , tests[i]->n_actual_words , tests[i]->expected_words , tests[i]->n_expected_words ) ; totalRes.add( &singleRes ) ; } } printf("\nTotal time spent actually decoding = %.2f secs\n",decode_time) ; printf("Total time spent configuring and running batch test = %.2f secs\n\n",total_time) ; DiskXFile xf(stdout); totalRes.print( &xf ) ; totalRes.printRatio( &xf ) ; printf("\n") ; } void DecoderBatchTest::run() { clock_t start_time , end_time ; start_time = clock() ; for ( int i=0 ; irun( decoder , archive_fd ) ; } end_time = clock() ; total_time += (real)(end_time-start_time) / CLOCKS_PER_SEC ; if ( have_expected_results == true ) printStatistics(7,7,10) ; // HTK settings for Ins, Sub, Del 
calculations } void DecoderBatchTest::configureWithIndividualInputs( char *datafiles_filename , DSTDataFileFormat datafiles_format , char *expected_results_file , bool remove_sil , bool output_res , bool output_ctm , real frame_msec_step_size ) { FILE *datafiles_fd=NULL , *results_fd=NULL ; char line[1000] , fname[1000] , result_fname[1000] , res_word[1000] , *ptr ; int temp_result_list[1000] , n_sentence_words=0 , i=0 , test_index , word_cnt ; char **filenames=NULL ; bool have_mlf=false ; // Open the file containing the names of the data files we want to run tests for. datafiles_fd = fopen( datafiles_filename , "r" ) ; if ( datafiles_fd == NULL ) error("DecoderBatchTest::configure - error opening datafiles file") ; // Open the file containing the expected results for each test. // We assume that the format is as per HTK MLF format. // Note that the filename line must be enclosed in "". if ( have_expected_results == true ) { if ( (results_fd = fopen( expected_results_file , "r" )) == NULL ) error("DecoderBatchTest::configureWII - error opening results file") ; // Read the first line of the results file to determine its type fgets( line , 1000 , results_fd ) ; if ( strstr( line , "MLF" ) ) have_mlf = true ; else { have_mlf = false ; fseek( results_fd , 0 , SEEK_SET ) ; } } // Determine the number of filenames present in the datafiles file n_tests=0 ; while ( fgets( line , 1000 , datafiles_fd ) != NULL ) { if ( (sscanf(line,"%s",fname)==0) || (line[0] == '#') || (line[0] == '\n') || (line[0] == '\r') || (line[0] == ' ') || (line[0] == '\t') ) continue ; n_tests++ ; tests = (DecoderSingleTest **)Allocator::sysRealloc( tests , n_tests * sizeof(DecoderSingleTest *) ) ; tests[n_tests-1] = NULL ; filenames = (char **)Allocator::sysRealloc( filenames , n_tests * sizeof(char *) ) ; filenames[n_tests-1] = (char *)Allocator::sysAlloc( (strlen(fname)+1)*sizeof(char) ) ; strcpy( filenames[n_tests-1] , fname ) ; } if ( have_expected_results == true ) { // Read each entry in the 
expected results file, find its corresponding // filename in the temporary list of filename, create a DecoderSingleTest // instance and add it to the list of tests. test_index = 0 ; while ( fgets( line , 1000 , results_fd ) != NULL ) { if ( have_mlf == true ) { if ( sscanf(line,"\"%[^\"]",result_fname) != 0 ) { // remove the extension and path from the filename if ( (ptr=strrchr( result_fname , '/' )) != NULL ) memmove( result_fname , ptr+1 , strlen(ptr)+1 ) ; if ( (ptr=strrchr( result_fname , '.' )) != NULL ) *ptr = '\0' ; // find the filename in the temporary list of filenames for ( i=0 ; igetIndex(res_word) ; if ( temp_result_list[n_sentence_words] >= 0 ) n_sentence_words++ ; fgets( line , 1000 , results_fd ) ; } // Now configure the next DecoderSingleTest instance // with the details of the test. if ( tests[i] != NULL ) error("DecoderSingleTest::configureWII - duplicate exp results\n"); tests[i] = new DecoderSingleTest() ; tests[i]->configure( i , filenames[i] , n_sentence_words , temp_result_list , datafiles_format , decoder->phone_models , remove_sil , output_res , output_fd , output_ctm , frame_msec_step_size ) ; break ; } } } } else { // We have expected results in reference format // Extract the words in the sentence ptr = strtok( line , " \r\n\t" ) ; word_cnt = 0 ; while ( ptr != NULL ) { if ( (temp_result_list[word_cnt] = vocabulary->getIndex( ptr )) < 0 ) printf("DBT::cWAI - result word %s not in vocab for test %d\n",ptr,i+1) ; word_cnt++ ; ptr = strtok( NULL , " \r\n\t" ) ; } // Configure the DecoderSingleTest instance if ( test_index >= n_tests ) error("DecoderSingleTest::configureWII - test_index out of range\n"); if ( tests[test_index] != NULL ) error("DecoderSingleTest::configureWII - duplicate exp results\n"); tests[test_index] = new DecoderSingleTest() ; tests[test_index]->configure( test_index , filenames[test_index] , word_cnt , temp_result_list , datafiles_format , decoder->phone_models , remove_sil , output_res , output_fd , output_ctm , 
frame_msec_step_size ) ; test_index++ ; } } // Check that each element of 'tests' has been configured for ( i=0 ; in_expected_words ; j++ ) // printf("%s ",vocabulary->getWord( tests[i]->expected_words[j] ) ) ; //printf("\n"); } } else { for ( i=0 ; iconfigure( i , filenames[i] , 0 , NULL , datafiles_format , decoder->phone_models , remove_sil , output_res , output_fd , output_ctm , frame_msec_step_size ) ; } } // Free the temporary list of filenames for ( i=0 ; iphone_models->n_emission_probs , &offsets , &n_tests ) ; } else if ( archive_format == DST_FEATS_ONLINE_FTRS_ARCHIVE ) { findOnlineFtrsArchiveUtteranceOffsets( archive_fd , decoder->phone_models->n_features , &offsets , &n_tests ) ; } fseek( archive_fd , 0 , SEEK_SET ) ; // We now know how many tests (utterances) are in the archive, and the // offset into the archive where the utterance data resides. We can now // configure each test. tests = (DecoderSingleTest **)Allocator::sysAlloc( n_tests*sizeof(DecoderSingleTest *) ) ; for ( int i=0 ; igetIndex( ptr )) < 0 ) printf("DBT::cWAI - result word %s not in vocab for test %d\n",ptr,i+1) ; word_cnt++ ; ptr = strtok( NULL , " \r\n\t" ) ; } // Configure the DecoderSingleTest instance tests[i]->configure( i , offsets[i] , word_cnt , temp_result_list , archive_format , decoder->phone_models , remove_sil , output_res , output_fd , output_ctm , frame_msec_step_size ) ; } else { tests[i]->configure( i , offsets[i] , 0 , NULL , archive_format , decoder->phone_models , remove_sil , output_res , output_fd , output_ctm , frame_msec_step_size ) ; } } if ( offsets != NULL ) free( offsets ) ; if ( have_expected_results == true ) fclose(results_fd) ; } void DecoderBatchTest::findLNA8ArchiveUtteranceOffsets( FILE *arch_fd , int n_probs , long **offsets , int *n_utts ) { int count ; long pos=0 ; unsigned char buf[1000] ; bool got_end_word ; *n_utts = 0 ; *offsets = NULL ; got_end_word = true ; if ( n_probs <= 0 ) error("DBT::findLNA8ArchiveUtteranceOffsets - lna vector size 
unspecified\n") ; while ( (count=fread( buf , sizeof(unsigned char) , n_probs+1 , arch_fd )) == (n_probs+1) ) { if ( (got_end_word == true) && (buf[0] == 0x00) ) { (*n_utts)++ ; *offsets = (long *)Allocator::sysRealloc( (*offsets) , (*n_utts)*sizeof(long) ) ; (*offsets)[(*n_utts)-1] = pos ; got_end_word = false ; } else if ( buf[0] == 0x80 ) { if ( got_end_word == true ) error("DBT::findLNA8ArchiveUtteranceOffsets - double 0x80\n") ; got_end_word = true ; } else { if ( buf[0] != 0x00 ) error("DBT::findLNA8ArchiveUtteranceOffsets - first byte of line was not 0x00\n") ; } pos += (n_probs+1) ; #ifdef DEBUG if ( pos != ftell( arch_fd ) ) error("DBT::findLNA8ArchiveUtteranceOffsets - pos does not match ftell\n") ; #endif } } void DecoderBatchTest::findOnlineFtrsArchiveUtteranceOffsets( FILE *arch_fd , int n_feats , long **offsets , int *n_utts ) { int count , step_size ; long pos=0 ; unsigned char buf[1000] ; bool got_end_word ; *n_utts = 0 ; *offsets = NULL ; got_end_word = true ; if ( n_feats <= 0 ) error("DBT::findOnlineFtrsArchiveUtteranceOffsets - n_features unspecified\n") ; step_size = 1 + (n_feats * sizeof(float)) ; while ( (count=fread( buf , sizeof(unsigned char) , step_size , arch_fd )) == step_size ) { if ( (got_end_word == true) && (buf[0] == 0x00) ) { (*n_utts)++ ; *offsets = (long *)Allocator::sysRealloc( (*offsets) , (*n_utts)*sizeof(long) ) ; (*offsets)[(*n_utts)-1] = pos ; got_end_word = false ; } else if ( buf[0] == 0x80 ) { if ( got_end_word == true ) error("DBT::findOnlineFtrsArchiveUtteranceOffsets - double 0x80\n") ; got_end_word = true ; } else { if ( buf[0] != 0x00 ) error("DBT::findOnlineFtrsArchiveUtteranceOffsets - first byte was not 0x00\n") ; } pos += step_size ; #ifdef DEBUG if ( pos != ftell( arch_fd ) ) error("DBT::findLNA8ArchiveUtteranceOffsets - pos does not match ftell\n") ; #endif } } #ifdef DEBUG void DecoderBatchTest::outputText() { printf("Number of tests = %d\n",n_tests) ; for ( int i=0 ; ioutputText() ; printf("\n") ; } } 
#endif } torch3-3.1.orig/decoder/DecoderBatchTest.h0000644000175000017500000001262410106445236020627 0ustar kalfakalfa00000000000000// Copyright (C) 2003--2004 Darren Moore (moore@idiap.ch) // // This file is part of Torch 3.1. // // All rights reserved. // // Redistribution and use in source and binary forms, with or without // modification, are permitted provided that the following conditions // are met: // 1. Redistributions of source code must retain the above copyright // notice, this list of conditions and the following disclaimer. // 2. Redistributions in binary form must reproduce the above copyright // notice, this list of conditions and the following disclaimer in the // documentation and/or other materials provided with the distribution. // 3. The name of the author may not be used to endorse or promote products // derived from this software without specific prior written permission. // // THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR // IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES // OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. // IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, // INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT // NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF // THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #ifndef DECODERBATCHTEST_INC #define DECODERBATCHTEST_INC #include "general.h" #include "BeamSearchDecoder.h" #include "Vocabulary.h" #include "DecoderSingleTest.h" namespace Torch { /** This class is used to decode a set of test files and display statistics and results for each file (ie. 
expected and actual words recognised) and also for the entire test set (ie. insertions, deletions, substitutions, accuracy, total time). @author Darren Moore (moore@idiap.ch) */ class DecoderBatchTest { public: BeamSearchDecoder *decoder ; Vocabulary *vocabulary ; int n_tests ; DecoderSingleTest **tests ; real total_time ; FILE *archive_fd ; FILE *output_fd ; bool have_expected_results ; /* Constructors / destructor */ /// Configures the batch test. /// 'datafiles_filename' is the file containing a list of the input data files /// that need to be decoded. Absolute pathnames are required. The decoding /// will occur in the order the files are listed in this file. /// 'expected_results_file' is the file containing the correct transcriptions /// for all input data files. The ordering of the files does not have to be /// the same as the order in 'datafiles_filename'. /// 'decoder_' is a pointer to the decoder to be used to perform the recognition. /// 'remove_sil' indicates whether the silence word is to be removed from the /// recognition results before statistics are calculated. /// 'output_res' indicates whether the result of each recognition is to be output /// immediately after it is obtained. /// 'preload_data' indicates whether all of the input data in all files is to be /// preloaded into memory before any decoding occurs (takes lots of memory and lots /// of time before any results are obtained if the number of input data files /// is large. /// 'pre_calc_emission_probs' indicates whether emission probabilities for /// all input files are to be calculated before the decoder is invoked. This /// only applies if 'preload_data' is true. 
DecoderBatchTest( char *datafiles_filename , DSTDataFileFormat datafiles_format , char *expected_results_file , BeamSearchDecoder *decoder_ , bool remove_sil=false , bool output_res=false , char *out_fname=NULL , bool output_ctm=false , real frame_msec_step_size=10.0 ) ; ~DecoderBatchTest() ; /* Methods */ void configureWithArchiveInput( char *archive_filename , DSTDataFileFormat archive_format , char *expected_results_file , bool remove_sil , bool output_res , bool output_ctm , real frame_msec_step_size ); void configureWithIndividualInputs( char *datafiles_filename , DSTDataFileFormat datafiles_format , char *expected_results_file , bool remove_sil , bool output_res , bool output_ctm , real frame_msec_step_size ) ; void findLNA8ArchiveUtteranceOffsets( FILE *arch_fd , int n_probs , long int **offsets , int *n_utts ) ; void findOnlineFtrsArchiveUtteranceOffsets( FILE *arch_fd , int n_feats , long int **offsets , int *n_utts ) ; /// Intended to be called after the 'run' method has returned. Processes the /// recognition results for all input data files and compiles insertions, /// deletions, substitions, accuracy and time statisitcs for the entire batch run. void printStatistics( int i_cost , int d_cost , int s_cost ) ; /// Runs the batch test according to the options set by the call to 'configure'. void run() ; #ifdef DEBUG void outputText() ; #endif } ; } #endif torch3-3.1.orig/decoder/DecoderSingleTest.cc0000644000175000017500000005441510106445236021171 0ustar kalfakalfa00000000000000// Copyright (C) 2003--2004 Darren Moore (moore@idiap.ch) // // This file is part of Torch 3.1. // // All rights reserved. // // Redistribution and use in source and binary forms, with or without // modification, are permitted provided that the following conditions // are met: // 1. Redistributions of source code must retain the above copyright // notice, this list of conditions and the following disclaimer. // 2. 
Redistributions in binary form must reproduce the above copyright // notice, this list of conditions and the following disclaimer in the // documentation and/or other materials provided with the distribution. // 3. The name of the author may not be used to endorse or promote products // derived from this software without specific prior written permission. // // THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR // IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES // OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. // IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, // INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT // NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF // THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
#include "Allocator.h" #include "DecoderSingleTest.h" #include "IOHTK.h" #include "Sequence.h" #include "DiskXFile.h" #include "time.h" #include "sys/types.h" #include "sys/stat.h" namespace Torch { DecoderSingleTest::DecoderSingleTest() { test_filename = NULL ; test_id = -1 ; expected_words = NULL ; n_expected_words = 0 ; actual_words = NULL ; actual_words_times = NULL ; n_actual_words = 0 ; data_format = DST_NOFORMAT ; decode_time = 0.0 ; output_result = false ; remove_sent_marks = false ; output_ctm = false ; frames_per_msec = 0.0 ; archive_offset = -1 ; output_fd=NULL ; n_frames=0 ; n_features=0 ; n_emission_probs=0 ; decoder_input = NULL ; phone_models = NULL ; } DecoderSingleTest::~DecoderSingleTest() { if ( test_filename != NULL ) free( test_filename ) ; if ( expected_words != NULL ) free( expected_words ) ; if ( actual_words != NULL ) free( actual_words ) ; if ( actual_words_times != NULL ) free( actual_words_times ) ; if ( decoder_input != NULL ) { for ( int i=0 ; i0) && (expected_words_!=NULL) ) { n_expected_words = n_expected_words_ ; expected_words = (int *)Allocator::sysAlloc( n_expected_words * sizeof(int) ) ; memcpy( expected_words , expected_words_ , n_expected_words*sizeof(int) ) ; } else { n_expected_words = 0 ; expected_words = NULL ; } // If there are existing actual results - delete them if ( actual_words != NULL ) { free( actual_words ) ; actual_words = NULL ; } if ( actual_words_times != NULL ) { free( actual_words_times ) ; actual_words_times = NULL ; } n_actual_words = 0 ; data_format = data_format_ ; frames_per_msec = 1.0 / frame_ms_step_size ; output_ctm = output_ctm_ ; if ( output_ctm == true ) remove_sent_marks = false ; else remove_sent_marks = remove_sent_marks_ ; output_result = output_result_ ; if ( (output_fd = out_fd) == NULL ) output_fd = stdout ; } void DecoderSingleTest::configure( int test_id_ , long archive_offset_ , int n_expected_words_ , int *expected_words_ , DSTDataFileFormat data_format_ , PhoneModels *phone_models_ , 
bool remove_sent_marks_ , bool output_result_ , FILE *out_fd , bool output_ctm_ , real frame_ms_step_size ) { test_id = test_id_ ; test_filename = NULL ; if ( (phone_models = phone_models_) == NULL ) error("DST::configure(2) - phone_models is NULL\n") ; archive_offset = archive_offset_ ; // Allocate memory to hold the array of word indices that constitute the // expected result of the test and copy the results. if ( (n_expected_words_>0) && (expected_words_!=NULL) ) { n_expected_words = n_expected_words_ ; expected_words = (int *)Allocator::sysAlloc( n_expected_words * sizeof(int) ) ; memcpy( expected_words , expected_words_ , n_expected_words*sizeof(int) ) ; } else { n_expected_words = 0 ; expected_words = NULL ; } // If there are existing actual results - delete them if ( actual_words != NULL ) { free( actual_words ) ; actual_words = NULL ; } if ( actual_words_times != NULL ) { free( actual_words_times ) ; actual_words_times = NULL ; } n_actual_words = 0 ; data_format = data_format_ ; frames_per_msec = 1.0 / frame_ms_step_size ; output_ctm = output_ctm_ ; if ( output_ctm == true ) remove_sent_marks = false ; else remove_sent_marks = remove_sent_marks_ ; output_result = output_result_ ; if ( (output_fd = out_fd) == NULL ) output_fd = stdout ; } void DecoderSingleTest::run( BeamSearchDecoder *decoder , FILE *archive_fd ) { clock_t start_time , end_time ; int start_index = 0 ; // The data file hasn't been loaded yet - load it loadDataFile( archive_fd ) ; // Now look at the type of data that was in the file and compare it with the // type expected by the phone set. if ( ((n_emission_probs == 0) && (phone_models->input_vecs_are_features == false)) || ((n_features == 0) && (phone_models->input_vecs_are_features == true)) ) { // We've got feature vectors (or nothing), but the phone_models is expecting // vectors of emission probabilities (or vice versa). 
error("DecoderSingleTest::run - datafile format does not agree with phone_models\n") ; } if ( (n_features != phone_models->n_features) && (n_emission_probs != phone_models->n_emission_probs) ) { error("DecoderSingleTest::run - input vector size does not agree with phone_models\n") ; } // If the input vectors are features and we are calculating emission probs // using an MLP, we need to initialise the context window of the MLP. if ( (phone_models->input_vecs_are_features == true) && (phone_models->mlp != NULL) ) start_index = phone_models->mlp->initContextWindow( decoder_input ) ; n_frames -= start_index ; // invoke the decoder start_time = clock() ; decoder->decode( decoder_input+start_index , n_frames , &n_actual_words , &actual_words , &actual_words_times ) ; end_time = clock() ; decode_time = (real)(end_time-start_time) / CLOCKS_PER_SEC ; // process the decoding result if ( remove_sent_marks == true ) removeSentMarksFromActual( decoder->vocabulary ) ; if ( output_result == true ) outputText( decoder->vocabulary ) ; // Free up some memory for( int i=0 ; i<(n_frames+start_index) ; i++ ) free( decoder_input[i] ) ; free( decoder_input ) ; decoder_input = NULL ; n_emission_probs = 0 ; n_features = 0 ; } void DecoderSingleTest::removeSentMarksFromActual( Vocabulary *vocabulary ) { if ( (n_actual_words == 0) || (vocabulary == NULL) ) return ; // remove the sentence start word if ( actual_words[0] == vocabulary->sent_start_index ) { for ( int j=1 ; jsent_end_index ) n_actual_words-- ; // remove any instances of silence if ( vocabulary->sil_index >= 0 ) { for ( int j=0 ; jsil_index) ) { for ( int k=(j+1) ; kn_total_frames ; n_features = htk_data->frame_size ; decoder_input = (real **)Allocator::sysAlloc( n_frames * sizeof(real *) ) ; // read the HTK data into a sequence Sequence *temp_seq = new Sequence( n_frames , n_features ) ; htk_data->getSequence( 0 , temp_seq ) ; // copy the sequence data into the decoder_input array for ( int i=0 ; iframes[i] , n_features * 
sizeof(real) ) ; } delete htk_data ; delete temp_seq ; break ; } case DST_FEATS_ONLINE_FTRS: { loadOnlineFtrs( test_filename ) ; break ; } case DST_FEATS_ONLINE_FTRS_ARCHIVE: { loadOnlineFtrsFromArchive( archive_fd ) ; break ; } case DST_PROBS_LNA8BIT: { loadLNA8bit( test_filename ) ; break ; } case DST_PROBS_LNA8BIT_ARCHIVE: { loadLNA8bitFromArchive( archive_fd ) ; break ; } default: error("DecoderSingleTest::loadDataFile - data_format not recognised\n") ; } #ifdef DEBUG if ( (n_features==0) && (n_emission_probs==0) ) error("DecoderSingleTest::loadDataFile - no data loaded\n") ; #endif } void DecoderSingleTest::outputText( Vocabulary *vocab ) { real sec_duration , sec_start_time ; if ( (test_filename == NULL) && (archive_offset<0) ) return ; if ( (n_expected_words > 0) && (output_ctm == false) ) { // We have expected words, so output a verbose results containing // the filename, expected result, actual result, segmentation. if ( test_filename != NULL ) fprintf( output_fd , "%s\n" , test_filename ) ; else fprintf( output_fd , "\n") ; fprintf( output_fd , "\tExpected : ") ; for ( int i=0 ; i ") ; else fprintf( output_fd , "%s " , vocab->getWord(expected_words[i]) ) ; } fprintf( output_fd , "\n\tActual : ") ; for ( int i=0 ; igetWord(actual_words[i]) ) ; } fprintf( output_fd , " [ ") ; for ( int i=0 ; i0) ) { if ( actual_words[0] != vocab->sent_start_index ) error("DST::outputText - did not see sent start symbol in output\n") ; if ( actual_words[n_actual_words-1] != vocab->sent_end_index ) error("DST::outputText - did not see sent end symbol in output\n") ; for ( int i=1 ; i<(n_actual_words-1) ; i++ ) { sec_duration = ( actual_words_times[i+1] - actual_words_times[i] ) / frames_per_msec / 1000.0 ; sec_start_time = actual_words_times[i] / frames_per_msec / 1000.0 ; fprintf( output_fd , "%d A %.3f %.3f %s\n" , test_id , sec_start_time , sec_duration , vocab->getWord(actual_words[i]) ) ; } } else { // We just want to output the actual result words - nothing more or less 
for ( int i=0 ; igetWord(actual_words[i]) ) ; fprintf( output_fd , "\n") ; } fflush( output_fd ) ; } void DecoderSingleTest::loadLNA8bit( char *lna_filename ) { FILE *lna_fd ; int buf_size , step_size , i ; unsigned char buf[2000] ; real sum=0.0 ; #ifdef DEBUG if ( sizeof(unsigned char) != 1 ) error("DecoderSingleTest::loadLNA8bit - unsigned char not 1 byte\n") ; if ( (lna_filename == NULL) || (strcmp(lna_filename,"")==0) ) error("DecoderSingleTest::loadLNA8bit - lna_filename undefined\n") ; if ( phone_models->n_emission_probs <= 0 ) error("DecoderSingleTest::loadLNA8bit - ph_models->n_emission_probs not set\n") ; #endif n_frames = 0 ; decoder_input = NULL ; n_emission_probs = phone_models->n_emission_probs ; step_size = 1 + (n_emission_probs * sizeof(unsigned char)) ; if ( (lna_fd = fopen( lna_filename , "r" )) == NULL ) error("DecoderSingleTest::loadLNA8bit - error opening LNA file\n") ; do { if ( (buf_size=(int)fread( buf , 1 , step_size , lna_fd )) != step_size ) error("DecoderSingleTest::loadLNA8bit - error reading prob vector\n") ; if ( (buf[0] != 0x00) && (buf[0] != 0x80) ) error("DecoderSingleTest::loadLNA8bit - flag byte error\n") ; n_frames++ ; decoder_input = (real **)Allocator::sysRealloc( decoder_input , n_frames*sizeof(real *) ) ; decoder_input[n_frames-1] = (real *)Allocator::sysAlloc( n_emission_probs * sizeof(real) ) ; // Convert from the 8-bit integer in the file to the equivalent floating point // log probability. sum = 0.0 ; for ( i=0 ; i 1.03) ) error("DecoderSingleTest::loadLNA8bit - sum of probs = %.4f not in [0.97,1.03]\n",sum) ; } while ( buf[0] != 0x80 ) ; // We're done. 
n_features = 0 ; fclose( lna_fd ) ; } void DecoderSingleTest::loadLNA8bitFromArchive( FILE *archive_fd ) { int buf_size , step_size , i ; unsigned char buf[2000] ; real sum=0.0 ; #ifdef DEBUG if ( sizeof(unsigned char) != 1 ) error("DecoderSingleTest::loadLNA8bitFromArchive - unsigned char not 1 byte\n") ; if ( archive_fd == NULL ) error("DecoderSingleTest::loadLNA8bitFromArchive - archive_fd NULL\n") ; if ( phone_models->n_emission_probs <= 0 ) error("DecoderSingleTest::loadLNA8bitFromArchive - ph_models->n_emission_probs not set\n") ; if ( archive_offset < 0 ) error("DecoderSingleTest::loadLNA8bitFromArchive - archive_offset not setup\n") ; #endif n_frames = 0 ; decoder_input = NULL ; n_emission_probs = phone_models->n_emission_probs ; step_size = 1 + (n_emission_probs * sizeof(unsigned char)) ; // Go to the correct place in the archive file fseek( archive_fd , archive_offset , SEEK_SET ) ; do { if ( (buf_size=(int)fread( buf , 1 , step_size , archive_fd )) != step_size ) error("DecoderSingleTest::loadLNA8bitFromArchive - error reading prob vector\n") ; if ( (buf[0] != 0x00) && (buf[0] != 0x80) ) error("DecoderSingleTest::loadLNA8bitFromArchive - flag byte error\n") ; n_frames++ ; decoder_input = (real **)Allocator::sysRealloc( decoder_input , n_frames*sizeof(real *) ) ; decoder_input[n_frames-1] = (real *)Allocator::sysAlloc( n_emission_probs * sizeof(real) ) ; // Convert from the 8-bit integer in the file to the equivalent floating point // log probability. sum = 0.0 ; for ( i=0 ; i 1.03) ) error("DST::loadLNA8bitFromArchive - sum_probs=%.4f not in [0.97,1.03]\n",sum) ; } while ( buf[0] != 0x80 ) ; // We're done. n_features = 0 ; } void DecoderSingleTest::loadOnlineFtrs( char *online_ftrs_filename ) { // Only read the first sentence in the file. 
DiskXFile *online_ftrs_fd ; unsigned char buf[2000] , flag ; #ifdef DEBUG if ( decoder_input != NULL ) error("DecoderSingleTest::loadOnlineFtrs - already have decoder input data\n") ; if ( (sizeof(unsigned char) != 1) || (sizeof(float) != 4) ) error("DecoderSingleTest::loadOnlineFtrs - types have unexpected sizes\n") ; #endif n_frames = 0 ; decoder_input = NULL ; n_features = phone_models->n_features ; // Open the file online_ftrs_fd = new DiskXFile( online_ftrs_filename , "r" ) ; do { // manually read the flag byte (so that DiskXFile does not reverse). online_ftrs_fd->read( &flag , sizeof(unsigned char) , 1 ) ; // read the features if ( online_ftrs_fd->read( buf , sizeof(float) , n_features ) != n_features ) error("DecoderSingleTest::loadOnlineFtrs - error reading feature vector\n") ; if ( (flag != 0x00) && (flag != 0x80) ) error("DecoderSingleTest::loadOnlineFtrs - flag byte error\n") ; n_frames++ ; decoder_input = (real **)Allocator::sysRealloc( decoder_input, n_frames*sizeof(real *) ) ; decoder_input[n_frames-1] = (real *)Allocator::sysAlloc( n_features * sizeof(real) ) ; #ifdef USE_DOUBLE for ( int i=0 ; in_features <= 0 ) error("DecoderSingleTest::loadOnlineFtrsFromArchive - ph_models->n_features not set\n") ; if ( archive_offset < 0 ) error("DecoderSingleTest::loadOnlineFtrsFromArchive - archive_offset not setup\n") ; #endif n_frames = 0 ; decoder_input = NULL ; n_features = phone_models->n_features ; // Go to the correct place in the archive file arch_file = new DiskXFile( archive_fd ) ; arch_file->seek( archive_offset , SEEK_SET ) ; // Read until the end of the the sentence. do { // manually read the flag byte (so that DiskXFile does not reverse). 
arch_file->read( &flag , sizeof(unsigned char) , 1 ) ; if ( arch_file->read( buf , sizeof(float) , n_features ) != n_features ) error("DecoderSingleTest::loadOnlineFtrsFromArchive - error reading feature vector\n") ; if ( (flag != 0x00) && (flag != 0x80) ) error("DecoderSingleTest::loadOnlineFtrsFromArchive - flag byte error\n") ; n_frames++ ; decoder_input = (real **)Allocator::sysRealloc( decoder_input, n_frames*sizeof(real *) ) ; decoder_input[n_frames-1] = (real *)Allocator::sysAlloc( n_features * sizeof(real) ) ; #ifdef USE_DOUBLE for ( int i=0 ; in_states ; // Allocate memory to hold the states states = (DecodingHMMState **)Allocator::sysAlloc( n_states * sizeof(DecodingHMMState *) ) ; // Create each state in turn. for ( short i=0 ; istates[i] , emis_prob_vec_indices[i] ) ; } // Now go through the log_transitions array in the HMM instance // and extract only the non-zero transitions FROM this state. for ( short from=0 ; fromlog_transitions[to][from] > LOG_ZERO ) { log_trans[n_neighbours] = orig_model->log_transitions[to][from] ; neighbour_states[n_neighbours++] = to ; } } setupSuccessorStates( states[from] , n_neighbours , neighbour_states , log_trans ) ; } free( log_trans ) ; free( neighbour_states ) ; } DecodingHMM::DecodingHMM( int n_models , DecodingHMM **models ) { // This will take a list of smaller models and concatenate them // into a single model. // Typically used to form a word-level model from a collection // of phoneme models. 
mergeModels( n_models , models ) ; } DecodingHMM::DecodingHMM( short n_states_ , Distribution **states_ , real **log_trans_probs_ , short *emis_prob_vec_indices ) { real *log_trans ; short *neighbour_states , n_neighbours=0 ; n_states = n_states_ ; log_trans = (real *)Allocator::sysAlloc( 1000 * sizeof(real) ) ; neighbour_states = (short *)Allocator::sysAlloc( 1000 * sizeof(short) ) ; // Allocate memory to hold the states states = (DecodingHMMState **)Allocator::sysAlloc( n_states * sizeof(DecodingHMMState *) ) ; // Create each state in turn for ( short i=0 ; i LOG_ZERO ) { log_trans[n_neighbours] = log_trans_probs_[from][to] ; neighbour_states[n_neighbours++] = to ; } } setupSuccessorStates( states[from] , n_neighbours , neighbour_states , log_trans ) ; } free( log_trans ) ; free( neighbour_states ) ; } DecodingHMM::~DecodingHMM() { if ( states != NULL ) { for ( short i=0 ; isuccessor_states ) ; free( states[i]->suc_log_trans_probs ) ; free( states[i] ) ; } free( states ) ; } } void DecodingHMM::mergeModels( int n_models , DecodingHMM **models ) { short index , prev_n_states , old_n_sucs ; real old_prob ; DecodingHMMState **new_states=NULL ; if ( n_models > 1 ) { mergeModels( n_models-1 , models+1 ) ; // We now need to merge models[0] with the current contents of this instance // into a model that has the initial state of model[0], emitting states of // model[0], emitting states of this instance, final state of this instance. prev_n_states = n_states ; n_states += (models[0]->n_states - 2) ; new_states = (DecodingHMMState **)Allocator::sysAlloc( n_states * sizeof(DecodingHMMState *) ) ; // Create new state instances corresponding to each state in the model we have // to merge with (except the final state) and insert these at the start // of the new array of states. 
index = 0 ; for ( short i=0 ; i<(models[0]->n_states-1) ; i++ ) { new_states[index] = (DecodingHMMState *)Allocator::sysAlloc( sizeof(DecodingHMMState) ) ; initState( new_states[index] , models[0]->states[i]->distribution , models[0]->states[i]->emission_prob_vec_index ) ; index++ ; } // Copy all existing states except the initial state into the correct positions // at the end of the array of states and update their successor indices to // reflect the new positions. for ( short i=1 ; in_successors ; j++ ) new_states[index]->successor_states[j] += (models[0]->n_states - 2) ; index++ ; } // Update the successor indices of the existing initial state of this instance // to reflect the new state positions. for ( short j=0 ; jn_successors ; j++ ) states[0]->successor_states[j] += (models[0]->n_states - 2) ; // Now update the successor information for the states from the first model. for ( short i=0 ; i<(models[0]->n_states-1) ; i++ ) { // Copy the successor information setupSuccessorStates( new_states[i] , models[0]->states[i]->n_successors , models[0]->states[i]->successor_states , models[0]->states[i]->suc_log_trans_probs ) ; // Look at the last successor entry for each state. If it is the final // state of the first model, remove the entry and replace it with the successors // of the initial state of the second model. 
old_prob = new_states[i]->suc_log_trans_probs[new_states[i]->n_successors-1] ; old_n_sucs = new_states[i]->n_successors ; if ( new_states[i]->successor_states[new_states[i]->n_successors-1] == (models[0]->n_states-1) ) { new_states[i]->n_successors += (states[0]->n_successors - 1) ; new_states[i]->successor_states = (short *)Allocator::sysRealloc( new_states[i]->successor_states , new_states[i]->n_successors * sizeof(short) ) ; new_states[i]->suc_log_trans_probs = (real *)Allocator::sysRealloc( new_states[i]->suc_log_trans_probs , new_states[i]->n_successors * sizeof(real) ) ; for ( short j=0 ; j<(states[0]->n_successors) ; j++ ) { new_states[i]->successor_states[old_n_sucs+j-1] = states[0]->successor_states[j] ; new_states[i]->suc_log_trans_probs[old_n_sucs+j-1] = old_prob + states[0]->suc_log_trans_probs[j] ; } } } if ( states[0]->successor_states != NULL ) free( states[0]->successor_states ) ; if ( states[0]->suc_log_trans_probs != NULL ) free( states[0]->suc_log_trans_probs ) ; free( states[0] ) ; free( states ) ; states = new_states ; } else if ( n_models == 1 ) { // If we only have 1 model in the input array, just copy its contents n_states = models[0]->n_states ; states = (DecodingHMMState **)Allocator::sysAlloc( n_states * sizeof(DecodingHMMState *) ) ; for ( short i=0 ; istates[i]->distribution , models[0]->states[i]->emission_prob_vec_index ) ; setupSuccessorStates( states[i] , models[0]->states[i]->n_successors , models[0]->states[i]->successor_states , models[0]->states[i]->suc_log_trans_probs ) ; } } } void DecodingHMM::initState( DecodingHMMState *state , Distribution *distribution_ , short emission_prob_vec_index_ ) { state->distribution = distribution_ ; state->emission_prob_vec_index = emission_prob_vec_index_ ; state->n_successors = 0 ; state->successor_states = NULL ; state->suc_log_trans_probs = NULL ; } void DecodingHMM::setupSuccessorStates( DecodingHMMState *state , short n_successors_ , short *successor_states_ , real *log_trans_probs_ ) { 
state->n_successors = n_successors_ ; if ( n_successors_ > 0 ) { state->successor_states = (short *)Allocator::sysAlloc( n_successors_ * sizeof(short) ) ; state->suc_log_trans_probs = (real *)Allocator::sysAlloc( n_successors_ * sizeof(real) ) ; for ( int i=0 ; isuccessor_states[i] = successor_states_[i] ; state->suc_log_trans_probs[i] = log_trans_probs_[i] ; } } } #ifdef DEBUG void DecodingHMM::outputText() { printf("DecodingHMM with %d states\n*************************\n" , n_states) ; for ( int i=0 ; in_successors) ; for ( int j=0 ; jn_successors ; j++ ) printf("%d ",states[i]->successor_states[j]) ; printf(" ") ; for ( int j=0 ; jn_successors ; j++ ) printf("%.20f ",states[i]->suc_log_trans_probs[j]) ; printf("\n") ; } printf("\n") ; } #endif } torch3-3.1.orig/decoder/DecodingHMM.h0000644000175000017500000001026710106445236017537 0ustar kalfakalfa00000000000000// Copyright (C) 2003--2004 Darren Moore (moore@idiap.ch) // // This file is part of Torch 3.1. // // All rights reserved. // // Redistribution and use in source and binary forms, with or without // modification, are permitted provided that the following conditions // are met: // 1. Redistributions of source code must retain the above copyright // notice, this list of conditions and the following disclaimer. // 2. Redistributions in binary form must reproduce the above copyright // notice, this list of conditions and the following disclaimer in the // documentation and/or other materials provided with the distribution. // 3. The name of the author may not be used to endorse or promote products // derived from this software without specific prior written permission. // // THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR // IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES // OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
// IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, // INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT // NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF // THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #ifndef DECODINGHMM_INC #define DECODINGHMM_INC #include "general.h" #include "Distribution.h" #include "HMM.h" namespace Torch { /* This structure is used to store all HMM state information required for decoding. Each state contains a list of successor states and associated transition probabilities. @author Darren Moore (moore@idiap.ch) */ typedef struct { Distribution *distribution ; short emission_prob_vec_index ; short n_successors ; short *successor_states ; real *suc_log_trans_probs ; } DecodingHMMState ; /** This class contains all HMM information required for decoding. Most information is embedded in the states themselves (DecodingHMMState structures). The DecodingHMM can be created a number of ways to facilitate compatibility with the Torch HMM class and to allow easy concatenation of models (eg. when constructing word models from phoneme models). @author Darren Moore (moore@idiap.ch) */ class DecodingHMM { public: short n_states ; DecodingHMMState **states ; /* Constructors / destructor */ DecodingHMM() ; /// Converts a Torch HMM instance to a DecodingHMM representation. The Torch HMM /// class contains a lot of member variables used in training that are not /// required when decoding, as well as a full transition matrix. DecodingHMM( HMM *orig_model , short *emis_prob_vec_indices ) ; /// Concatenates all elements in the array of DecodingHMM instances into a single /// DecodingHMM. Component models that have initial-final state transitions are ok. 
DecodingHMM( int n_models , DecodingHMM **models ) ; /// Creates a DecodingHMM using the distributions in 'states_' and the log /// transition probabilties in 'log_trans_probs_'. DecodingHMM( short n_states_ , Distribution **states_ , real **log_trans_probs_ , short *emis_prob_vec_indices ) ; virtual ~DecodingHMM() ; /* Methods */ /// Internal function to merge component models into a big HMM void mergeModels( int n_models , DecodingHMM **models ) ; /// Configures successor state information for the state denoted by 'state'. void setupSuccessorStates( DecodingHMMState *state , short n_successors_ , short *sucessor_states_ , real *log_trans_probs_ ) ; /// Initialises the state denoted by 'state' with a Distribution and /// optionally an index into the vector of emission probabilities. void initState( DecodingHMMState *state , Distribution *distribution_ , short emission_prob_vec_index_=-1 ) ; #ifdef DEBUG void outputText() ; #endif } ; } #endif torch3-3.1.orig/decoder/DecodingHypothesis.cc0000644000175000017500000000757010106445236021416 0ustar kalfakalfa00000000000000// Copyright (C) 2003--2004 Darren Moore (moore@idiap.ch) // // This file is part of Torch 3.1. // // All rights reserved. // // Redistribution and use in source and binary forms, with or without // modification, are permitted provided that the following conditions // are met: // 1. Redistributions of source code must retain the above copyright // notice, this list of conditions and the following disclaimer. // 2. Redistributions in binary form must reproduce the above copyright // notice, this list of conditions and the following disclaimer in the // documentation and/or other materials provided with the distribution. // 3. The name of the author may not be used to endorse or promote products // derived from this software without specific prior written permission. 
// // THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR // IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES // OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. // IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, // INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT // NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF // THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #include "Allocator.h" #include "DecodingHypothesis.h" #include "WordChainElemPool.h" #include "log_add.h" namespace Torch { WordChainElemPool DecodingHypothesis::word_chain_elem_pool(1000) ; DecodingHypothesis::DecodingHypothesis() { word = -1 ; state = -1 ; score = LOG_ZERO ; word_level_info = NULL ; } DecodingHypothesis::DecodingHypothesis( int word_ , int state_ ) { word = word_ ; state = state_ ; score = LOG_ZERO ; word_level_info = NULL ; } DecodingHypothesis::~DecodingHypothesis() { if ( word_level_info != NULL ) { if ( --(word_level_info->n_connected) <= 0 ) DecodingHypothesis::word_chain_elem_pool.returnElem( word_level_info ) ; } } void DecodingHypothesis::initHyp( int word_ , int state_ ) { word = word_ ; state = state_ ; score = LOG_ZERO ; word_level_info = NULL ; } void DecodingHypothesis::deactivate() { score = LOG_ZERO ; if ( word_level_info != NULL ) { if ( --(word_level_info->n_connected) <= 0 ) { // Only this hypothesis is accessing this word-level information. // Return the word_level_info instance to the word_chain_elem_pool. 
#ifdef DEBUG if ( word_level_info->n_connected < 0 ) error("DecodingHypothesis::deactivate - n_connected < 0\n") ; #endif DecodingHypothesis::word_chain_elem_pool.returnElem( word_level_info ) ; } word_level_info = NULL ; } } void DecodingHypothesis::extendWord( real new_score , WordChainElem *new_word_chain_elem ) { #ifdef DEBUG if ( new_word_chain_elem == NULL ) error("DecodingHypothesis:extendWord - new_word_chain_elem is NULL\n"); #endif deactivate() ; score = new_score ; word_level_info = new_word_chain_elem ; new_word_chain_elem->n_connected++ ; } void DecodingHypothesis::extendState( DecodingHypothesis *prev_hyp , real new_score ) { #ifdef DEBUG if ( prev_hyp->word_level_info == NULL ) error("DecodingHypothesis:extendState - prev_hyp->word_level_info is NULL\n") ; #endif deactivate() ; score = new_score ; word_level_info = prev_hyp->word_level_info ; if ( word_level_info != NULL ) word_level_info->n_connected++ ; } #ifdef DEBUG void DecodingHypothesis::outputText() { printf("word=%d, state=%d, score=%.3f\n",word,state,score); } #endif } torch3-3.1.orig/decoder/DecodingHypothesis.h0000644000175000017500000000607310106445236021255 0ustar kalfakalfa00000000000000// Copyright (C) 2003--2004 Darren Moore (moore@idiap.ch) // // This file is part of Torch 3.1. // // All rights reserved. // // Redistribution and use in source and binary forms, with or without // modification, are permitted provided that the following conditions // are met: // 1. Redistributions of source code must retain the above copyright // notice, this list of conditions and the following disclaimer. // 2. Redistributions in binary form must reproduce the above copyright // notice, this list of conditions and the following disclaimer in the // documentation and/or other materials provided with the distribution. // 3. The name of the author may not be used to endorse or promote products // derived from this software without specific prior written permission. 
//
// THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
// IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
// OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
// IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
// INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
// NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
// THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

#ifndef DECODINGHYPOTHESIS_INC
#define DECODINGHYPOTHESIS_INC

#include "general.h"
#include "WordChainElemPool.h"

namespace Torch {


/** This class contains all hypothesis data that needs to be updated
    and propagated as hypotheses are extended through word models
    and across word boundaries. Methods are provided to transfer
    data between DecodingHypothesis objects in varying ways.

    The word-level data (word_level_info) is shared between hypotheses
    and tracked through its n_connected counter.

    @author Darren Moore (moore@idiap.ch)
*/
class DecodingHypothesis
{
public:
   int word ;    // the index of the pronunciation in the lexicon
   int state ;   // state index (-1 after default construction)
   real score ;  // hypothesis score; LOG_ZERO while the hypothesis is inactive
   WordChainElem *word_level_info ;  // shared word-level data, NULL when unlinked
   static WordChainElemPool word_chain_elem_pool ;  // pool that released elements are returned to

   /* Constructors / destructor */
   /// Creates an inactive hypothesis with word = -1 and state = -1.
   DecodingHypothesis() ;
   /// Creates an inactive hypothesis for 'word_' and 'state_'.
   DecodingHypothesis( int word_ , int state_ ) ;
   /// Drops the reference on word_level_info (if any).
   virtual ~DecodingHypothesis() ;

   /* Methods */
   /// Sets word and state, resets score to LOG_ZERO and word_level_info to
   /// NULL. A word_level_info held beforehand is not released.
   void initHyp( int word_ , int state_ ) ;

   /// Unlinks this instance from its word_level_info member variable.
   /// If this instance was the only entity connected to the
   /// word_level_info object, then the word_level_info object is
   /// returned to the global word_chain_elem_pool.
   void deactivate() ;

   /// Updates the hypothesis information when a word boundary
   /// has been crossed.
   void extendWord( real new_score , WordChainElem *new_word_chain_elem ) ;

   /// Updates the hypothesis information when a word-interior state
   /// transition has been made.
   void extendState( DecodingHypothesis *prev_hyp , real new_score ) ;

#ifdef DEBUG
   void outputText() ;
#endif
} ;


}

#endif
torch3-3.1.orig/decoder/LMCache.cc0000644000175000017500000001177310106445236017056 0ustar kalfakalfa00000000000000
// Copyright (C) 2003--2004 Darren Moore (moore@idiap.ch)
//
// This file is part of Torch 3.1.
//
// All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions
// are met:
// 1. Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
// 2. Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
// 3. The name of the author may not be used to endorse or promote products
// derived from this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
// IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
// OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
// IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
// INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
// NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
// THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#include "Allocator.h" #include "LMCache.h" #include "log_add.h" namespace Torch { LMCache::LMCache( int max_entries_ , int lm_order , int n_vocab_words ) { if ( max_entries_ <= 0 ) error("LMCache::LMCache - max_entries_ cannot be <= 0\n") ; max_entries = max_entries_ ; entries = (LMCacheEntry **)Allocator::sysAlloc( max_entries * sizeof(LMCacheEntry *) ) ; for ( int i=0 ; in_prev_words ) { if ( memcmp( words , entries[i]->prev_words , n_prev_words*sizeof(int) ) == 0 ) { // Yes we do - add the probability to the existing entry entries[i]->addProb( words[n_prev_words] , prob ) ; // Increase the age of all cache entries and keep track of the oldest for ( j=0 ; jage) > max_age ) { max_age = entries[j]->age ; oldest = j ; } } return ; } } } // No we don't, we need to add this n-gram to the cache if ( n_entries < max_entries ) { // Our cache isn't full - we can simply add the new entry at the end entries[n_entries]->addNewPrevWords( n_prev_words , words ) ; entries[n_entries]->addProb( words[n_prev_words] , prob ) ; n_entries++ ; } else { // The cache is full - we need to replace the oldest entry with the new one entries[oldest]->addNewPrevWords( n_prev_words , words ) ; entries[oldest]->addProb( words[n_prev_words] , prob ) ; } // Increase the age of all cache entries and keep track of oldest for ( i=0 ; iage) > max_age ) { max_age = entries[i]->age ; oldest = i ; } } } real LMCache::getProb( int n_words , int *words ) { // The ordering of the words must be W3 W2 W1 W4 (for a 4-gram LM) // where W4 is "next" word and remainder are prev words. 
int max_age=0 , i , n_prev_words=(n_words-1) ; real prob = -LOG_ZERO ; for ( i=0 ; in_prev_words == n_prev_words ) { if ( memcmp( words , entries[i]->prev_words , n_prev_words*sizeof(int) ) == 0 ) { prob = entries[i]->getProb( words[n_prev_words] ) ; break ; } } } // Increase the age of all cache entries and keep track of oldest for ( i=0 ; iage) > max_age ) { max_age = entries[i]->age ; oldest = i ; } } return prob ; } } torch3-3.1.orig/decoder/LMCache.h0000644000175000017500000000625010106445236016712 0ustar kalfakalfa00000000000000// Copyright (C) 2003--2004 Darren Moore (moore@idiap.ch) // // This file is part of Torch 3.1. // // All rights reserved. // // Redistribution and use in source and binary forms, with or without // modification, are permitted provided that the following conditions // are met: // 1. Redistributions of source code must retain the above copyright // notice, this list of conditions and the following disclaimer. // 2. Redistributions in binary form must reproduce the above copyright // notice, this list of conditions and the following disclaimer in the // documentation and/or other materials provided with the distribution. // 3. The name of the author may not be used to endorse or promote products // derived from this software without specific prior written permission. // // THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR // IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES // OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
// IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
// INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
// NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
// THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

#ifndef LMCACHE_INC
#define LMCACHE_INC

#include "general.h"
#include "LMCacheEntry.h"

namespace Torch {


/** This class implements a rudimentary caching scheme for language
    model lookup. It basically consists of a (small) list of the most
    recently accessed language model entries. A cache lookup entails a
    linear search of the list of entries. The oldest entry is overwritten
    when the cache is full and a new entry is added.

    @author Darren Moore (moore@idiap.ch)
*/
class LMCache
{
public:
   /// Capacity of the cache (size of the 'entries' array).
   int max_entries ;
   /// Number of entries currently in use; new entries are appended at
   /// this position while the cache is not full.
   int n_entries ;
   /// Array of 'max_entries' cache entry pointers.
   LMCacheEntry **entries ;
   /// Index of the entry with the greatest age, i.e. the one that is
   /// overwritten when a new entry is added to a full cache.
   int oldest ;

   /// Creates an empty cache.
   /// 'max_entries_' is the maximum number of entries in the cache.
   /// 'lm_order' is the order of the language model n-gram (ie 3
   ///   for a trigram LM)
   /// 'n_vocab_words' is the number of words in the vocabulary.
   LMCache( int max_entries_ , int lm_order , int n_vocab_words ) ;
   virtual ~LMCache() ;

   /// Adds an entry to the cache. If the cache is full and the new
   ///   entry is not already in the cache, the oldest entry is
   ///   overwritten.
   /// 'order' is the order of the entry, which can be <= the lm_order
   ///   used during cache creation.
   /// 'words' are the words in the n-gram. The order is W3 W2 W1 W4
   ///   for a 4-gram entry.
   /// 'prob' is the log probability of the n-gram as calculated by the
   ///   language model.
   void addEntry( int order , int *words , real prob ) ;

   /// Looks for the n-gram in 'words' within the cache and returns
   ///   its probability if found, otherwise returns -LOG_ZERO.
   /// 'words' holds 'order' words: the first (order-1) are the previous
   ///   words and the last one is the "next" word.
   /// Every lookup also ages the cache entries and refreshes 'oldest'.
   real getProb( int order , int *words ) ;
};


}

#endif
torch3-3.1.orig/decoder/LMCacheEntry.cc0000644000175000017500000000655110106445236020076 0ustar kalfakalfa00000000000000
// Copyright (C) 2003--2004 Darren Moore (moore@idiap.ch)
//
// This file is part of Torch 3.1.
//
// All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions
// are met:
// 1. Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
// 2. Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
// 3. The name of the author may not be used to endorse or promote products
// derived from this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
// IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
// OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
// IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
// INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
// NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
// THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#include "Allocator.h" #include "LMCacheEntry.h" #include "log_add.h" namespace Torch { LMCacheEntry::LMCacheEntry( int max_n_prev_words_ , int n_vocab_words ) { max_n_prev_words = max_n_prev_words_ ; prev_words = (int *)Allocator::sysAlloc( max_n_prev_words * sizeof(int) ) ; n_probs = n_vocab_words ; probs = (real *)Allocator::sysAlloc( n_probs * sizeof(real) ) ; for ( int i=0 ; i max_n_prev_words) || (n_prev_words_ <= 0) ) error("LMCacheEntry::addNewPrevWords - n_prev_words_ out of range\n") ; #endif memcpy( prev_words , prev_words_ , n_prev_words_*sizeof(int) ) ; n_prev_words = n_prev_words_ ; resetProbs() ; age = 0 ; } void LMCacheEntry::addProb( int vocab_word , real prob ) { #ifdef DEBUG if ( (vocab_word<0) || (vocab_word>=n_probs) ) error("LMCacheEntry::addProb - vocab word index out of range\n") ; if ( probs[vocab_word] < -LOG_ZERO ) error("LMCacheEntry::addProb - entry already set\n") ; #endif probs[vocab_word] = prob ; age = 0 ; } real LMCacheEntry::getProb( int vocab_word ) { #ifdef DEBUG if ( (vocab_word<0) || (vocab_word>=n_probs) ) error("LMCacheEntry::getProb - vocab word index out of range\n") ; #endif age = 0 ; return probs[vocab_word] ; } } torch3-3.1.orig/decoder/LMCacheEntry.h0000644000175000017500000000622410106445236017735 0ustar kalfakalfa00000000000000// Copyright (C) 2003--2004 Darren Moore (moore@idiap.ch) // // This file is part of Torch 3.1. // // All rights reserved. // // Redistribution and use in source and binary forms, with or without // modification, are permitted provided that the following conditions // are met: // 1. Redistributions of source code must retain the above copyright // notice, this list of conditions and the following disclaimer. // 2. Redistributions in binary form must reproduce the above copyright // notice, this list of conditions and the following disclaimer in the // documentation and/or other materials provided with the distribution. // 3. 
The name of the author may not be used to endorse or promote products // derived from this software without specific prior written permission. // // THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR // IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES // OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. // IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, // INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT // NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF // THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #ifndef LMCACHEENTRY_INC #define LMCACHEENTRY_INC #include "general.h" namespace Torch { /** This class implements the internal entries within the LMCache class. Each entry consists of an array of previous words (eg. 2 previous words in a trigram entry), and a list of probabilities of next words given the previous words. There is also an age field that is used to keep track of how recently the entry was accessed. @author Darren Moore (moore@idiap.ch) */ class LMCacheEntry { public: int age ; int max_n_prev_words ; int n_prev_words ; int *prev_words ; int n_probs ; real *probs ; /// Creates the cache entry. /// 'max_n_prev_words_' is the maximum number of prev words that will /// ever be used with the cache entry (eg. 2 for trigram LM) /// 'n_vocab_words' is the number of words in the vocabulary. LMCacheEntry( int max_n_prev_words_ , int n_vocab_words ) ; virtual ~LMCacheEntry() ; /// Replaces the current prev words with new ones. The new entry can /// a number of prev words that is <= max_n_prev_words (ie. can /// cache unigram entries when using a trigram LM). 
void addNewPrevWords( int n_prev_words_ , int *prev_words_ ) ; /// Resets all of the next-word log probs to -LOG_ZERO (to indicate that /// we have no cached probs for all next words). void resetProbs() ; /// Adds a log probability for the next-word denoted by 'vocab_word'. void addProb( int vocab_word , real prob ) ; /// Returns the cached log probability for the next-word denoted by /// 'vocab_word'. real getProb( int word ) ; }; } #endif torch3-3.1.orig/decoder/LMInteriorLevelWordEntry.cc0000644000175000017500000001671710106445236022517 0ustar kalfakalfa00000000000000// Copyright (C) 2003--2004 Darren Moore (moore@idiap.ch) // // This file is part of Torch 3.1. // // All rights reserved. // // Redistribution and use in source and binary forms, with or without // modification, are permitted provided that the following conditions // are met: // 1. Redistributions of source code must retain the above copyright // notice, this list of conditions and the following disclaimer. // 2. Redistributions in binary form must reproduce the above copyright // notice, this list of conditions and the following disclaimer in the // documentation and/or other materials provided with the distribution. // 3. The name of the author may not be used to endorse or promote products // derived from this software without specific prior written permission. // // THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR // IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES // OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
// IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, // INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT // NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF // THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #include "Allocator.h" #include "LMInteriorLevelWordEntry.h" #include "log_add.h" namespace Torch { LMInteriorLevelWordEntry::LMInteriorLevelWordEntry( int word_ ) { #ifdef DEBUG if ( word_ < 0 ) error("LMInteriorLevelWordEntry::LMInteriorLevelWordEntry - word out of range\n") ; #endif word = word_ ; log_bo_weight = 0.0 ; next_level = NULL ; n_entries = 0 ; entries = NULL ; } LMInteriorLevelWordEntry::~LMInteriorLevelWordEntry() { if ( next_level != NULL ) delete next_level ; if ( entries != NULL ) free( entries ) ; } void LMInteriorLevelWordEntry::addProbEntry( int list_level , int order , int *words , real prob ) { // There are 'order' entries in 'words'. The ordering is eg. W3,W2,W1,W4 for order=4. if ( order == 1 ) { entries = (LMWordEntry *)Allocator::sysRealloc( entries , (n_entries+1)*sizeof(LMWordEntry) ) ; // Find the place in the list of words where we want to insert the new word if ( (n_entries == 0) || ((*words) > entries[n_entries-1].word) ) { entries[n_entries].word = *words ; entries[n_entries].log_prob = prob ; } else { // The new word does not belong at the end of the list - find // correct position in list and insert there. for ( int i=0 ; iaddProbEntry( order , words , prob ) ; } } void LMInteriorLevelWordEntry::addBackoffEntry( int list_level , int order , int *words , real bo_wt ) { // There should be 'order' entries in 'prev_words' and the ordering should // be eg. W3,W2,W1 if order == 3. 
if ( order == 0 ) log_bo_weight = bo_wt ; else { // This entry needs to be added to the 'next_level' (interior) list. if ( next_level == NULL ) next_level = new LMInteriorLevelWordList( list_level-1 ) ; next_level->addBackoffEntry( order , words , bo_wt ) ; } } bool LMInteriorLevelWordEntry::getProbWithBackoff( int order , int *prev_words , real *prob ) { // There should be 'order' entries in 'prev_words' and the ordering should // be eg. W3,W2,W1,W4 if order == 4. real temp ; #ifdef DEBUG if ( order < 1 ) error("LMInteriorLevelWordEntry::getProbWithBackoff - order out of range\n") ; #endif if ( order == 1 ) { if ( (*prob = getWordProb( *prev_words )) <= LOG_ZERO ) { *prob = log_bo_weight ; return false ; } else return true ; } else { if ( next_level == NULL ) { if ( (*prob = getWordProb( prev_words[order-1] )) <= LOG_ZERO ) { *prob = log_bo_weight ; return false ; } else return true ; } else { if ( next_level->getProbWithBackoff( order , prev_words , prob ) == true ) return true ; else { if ( (temp = getWordProb( prev_words[order-1] )) <= LOG_ZERO ) { *prob += log_bo_weight ; return false ; } else { *prob += temp ; return true ; } } } } } real LMInteriorLevelWordEntry::getWordProb( int word_ ) { // We assume that the list of words is in ascending order so // that we can do a binary search. 
int min=0 , max=(n_entries-1) , curr_pos=0 ; if ( n_entries == 0 ) return LOG_ZERO ; if ( n_entries <= 10 ) { // just do a linear search for ( int i=0 ; i entries[curr_pos].word ) min = curr_pos+1 ; else return entries[curr_pos].log_prob ; if ( min > max ) return LOG_ZERO ; } } return LOG_ZERO ; } #ifdef DEBUG void LMInteriorLevelWordEntry::outputText( Vocabulary *vocab , int *words , int n_words ) { words[n_words++] = word ; for ( int j=0 ; jgetWord( words[n_words-i-1] ) ) ; printf("%s\n" , vocab->getWord( entries[j].word )) ; } printf("BACKOFF ") ; for ( int i=0 ; igetWord( words[n_words-i-1] ) ) ; printf("%f\n" , log_bo_weight ) ; if ( next_level != NULL ) next_level->outputText( vocab , words , n_words ) ; } #endif } torch3-3.1.orig/decoder/LMInteriorLevelWordEntry.h0000644000175000017500000000547610106445236022361 0ustar kalfakalfa00000000000000// Copyright (C) 2003--2004 Darren Moore (moore@idiap.ch) // // This file is part of Torch 3.1. // // All rights reserved. // // Redistribution and use in source and binary forms, with or without // modification, are permitted provided that the following conditions // are met: // 1. Redistributions of source code must retain the above copyright // notice, this list of conditions and the following disclaimer. // 2. Redistributions in binary form must reproduce the above copyright // notice, this list of conditions and the following disclaimer in the // documentation and/or other materials provided with the distribution. // 3. The name of the author may not be used to endorse or promote products // derived from this software without specific prior written permission. // // THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR // IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES // OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
// IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, // INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT // NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF // THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #ifndef LMINTERIORLEVELWORDENTRY_INC #define LMINTERIORLEVELWORDENTRY_INC #include "general.h" #include "LMInteriorLevelWordList.h" #ifdef DEBUG #include "Vocabulary.h" #endif namespace Torch { typedef struct { int word ; real log_prob ; } LMWordEntry ; class LMInteriorLevelWordList ; /** This class is used internally within the language model n-gram data structures. It contains a list of LM probabilities relevant to the level of the tree structure where it exists, and a link to the next level of the tree structure. @author Darren Moore (moore@idiap.ch) */ class LMInteriorLevelWordEntry { public: int word ; real log_bo_weight ; LMInteriorLevelWordList *next_level ; int n_entries ; LMWordEntry *entries ; /* Constructors / destructor */ LMInteriorLevelWordEntry( int word_ ) ; virtual ~LMInteriorLevelWordEntry() ; /* Methods */ void addProbEntry( int list_level , int order , int *words , real prob ) ; void addBackoffEntry( int list_level , int order , int *words , real bo_wt ) ; bool getProbWithBackoff( int order , int *prev_words , real *prob ) ; // Internal function. real getWordProb( int word ) ; #ifdef DEBUG void outputText( Vocabulary *vocab , int *words , int n_words ) ; #endif }; } #endif torch3-3.1.orig/decoder/LMInteriorLevelWordList.cc0000644000175000017500000001670010106445236022321 0ustar kalfakalfa00000000000000// Copyright (C) 2003--2004 Darren Moore (moore@idiap.ch) // // This file is part of Torch 3.1. // // All rights reserved. 
// // Redistribution and use in source and binary forms, with or without // modification, are permitted provided that the following conditions // are met: // 1. Redistributions of source code must retain the above copyright // notice, this list of conditions and the following disclaimer. // 2. Redistributions in binary form must reproduce the above copyright // notice, this list of conditions and the following disclaimer in the // documentation and/or other materials provided with the distribution. // 3. The name of the author may not be used to endorse or promote products // derived from this software without specific prior written permission. // // THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR // IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES // OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. // IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, // INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT // NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF // THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #include "Allocator.h" #include "LMInteriorLevelWordList.h" #include "log_add.h" namespace Torch { LMInteriorLevelWordList::LMInteriorLevelWordList( int level_ ) { if ( level_ < 1 ) error("LMInteriorLevelWordList::LMInteriorLevelWordList - cannot have level_ < 1\n") ; level = level_ ; n_entries = 0 ; entries = NULL ; } LMInteriorLevelWordList::~LMInteriorLevelWordList() { if ( entries != NULL ) { for ( int i=0 ; i level ) error("LMInteriorLevelWordList - addEntry - order of new entry must be <= level\n") ; #endif // Do we have an entry for the word at this level ? 
if ( (new_word = findWord( *words )) == NULL ) { // No we don't so we need to create an entry and add it to our 'entries' array. entries = (LMInteriorLevelWordEntry **)Allocator::sysRealloc( entries , (n_entries+1)*sizeof(LMInteriorLevelWordEntry *) ) ; new_word = new LMInteriorLevelWordEntry( *words ) ; // If the list is empty, just insert it straight off. Also // do an initial check to see if the word belongs at the end of the array if ( (n_entries == 0) || (new_word->word > entries[n_entries-1]->word) ) { entries[n_entries] = new_word ; } else { // Find the place in the list of words where we want to insert the new word for ( int i=0 ; iword ) { // Shuffle down all words from i onwards and place the // new word in position i. memmove( entries+i+1 , entries+i , (n_entries-i)*sizeof(LMInteriorLevelWordEntry *) ) ; entries[i] = new_word ; break ; } } } n_entries++ ; } // 'new_word' now points to the entry for the word corresponding to this level. // Continue adding our entry. new_word->addProbEntry( level , order-1 , words+1 , prob ) ; } void LMInteriorLevelWordList::addBackoffEntry( int order , int *words , real bo_wt ) { LMInteriorLevelWordEntry *new_word ; // There should be 'order' entries in 'words' and the order should be // most-recent word first. eg. [w4 w3 w2 w1] for 4-gram #ifdef DEBUG if ( (order > level) || (order < 1) ) error("LMInteriorLevelWordList::addBackoffEntry - order out of range\n") ; #endif if ( (new_word = findWord( *words )) == NULL ) { // No we don't so we need to create an entry and add it to our 'entries' array. entries = (LMInteriorLevelWordEntry **)Allocator::sysRealloc( entries , (n_entries+1)*sizeof(LMInteriorLevelWordEntry *) ) ; new_word = new LMInteriorLevelWordEntry( *words ) ; // If the list is empty, just insert it straight off. 
Also // do an initial check to see if the word belongs at the end of the array if ( (n_entries == 0) || (new_word->word > entries[n_entries-1]->word) ) { entries[n_entries] = new_word ; } else { // Find the place in the list of words where we want to insert the new word for ( int i=0 ; iword > new_word->word ) { // Shuffle down all words from i onwards and place the // new word in position i. memmove( entries+i+1 , entries+i , (n_entries-i)*sizeof(LMInteriorLevelWordEntry *) ) ; entries[i] = new_word ; break ; } } } n_entries++ ; } new_word->addBackoffEntry( level , order-1 , words+1 , bo_wt ) ; } bool LMInteriorLevelWordList::getProbWithBackoff( int order , int *words , real *prob ) { LMInteriorLevelWordEntry *word_entry ; // There should be 'order' entries in 'words'. The ordering should be eg. // W3,W2,W1,W4 when order=4 if ( (word_entry = findWord( *words )) != NULL ) return word_entry->getProbWithBackoff( order-1 , words+1 , prob ) ; else { *prob = 0.0 ; return false ; } } LMInteriorLevelWordEntry *LMInteriorLevelWordList::findWord( int word_ ) { // We assume that the list of words is in ascending order so // that we can do a binary search. 
int min=0 , max=(n_entries-1) , curr_pos=0 ; if ( n_entries == 0 ) return NULL ; if ( n_entries <= 10 ) { // just do a linear search for ( int i=0 ; iword ) return NULL ; else if ( word_ == entries[i]->word ) return entries[i] ; } } else { // do a binary search while (1) { curr_pos = min+(max-min)/2 ; if ( word_ < entries[curr_pos]->word ) max = curr_pos-1 ; else if ( word_ > entries[curr_pos]->word ) min = curr_pos+1 ; else return entries[curr_pos] ; if ( min > max ) return NULL ; } } return NULL ; } #ifdef DEBUG void LMInteriorLevelWordList::outputText( Vocabulary *vocab , int *words , int n_words ) { for ( int i=0 ; ioutputText( vocab , words , n_words ) ; } #endif } torch3-3.1.orig/decoder/LMInteriorLevelWordList.h0000644000175000017500000000776610106445236022177 0ustar kalfakalfa00000000000000// Copyright (C) 2003--2004 Darren Moore (moore@idiap.ch) // // This file is part of Torch 3.1. // // All rights reserved. // // Redistribution and use in source and binary forms, with or without // modification, are permitted provided that the following conditions // are met: // 1. Redistributions of source code must retain the above copyright // notice, this list of conditions and the following disclaimer. // 2. Redistributions in binary form must reproduce the above copyright // notice, this list of conditions and the following disclaimer in the // documentation and/or other materials provided with the distribution. // 3. The name of the author may not be used to endorse or promote products // derived from this software without specific prior written permission. // // THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR // IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES // OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
// IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
// INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
// NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
// THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

#ifndef LMINTERIORLEVELWORDLIST_INC
#define LMINTERIORLEVELWORDLIST_INC

#include "general.h"
#include "LMInteriorLevelWordEntry.h"

#ifdef DEBUG
#include "Vocabulary.h"
#endif

namespace Torch {

// Forward declaration: this header and LMInteriorLevelWordEntry.h include
// each other.
class LMInteriorLevelWordEntry ;

/** This class is used internally within the language model n-gram data
    structures. It is basically a list of LMInteriorLevelWordEntry
    instances that are sorted by vocab word id.

    @author Darren Moore (moore@idiap.ch)
*/
class LMInteriorLevelWordList
{
public:
  /// Level of this list in the tree; the constructor rejects values < 1.
  int level ;

  /// Number of elements in 'entries'.
  int n_entries ;

  /// Array of pointers to the word entries, kept in ascending order of
  /// word id so that findWord() can use a binary search.
  LMInteriorLevelWordEntry **entries ;

  /* Constructors / destructor */

  /// Creates the list. Instances of this object are used at different
  /// levels in the tree-like n-gram data structures.
  /// 'level_' denotes the level in the tree of this instance.
  /// Calls error() if 'level_' is less than 1. The list starts empty.
  LMInteriorLevelWordList( int level_ ) ;
  virtual ~LMInteriorLevelWordList() ;

  /* Methods */

  /// Adds a language model probability. 'order' denotes the order
  /// of the entry that is being added, and 'words' contains the
  /// vocabulary indices of the words in the n-gram. The ordering
  /// of 'words' should be eg. W3,W2,W1,W4 when 'order' is 4 and
  /// where W4 is the "most-recent" word.
  /// An entry for words[0] is created (and inserted in sorted position)
  /// if none exists yet; the remaining words are passed on to that entry.
  void addProbEntry( int order , int *words , real prob ) ;

  /// Adds a language model backoff weight. 'order' denotes the order
  /// of the entry that is being added, and 'words' contains the
  /// vocabulary indices of the words in the n-gram. The ordering
  /// of 'words' should be eg. W4,W3,W2,W1 when 'order' is 4 and
  /// where W4 is the "most-recent" word.
  /// An entry for words[0] is created (and inserted in sorted position)
  /// if none exists yet; the remaining words are passed on to that entry.
  void addBackoffEntry( int order , int *words , real bo_wt ) ;

  /// Calculates a language model probability for a particular word
  /// sequence with backoff. 'order' denotes the number of words
  /// in the word sequence, and 'words' contains the vocabulary
  /// indices of the words. The ordering of 'words' should be
  /// eg. W3,W2,W1,W4 when 'order' is 4 and where W4 is the
  /// "most-recent" word. Returns false if no entry was found,
  /// indicating that the value of prob contains only an
  /// accumulated backoff weight. Returns true if a valid
  /// LM prob has been determined.
  /// When words[0] has no entry in this list, *prob is set to 0.0
  /// and false is returned.
  bool getProbWithBackoff( int order , int *words , real *prob ) ;

  /// Internal function.
  /// Returns the entry whose word id equals 'word', or NULL if there is
  /// none. Lists of 10 entries or fewer are scanned linearly; longer
  /// lists use a binary search.
  LMInteriorLevelWordEntry *findWord( int word ) ;

#ifdef DEBUG
  /// Debug dump of the list contents.
  void outputText( Vocabulary *vocab , int *words , int n_words ) ;
#endif
};

}

#endif
torch3-3.1.orig/decoder/LMNGram.cc0000644000175000017500000001500010106445236017042 0ustar kalfakalfa00000000000000// Copyright (C) 2003--2004 Darren Moore (moore@idiap.ch)
//
// This file is part of Torch 3.1.
//
// All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions
// are met:
// 1. Redistributions of source code must retain the above copyright
//    notice, this list of conditions and the following disclaimer.
// 2. Redistributions in binary form must reproduce the above copyright
//    notice, this list of conditions and the following disclaimer in the
//    documentation and/or other materials provided with the distribution.
// 3. The name of the author may not be used to endorse or promote products
//    derived from this software without specific prior written permission.
// // THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR // IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES // OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. // IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, // INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT // NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF // THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #include "Allocator.h" #include "LMNGram.h" namespace Torch { LMNGram::LMNGram( int n_ , Vocabulary *vocab_ ) { #ifdef DEBUG if ( n_ < 1 ) error("LMNGram::LMNGram - n cannot be < 1\n") ; #endif n = n_ ; vocab = vocab_ ; // Create a unigram entry for every word in our vocabulary. unigrams = (real *)Allocator::sysAlloc( vocab->n_words * sizeof(real) ) ; for ( int i=0 ; in_words ; i++ ) unigrams[i] = LOG_ZERO ; if ( n > 1 ) next_level = new LMInteriorLevelWordList( n-1 ) ; else next_level = NULL ; // Configure the cache - TODO make the cache size configurable (?) cache = new LMCache( 10 , n , vocab->n_words ) ; } LMNGram::~LMNGram() { if ( unigrams != NULL ) free(unigrams) ; if ( next_level != NULL ) delete next_level ; if ( cache != NULL ) delete cache ; } void LMNGram::addEntry( int order , int *words , real prob , real bo_wt ) { // We assume that the 'words' array is in oldest-word-first order. // ie. [w1 w2 w3] for trigram entry. 
int r_words_1[30] ; // rearranged words [w4,w3,w2,w1] int r_words_2[30] ; // rearranged words [w3,w2,w1,w4] #ifdef DEBUG if ( (order < 1) || (order > n) ) error("LMNGram::addEntry - order out of range\n") ; #endif if ( order == 1 ) { if ( unigrams[*words] > LOG_ZERO ) error("LMNGram::addEntry - duplicate unigram entry encountered\n") ; unigrams[*words] = prob ; if ( n > 1 ) { // We are adding a unigram entry into a language model // of higher order, so the backoff weight is important. // Add the backoff weight. next_level->addBackoffEntry( order , words , bo_wt ) ; } } else { // The first thing we want to do is to rearrange the words in the entry // so that the order is straight forward and matches the architecture // of the language model, for adding both probs and backoffs. for ( int i=0 ; i<(order-1) ; i++ ) { r_words_1[i] = words[order-1-i] ; // backoff ordering r_words_2[i] = words[order-2-i] ; // prob ordering } r_words_1[order-1] = words[0] ; r_words_2[order-1] = words[order-1] ; if ( prob > LOG_ZERO ) next_level->addProbEntry( order , r_words_2 , prob ) ; if ( order < n ) { // Add the backoff - we don't have/need, eg, trigram backoffs in our // trigram language model. next_level->addBackoffEntry( order , r_words_1 , bo_wt ) ; } } } real LMNGram::getLogProbBackoff( int order , int *words ) { // There are 'order' entries in 'words'. // The ordering in words is W3,W2,W1,W4 for a 4-gram query. // ie. for the query : what is P(W4|W1,W2,W3) ? real temp , prob ; #ifdef DEBUG if ( order < 1 ) error("LMNGram::getNextWordList - order out of range\n") ; bool output_debug=false ; if ( output_debug == true ) { printf( "P( %s | " , vocab->words[words[order-1]] ) ; for ( int i=0 ; i<(order-1) ; i++ ) printf( "%s " , vocab->words[words[i]] ) ; printf(") = ") ; } #endif if ( order > n ) order = n ; if ( order == 1 ) { // Just return the unigrams prob - no need to backoff or to use the cache. 
#ifdef DEBUG if ( output_debug == true ) printf("%f\n",unigrams[*words]); #endif return unigrams[*words] ; } else { // look in the cache ... prob = cache->getProb( order , words ) ; if ( prob >= (-LOG_ZERO) ) { // The n-gram entry is not in the cache, so we need to search for it // and then add it to the cache. if ( next_level->getProbWithBackoff( order , words , &prob ) == false ) { // No bigram, trigram, etc entries so backoff to unigram temp = unigrams[words[order-1]] ; if ( temp <= LOG_ZERO ) prob = LOG_ZERO ; else if ( prob <= LOG_ZERO ) prob = temp ; else prob += temp ; } cache->addEntry( order , words , prob ) ; #ifdef DEBUG if ( output_debug == true ) printf("%f\n",prob) ; #endif return prob ; } else { // The entry was in the cache. Just return it #ifdef DEBUG if ( output_debug == true ) printf("(c)%f\n",prob); #endif return prob ; } } } #ifdef DEBUG void LMNGram::outputText() { int words[30] ; // Print the unigrams printf("\\1-gram\\\n") ; for ( int i=0 ; in_words ; i++ ) printf("%s %f\n",vocab->words[i],unigrams[i]) ; // Print the rest if ( next_level != NULL ) next_level->outputText( vocab , words , 0 ) ; } #endif } torch3-3.1.orig/decoder/LMNGram.h0000644000175000017500000000647310106445236016722 0ustar kalfakalfa00000000000000// Copyright (C) 2003--2004 Darren Moore (moore@idiap.ch) // // This file is part of Torch 3.1. // // All rights reserved. // // Redistribution and use in source and binary forms, with or without // modification, are permitted provided that the following conditions // are met: // 1. Redistributions of source code must retain the above copyright // notice, this list of conditions and the following disclaimer. // 2. Redistributions in binary form must reproduce the above copyright // notice, this list of conditions and the following disclaimer in the // documentation and/or other materials provided with the distribution. // 3. 
The name of the author may not be used to endorse or promote products
//    derived from this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
// IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
// OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
// IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
// INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
// NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
// THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

#ifndef LMNGRAM_INC
#define LMNGRAM_INC

#include "general.h"
#include "log_add.h"
#include "LMInteriorLevelWordList.h"
#include "Vocabulary.h"
#include "LMCache.h"

namespace Torch {

/** This class is the main class for N-gram language modelling.
    After language model entries have been added, lookups with
    full backoff can be performed.

    @author Darren Moore (moore@idiap.ch)
*/
class LMNGram
{
public:
  /// The order of the N-Gram
  int n ;

  /// The vocabulary the model is defined over. Only referenced: the
  /// destructor does not free it.
  Vocabulary *vocab ;

  /// The unigram probability entries, 1 for every vocab word.
  /// Every entry starts out as LOG_ZERO until addEntry() fills it in.
  real *unigrams ;

  /// Points to the start of the language model tree structure.
  /// NULL when n is 1 (a pure unigram model has no tree).
  LMInteriorLevelWordList *next_level ;

  /// The cache used for fast LM lookup.
  LMCache *cache ;

  /* Constructors / destructor */

  /// Creates an empty N-Gram data structure. 'n_' is the N-gram order.
  LMNGram( int n_ , Vocabulary *vocab_ ) ;
  virtual ~LMNGram() ;

  /* Methods */

  /// Adds a new entry to the N-gram.
  /// 'order' is the order of the entry (ie. 2 for a bigram entry).
  /// The 'words' array needs to be in oldest-word-first order.
  /// ([w1 w2 w3] for trigram entry).
  /// 'prob' is the log probability for the entry
  /// 'bo_wt' is the log back-off weight for the entry. If order is
  /// equal to the LMNGram order (n), then 'bo_wt' is ignored.
  /// Adding the same unigram twice is reported through error().
  /// For orders above 1 the probability is stored only when 'prob' is
  /// greater than LOG_ZERO.
  void addEntry( int order , int *words , real prob , real bo_wt=LOG_ZERO ) ;

  /// Finds the N-gram probability of a given word sequence, with full
  /// backoff.
  /// 'order' is the number of words in the 'words' array.
  /// The ordering in 'words' is W3,W2,W1,W4 for a 4-gram query.
  /// (ie. for the query : what is P(W4|W1,W2,W3)?)
  /// If 'order' is greater than n it is reduced to n. Unigram queries
  /// are answered directly from 'unigrams'; higher orders go through
  /// the cache first and are added to it on a miss.
  real getLogProbBackoff( int order , int *words ) ;

#ifdef DEBUG
  /// Prints the unigram table followed by the rest of the model.
  void outputText() ;
#endif
};

}

#endif
torch3-3.1.orig/decoder/LanguageModel.cc0000644000175000017500000006274110106445236020327 0ustar kalfakalfa00000000000000// Copyright (C) 2003--2004 Darren Moore (moore@idiap.ch)
//
// This file is part of Torch 3.1.
//
// All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions
// are met:
// 1. Redistributions of source code must retain the above copyright
//    notice, this list of conditions and the following disclaimer.
// 2. Redistributions in binary form must reproduce the above copyright
//    notice, this list of conditions and the following disclaimer in the
//    documentation and/or other materials provided with the distribution.
// 3. The name of the author may not be used to endorse or promote products
//    derived from this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
// IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
// OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
// IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, // INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT // NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF // THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #include "Allocator.h" #include "LanguageModel.h" #include "string_stuff.h" #include "DiskXFile.h" namespace Torch { LanguageModel::LanguageModel( int order_ , Vocabulary *vocabulary_ , char *lm_fname , real lm_scaling_factor_ ) { FILE *lm_fd ; char buf[4] ; if ( vocabulary_ == NULL ) error("LanguageModel::LanguageModel - no vocabulary defined\n") ; if ( order_ <= 0 ) error("LanguageModel::LanguageModel - LM order must be > 0\n") ; if ( (lm_fname == NULL) || (strcmp(lm_fname,"")==0) ) error("LanguageModel::LanguageModel - no LM filename specified\n") ; order = order_ ; vocabulary = vocabulary_ ; n_words = vocabulary->n_words ; lm_scaling_factor = lm_scaling_factor_ ; ngram = new LMNGram( order , vocabulary ) ; lm_has_start_word = false ; lm_has_end_word = false ; // Open the LM file if ( (lm_fd = fopen( lm_fname , "r" )) == NULL ) error("LanguageModel::LanguageModel - error opening LM file\n") ; // Read the first 4 bytes to see if the file is a Noway binary file if ( (int)fread( buf , sizeof(char) , 4 , lm_fd ) != 4 ) error("LanguageModel::LanguageModel - error reading first 4 bytes\n") ; if ( (strcmp( buf , "NG3" ) == 0) || (strcmp( buf , "TR2" ) == 0) ) { // The file is in Noway binary format. readNowayBin( lm_fd ) ; } else { // Assume that the file is in ARPA format. 
fseek( lm_fd , 0 , SEEK_SET ) ; readARPA( lm_fd ) ; } fclose( lm_fd ) ; #ifdef DEBUG //outputText() ; #endif } LanguageModel::~LanguageModel() { if ( ngram != NULL ) delete ngram ; } real LanguageModel::calcLMProb( DecodingHypothesis *prev_word_end_hyp , int next_word ) { int words[30] , n_wrds ; WordChainElem *temp_elem ; real prob ; #ifdef DEBUG if ( next_word < 0 ) error("LanguageModel::calcLMProb(2) - next_word < 0\n") ; #endif // We have a word end hypothesis and a next word. // We want to calculate the LM probability for this next word. // eg. We have a word end for some w2 and a next word w3. // We want to calculate P(w3|w1,w2) // If the next word is silence or a sentence marker, don't do a LM lookup. if ( (next_word == vocabulary->sent_start_index) && (lm_has_start_word == false) ) return 0.0 ; if ( (next_word == vocabulary->sent_end_index) && (lm_has_end_word == false) ) return 0.0 ; if ( next_word == vocabulary->sil_index ) return 0.0 ; // Construct a list of the previous words. n_wrds = 0 ; if ( prev_word_end_hyp != NULL ) { temp_elem = prev_word_end_hyp->word_level_info ; #ifdef DEBUG if ( prev_word_end_hyp->word_level_info == NULL ) error("LanguageModel::calcLMProb(2) - word_level_info is NULL\n") ; #endif while ( temp_elem != NULL ) { if ( (temp_elem->word == vocabulary->sil_index) || ((lm_has_start_word==false) && (temp_elem->word==vocabulary->sent_start_index)) ) { // skip these words for the purpose of LM lookups temp_elem = temp_elem->prev_elem ; continue ; } words[n_wrds++] = temp_elem->word ; temp_elem = temp_elem->prev_elem ; if ( n_wrds >= (order-1) ) break ; } } words[n_wrds++] = next_word ; // Find the n-gram probability prob = ngram->getLogProbBackoff( n_wrds , words ) ; // Scale the n-gram probability using the LM scaling factor. 
// Note that we are multiplying the log LM prob by the scaling factor prob *= lm_scaling_factor ; return ( prob ) ; } real LanguageModel::calcLMProb( DecodingHypothesis *word_end_hyp ) { int words[30] , n_wrds , temp , j ; WordChainElem *temp_elem ; real prob ; if ( word_end_hyp == NULL ) return LOG_ZERO ; // We have a word end hypothesis. We want to tune this using our language model. // eg. we have a word end for some w3 and we want to tune this using P(w3|w1,w2) // Construct a list of the current word and previous words. n_wrds = 0 ; temp_elem = word_end_hyp->word_level_info ; #ifdef DEBUG if ( word_end_hyp->word_level_info == NULL ) error("LanguageModel::calcLMProb - word_level_info is NULL\n") ; #endif // If the most recent word is a sentence marker and the LM does not // have entries for the sentence markers, don't do a LM lookup. if ( (temp_elem->word == vocabulary->sent_start_index) && (lm_has_start_word == false) ) return 0.0 ; if ( (temp_elem->word == vocabulary->sent_end_index) && (lm_has_end_word == false) ) return 0.0 ; if ( temp_elem->word == vocabulary->sil_index ) return 0.0 ; while ( temp_elem != NULL ) { if ( (temp_elem->word == vocabulary->sil_index) || ((lm_has_start_word==false) && (temp_elem->word==vocabulary->sent_start_index)) ) { // skip these words for the purpose of LM lookups temp_elem = temp_elem->prev_elem ; continue ; } words[n_wrds++] = temp_elem->word ; temp_elem = temp_elem->prev_elem ; if ( n_wrds >= order ) break ; } // The method in LMNGram requires a different ordering of words temp = words[0] ; for ( j=1 ; jgetLogProbBackoff( n_wrds , words ) ; // Scale the n-gram probability using the LM scaling factor. // Note that we are multiplying the log LM prob by the scaling factor // IS THIS OK ? 
prob *= lm_scaling_factor ; // Tune the word end hypothesis score using the n-gram probability and return return ( prob ) ; } void LanguageModel::readARPA( FILE *arpa_fd ) { int n_exp_entries[30] ; // n_exp_entries[0] is the expected number of unigram entries // n_exp_entries[0] is the expected number of bigram entries int n_act_entries[30] ; // the actual number of entries read from the file. int words[30] ; // holds the predecessor words for a given word. real curr_prob=0.0 , curr_bow=0.0 ; char *curr_word=NULL ; int curr_index ; real ln_10 = (real)log(10.0) ; int tempn=0 , tempn_entries=0 , max_n_in_file=0 ; char line[1000] ; bool got_begin_data_mark=false , expecting_end=false , got_end=false , error_flag ; int curr_gram_data=0 ; if ( arpa_fd == NULL ) error("LanguageModel::readARPA - arpa_fd is NULL\n") ; // discard lines until we get the "beginning of data mark". while ( fgets( line , 1000 , arpa_fd ) != NULL ) { // if the new line is empty, get the next line if ( (line[0]==' ') || (line[0]=='\r') || (line[0]=='\n') || (line[0]=='\t') || (line[0]=='#') ) continue ; if ( line[0] == '\\' ) { strtoupper( line ) ; if ( strstr( line , "\\END\\" ) != NULL ) { if ( curr_gram_data < order ) { // we haven't encountered the n-grams we expected error("LanguageModel::readARPA - not enough data in file\n") ; } // we've reached the end of the ARPA file - we're done got_end = true ; break ; } else if ( expecting_end == true ) { // we're expecting the end marker and didn't get it - get the next line continue ; } else if ( strstr( line , "\\DATA\\" ) != NULL ) { if ( got_begin_data_mark == true ) { // we have already seen the beginning of data marker - error ! error("LanguageModel::readARPA - duplicate beginning of data marker\n") ; } got_begin_data_mark = true ; } else if ( strstr( line , "-GRAMS:" ) != NULL ) { if ( got_begin_data_mark == true ) { // find out whether we are at the start of the 1-gram, // 2-gram, 3-gram, etc data. 
if ( (curr_gram_data+1) != ( line[1]-0x30 ) ) error("LanguageModel::readARPA - N-Gram N out of order\n") ; curr_gram_data = line[1]-0x30 ; n_act_entries[curr_gram_data-1] = 0 ; if ( curr_gram_data > order ) { // the order has exceeded the order of our LM - we're done // reading probabilties. expecting_end = true ; } } } else { // we got something else that started with a '\' - error !! error("LanguageModel::readARPA - unrecognised marker\n%s\n",line) ; } } else { if ( (got_begin_data_mark == false) || (expecting_end == true) ) continue ; if ( strstr( line , "") != NULL ) { n_exp_entries[curr_gram_data-1]-- ; continue ; } if ( curr_gram_data == 0 ) { // we are just below the \data\ - therefore expecting ngram x=y lines sscanf( line , "%*s %d=%d" , &tempn , &tempn_entries ) ; if ( tempn != (max_n_in_file+1) ) error("LanguageModel::readARPA - ngram n=y -> unexpected n\n") ; max_n_in_file = tempn ; if ( tempn <= order ) n_exp_entries[tempn-1] = tempn_entries ; } else if ( (curr_gram_data > 0) && (curr_gram_data < max_n_in_file) ) { // The line should contain (curr_gram_data+2) fields. // eg. for 2-gram entry -> p wd_1 wd_2 bo_wt_2 // Read the probability from the first field (in log10 format) and convert // to ln format. #ifdef USE_DOUBLE if ( sscanf( line , "%lf" , &curr_prob ) != 1 ) #else if ( sscanf( line , "%f" , &curr_prob ) != 1 ) #endif error("LanguageModel::readARPA - error reading prob\n") ; if ( curr_prob < -90.0 ) curr_prob = LOG_ZERO/2 ; else curr_prob *= ln_10 ; // get past the prob field so we can read the words strtok( line , " \n\r\t" ) ; // read wd_1 , ... , wd_n (ie. 
all predecessor words of wd_n) error_flag = false ; for ( int i=0 ; igetIndex( curr_word ) ; if ( curr_index < 0 ) { // The word is not in our vocab - don't add the entry to our LM error_flag = true ; n_exp_entries[curr_gram_data-1]-- ; break ; //error("LanguageModel::readARPA - %s in ARPA file not in vocab\n" , // curr_word ) ; } else { if ( curr_index == vocabulary->sent_start_index ) lm_has_start_word = true ; if ( curr_index == vocabulary->sent_end_index ) lm_has_end_word = true ; } // Place the word index into the array of predecessor words in // oldest-word-first order. words[i] = curr_index ; } if ( error_flag == true ) continue ; // Extract the back off weight from the last field in the line and // convert from log10 to ln. #ifdef USE_DOUBLE if ( sscanf( strtok( NULL , " \n\r\t" ) , "%lf" , &curr_bow ) != 1 ) #else if ( sscanf( strtok( NULL , " \n\r\t" ) , "%f" , &curr_bow ) != 1 ) #endif error("LanguageModel::readARPA - back off weight not found\n") ; if ( curr_bow < -90.0 ) curr_bow = 0.0 ; else curr_bow *= ln_10 ; // add the entry to the curr_gram_data-gram for the new word ngram->addEntry( curr_gram_data, words, curr_prob, curr_bow ) ; n_act_entries[curr_gram_data-1]++ ; } else if ( curr_gram_data == max_n_in_file ) { // The line should contain (curr_gram_data+1) fields because // backoff weights are only required for N-grams that form a prefix of // longer N-grams in the model file (ie. not this one - the longest). // eg. for 4-gram entry -> p wd_1 wd_2 wd_3 wd_4 // (where 4-gram probabilities are the maximum in the file. // read the probability from the first field #ifdef USE_DOUBLE sscanf( line , "%lf" , &curr_prob ) ; #else sscanf( line , "%f" , &curr_prob ) ; #endif if ( curr_prob < -90.0 ) curr_prob = LOG_ZERO/2 ; else curr_prob *= ln_10 ; // get past the prob field so we can read the words strtok( line , " \n\r\t" ) ; // read wd_1 , ... , wd_n and insert indices in 'words' array. 
error_flag = false ; for ( int i=0 ; igetIndex( curr_word ) ; if ( curr_index < 0 ) { // The word is not in our vocab - don't add the entry to our LM error_flag = true ; n_exp_entries[curr_gram_data-1]-- ; break ; //error("LanguageModel::readARPA - %s in ARPA file not in vocab\n" , // curr_word ) ; } else { if ( curr_index == vocabulary->sent_start_index ) lm_has_start_word = true ; if ( curr_index == vocabulary->sent_end_index ) lm_has_end_word = true ; } // Place the word index into the array of predecessor words in // oldest-word-first order. words[i] = curr_index ; } if ( error_flag == true ) continue ; // add the entry to the curr_gram_data-gram for the new word n_act_entries[curr_gram_data-1]++ ; ngram->addEntry( curr_gram_data , words , curr_prob ) ; } } } // make sure that we got the /end/ marker if ( got_end == false ) error("LanguageModel::readARPA - EOF but no end marker\n") ; // Issue warnings if the number of expected entries for each n-gram did // not match the actual number read from the file for ( int i=0 ; i 3 ) error("LanguageModel::readNowayBin - order is greater than 3\n") ; lm_file = new DiskXFile( nw_fd ) ; // Assume that the first 4-bytes have already been read, // and have been verified to contain TR2 or NG3. // Read the number of unigrams, bigrams and trigrams. if ( lm_file->read( n_xgrams , sizeof(int) , 3 ) != 3 ) error("LanguageModel::readNowayBin - error reading number of ngrams\n") ; // Allocate memory to hold the mapping between our vocab indices and // the indices in the file. 
vocab_index_map = (int *)Allocator::sysAlloc( n_xgrams[0] * sizeof(int) ) ; // Now read in the "vocabulary" for ( int i=0 ; iread( &c , 1 , 1 ) ; while ( c > 0 ) { *bptr = (unsigned char)c ; bptr++ ; lm_file->read( &c , 1 , 1 ) ; } *bptr = '\0' ; //strtoupper( buf ) ; // Read the word index from the file if ( lm_file->read( &wd_index , sizeof(unsigned short) , 1 ) != 1 ) error("LanguageModel::readNowayBin - error reading word index\n") ; if ( wd_index >= n_xgrams[0] ) error("LanguageModel::readNowayBin - word index exceeds num unigrams\n") ; // Find the index of the word in our vocabulary and store the mapping. vocab_index_map[wd_index] = vocabulary->getIndex( buf ) ; if ( (vocab_index_map[wd_index] >= 0) && (vocab_index_map[wd_index] == vocabulary->sent_start_index) ) lm_has_start_word = true ; if ( (vocab_index_map[wd_index] >= 0) && (vocab_index_map[wd_index] == vocabulary->sent_end_index) ) lm_has_end_word = true ; } // Now we read the unigram, bigram and trigram entries. for ( int i=0 ; iread( &wd_index , sizeof(unsigned short) , 1 ) != 1 ) error("LanguageModel::readNowayBin - error reading unigram word index\n") ; if ( vocab_index_map[wd_index] < 0 ) uni_invocab = false ; // Read the probability ( -(log10(prob)*8192) format ) and convert to ln(prob). 
if ( lm_file->read( &log10_prob , sizeof(unsigned short) , 1 ) != 1 ) error("LanguageModel::readNowayBin - error reading unigram prob\n") ; prob = ((real)log10_prob / 8192.0) * neg_ln_10 ; // Read the backoff weight if ( lm_file->read( &log10_backoff , sizeof(short) , 1 ) != 1 ) error("LanguageModel::readNowayBin - error reading unigram backoff\n") ; backoff = ((real)log10_backoff / 8192.0) * ln_10 ; #ifdef DEBUG if ( prob > 0.0 ) error("LanguageModel::readNowayBin - prob > 0.0\n") ; #endif // Read the number of bigrams associated with this word if ( lm_file->read( &n_bigrams , sizeof(unsigned short) , 1 ) != 1 ) error("LanguageModel::readNowayBin - error reading number of bigrams\n") ; total_bigrams += n_bigrams ; if ( uni_invocab == true ) { // Add the new entry to the 1-gram words[0] = vocab_index_map[wd_index] ; ngram->addEntry( 1 , words , prob , backoff ) ; } // Now read all the bigrams that have the current word as the predecessor for ( int j=0 ; jread( &wd_index , sizeof(unsigned short) , 1 ) != 1 ) error("LanguageModel::readNowayBin - error reading bigram word index\n") ; if ( vocab_index_map[wd_index] < 0 ) bi_invocab = false ; // Read the probability ( -(log10(prob)*8192) format ) and convert to ln(prob). 
if ( lm_file->read( &log10_prob , sizeof(unsigned short) , 1 ) != 1 ) error("LanguageModel::readNowayBin - error reading bigram prob\n") ; prob = ((real)log10_prob / 8192.0) * neg_ln_10 ; // Read the backoff weight if ( lm_file->read( &log10_backoff , sizeof(short) , 1 ) != 1 ) error("LanguageModel::readNowayBin - error reading bigram backoff\n") ; backoff = ((real)log10_backoff / 8192.0) * ln_10 ; #ifdef DEBUG if ( prob>0.0 ) error("LanguageModel::readNowayBin - bigram prob > 0.0\n") ; #endif // Read the number of trigrams associated with this word if ( lm_file->read( &n_trigrams , sizeof(unsigned short) , 1 ) != 1 ) error("LanguageModel::readNowayBin - error reading number of trigrams\n") ; total_trigrams += n_trigrams ; // Add the new entry to the 2-gram if ( (uni_invocab == true) && (bi_invocab == true) ) { words[1] = vocab_index_map[wd_index] ; if ( order >= 2 ) ngram->addEntry( 2 , words , prob , backoff ) ; } // Now read all the trigrams that have the current bigram as the predecessor for ( int k=0 ; kread( &wd_index , sizeof(unsigned short) , 1 ) != 1 ) error("LanguageModel::readNowayBin - error reading trigram word index\n") ; if ( vocab_index_map[wd_index] < 0 ) tri_invocab = false ; // Read the probability ( -(log10(prob)*8192) format ) and convert to ln(prob). 
if ( lm_file->read( &log10_prob , sizeof(unsigned short) , 1 ) != 1 ) error("LanguageModel::readNowayBin - error reading trigram prob\n") ; prob = ((real)log10_prob / 8192.0) * neg_ln_10 ; #ifdef DEBUG if ( prob > 0.0 ) error("LanguageModel::readNowayBin - bigram prob or backoff > 0.0\n") ; #endif // Add the new entry to the 3-gram if ( (uni_invocab == true) && (bi_invocab == true) && (tri_invocab == true) ) { words[2] = vocab_index_map[wd_index] ; if ( order >= 3 ) ngram->addEntry( 3 , words , prob ) ; } tri_invocab = true ; } bi_invocab = true ; } uni_invocab = true ; } if ( (total_bigrams != n_xgrams[1]) || (total_trigrams != n_xgrams[2]) ) error("LanguageModel::readNowayBin - did not read expected number of bi & trigrams\n") ; free( vocab_index_map ) ; delete lm_file ; } #ifdef DEBUG void LanguageModel::outputText() { ngram->outputText() ; } #endif } torch3-3.1.orig/decoder/LanguageModel.h0000644000175000017500000000664310106445236020170 0ustar kalfakalfa00000000000000// Copyright (C) 2003--2004 Darren Moore (moore@idiap.ch) // // This file is part of Torch 3.1. // // All rights reserved. // // Redistribution and use in source and binary forms, with or without // modification, are permitted provided that the following conditions // are met: // 1. Redistributions of source code must retain the above copyright // notice, this list of conditions and the following disclaimer. // 2. Redistributions in binary form must reproduce the above copyright // notice, this list of conditions and the following disclaimer in the // documentation and/or other materials provided with the distribution. // 3. The name of the author may not be used to endorse or promote products // derived from this software without specific prior written permission. // // THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR // IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES // OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
// IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, // INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT // NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF // THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #ifndef LANGUAGEMODEL_INC #define LANGUAGEMODEL_INC #include "general.h" #include "DecodingHypothesis.h" #include "Vocabulary.h" #include "LMNGram.h" namespace Torch { /** This object implements an n-gram language model. The n-gram data structures are encapsulated in the ngram member variable (see LMNGram class). Methods are provided to read a LM file in ARPA format or in Noway binary format. A method is provided to calculate a LM prob (with backoff) for a given sequence of words. @author Darren Moore (moore@idiap.ch) */ class LanguageModel { public: int order ; int n_words ; Vocabulary *vocabulary ; LMNGram *ngram ; real lm_scaling_factor ; bool lm_has_start_word ; bool lm_has_end_word ; /* constructors / destructor */ /// Creates the language model. /// 'order_' is the order of the LM (eg. 3 for trigram). LanguageModel( int order_ , Vocabulary *vocabulary_ , char *lm_fname , real lm_scaling_factor_=1.0 ) ; virtual ~LanguageModel() ; /* methods */ /// Calculates the language model probability (with backoff) of the word /// sequence stored in the hypothesis pointed to by 'word_end_hyp'. real calcLMProb( DecodingHypothesis *word_end_hyp ) ; /// Calculates the language model probability (with backoff) of 'next_word' /// given the previous word sequence stored in the hypothesis pointed to /// by 'prev_word_end_hyp'. real calcLMProb( DecodingHypothesis *prev_word_end_hyp , int next_word ) ; /// Creates a language model from an ARPA format file. Internal function. 
void readARPA( FILE *arpa_fd ) ; /// Creates a language model from an Noway binary LM format file. Internal function. /// nb. Only the TR2 and NG3 types are supported (ie. trigrams). void readNowayBin( FILE *nw_fd ) ; #ifdef DEBUG void outputText() ; #endif }; } #endif torch3-3.1.orig/decoder/LinearLexicon.cc0000644000175000017500000001601210106445236020345 0ustar kalfakalfa00000000000000// Copyright (C) 2003--2004 Darren Moore (moore@idiap.ch) // // This file is part of Torch 3.1. // // All rights reserved. // // Redistribution and use in source and binary forms, with or without // modification, are permitted provided that the following conditions // are met: // 1. Redistributions of source code must retain the above copyright // notice, this list of conditions and the following disclaimer. // 2. Redistributions in binary form must reproduce the above copyright // notice, this list of conditions and the following disclaimer in the // documentation and/or other materials provided with the distribution. // 3. The name of the author may not be used to endorse or promote products // derived from this software without specific prior written permission. // // THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR // IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES // OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. // IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, // INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT // NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF // THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
#include "Allocator.h" #include "LinearLexicon.h" #include "log_add.h" #include "string_stuff.h" namespace Torch { LinearLexicon::LinearLexicon( SpeechHMM *speech_hmm , PhoneModels *phone_models_ ) { if ( speech_hmm == NULL ) error("LinearLexicon::LinearLexicon(2) - speech_hmm is NULL\n") ; createLinearLexicon( speech_hmm->lexicon , phone_models_ ) ; } LinearLexicon::LinearLexicon( LexiconInfo *lex_info_ , PhoneModels *phone_models_ ) { createLinearLexicon( lex_info_ , phone_models_ ) ; } void LinearLexicon::createLinearLexicon( LexiconInfo *lex_info_ , PhoneModels *phone_models_ ) { DecodingHMM **temp_models=NULL ; int max_n_phones=100 , n_phones ; short n_sucs , *sucs ; real *sucs_log_trans_probs ; if ( lex_info_ == NULL ) error("LinearLexicon::createLinearLexicon - lex_info_ is NULL\n") ; if ( phone_models_ == NULL ) error("LinearLexicon::createLinearLexicon - phone_models_ is NULL\n") ; lex_info = lex_info_ ; phone_models = phone_models_ ; if ( (lex_info->sent_start_index >=0) && (lex_info->sent_start_index == lex_info->sent_end_index) ) error("LinearLexicon::createLinearLexicon - sent start & end cannot be the same word\n") ; n_models = lex_info->n_entries ; models = (DecodingHMM **)Allocator::sysAlloc( n_models * sizeof(DecodingHMM *) ) ; total_states = 0 ; // Allocate memory to hold the temporary list of models that gets assembled for each word temp_models = (DecodingHMM **)Allocator::sysAlloc( max_n_phones * sizeof(DecodingHMM *) ) ; for ( int i=0 ; ientries[i].n_phones ; // Check that the temp array that is used to assemble the list of phone models is // big enough. 
if ( n_phones > max_n_phones ) { // Realloc some more memory for our temp array max_n_phones = n_phones + 1 ; temp_models = (DecodingHMM **)Allocator::sysRealloc( temp_models , max_n_phones * sizeof(DecodingHMM *) ) ; } for ( int j=0 ; jmodels[lex_info->entries[i].phones[j]] ; if ( (lex_info->phone_info->pause_index >= 0) && (lex_info->entries[i].phones[n_phones-1] != lex_info->phone_info->pause_index) && (lex_info->entries[i].phones[n_phones-1] != lex_info->phone_info->sil_index) ) { // If there is a pause phone defined, and the word does not already have a pause // or silence phone at the end, then add the pause model to the end of the list. temp_models[n_phones] = phone_models->models[phone_models->phone_info->pause_index] ; n_phones++ ; } models[i] = new DecodingHMM( n_phones , temp_models ) ; // Check that there is not an initial to final state transition n_sucs = models[i]->states[0]->n_successors ; sucs = models[i]->states[0]->successor_states ; sucs_log_trans_probs = models[i]->states[0]->suc_log_trans_probs ; if ( sucs[n_sucs-1] == (models[i]->n_states-1) ) error("LinearLexicon::createLinearLexicon - initial-final transition in word %d\n",i) ; // Scale the transition probs from the initial state of the new model // using the prior. 
for ( int j=0 ; jentries[i].log_prior ; } // Calculate the total number of states in all pronunciation models for ( int i=0 ; in_states ; if ( temp_models != NULL ) free( temp_models ) ; } LinearLexicon::~LinearLexicon() { if ( models != NULL ) { for ( int i=0 ; i= n_models) ) error("LinearLexicon::calcEmissionProb - word out of range\n") ; #endif return phone_models->calcEmissionProb( models[word]->states[state]->emission_prob_vec_index , models[word]->states[state]->distribution ) ; } int LinearLexicon::nStatesInModel( int model ) { #ifdef DEBUG if ( (model < 0) || (model >= n_models) ) error("LinearLexicon::nStatesInModel - model out of range\n") ; #endif return models[model]->n_states ; } void LinearLexicon::getSuccessorInfo( int word , int state , short *n_sucs , short **sucs , real **log_trans_probs ) { #ifdef DEBUG if ( (word < 0) || (word >= n_models) ) error("LinearLexicon::getSuccessorInfo - word out of range\n") ; if ( (state < 0) || (state >= models[word]->n_states) ) error("LinearLexicon::getSuccessorInfo - state out of range\n") ; #endif *n_sucs = models[word]->states[state]->n_successors ; *sucs = models[word]->states[state]->successor_states ; *log_trans_probs = models[word]->states[state]->suc_log_trans_probs ; } #ifdef DEBUG void LinearLexicon::outputText() { printf("LinearLexicon: Number of models = %d\n" , n_models) ; for ( int i=0 ; ivocabulary->getWord( lex_info->entries[i].vocab_index ) , models[i]->n_states ) ; models[i]->outputText() ; } } #endif } torch3-3.1.orig/decoder/LinearLexicon.h0000644000175000017500000000740710106445236020217 0ustar kalfakalfa00000000000000// Copyright (C) 2003--2004 Darren Moore (moore@idiap.ch) // // This file is part of Torch 3.1. // // All rights reserved. // // Redistribution and use in source and binary forms, with or without // modification, are permitted provided that the following conditions // are met: // 1. 
Redistributions of source code must retain the above copyright // notice, this list of conditions and the following disclaimer. // 2. Redistributions in binary form must reproduce the above copyright // notice, this list of conditions and the following disclaimer in the // documentation and/or other materials provided with the distribution. // 3. The name of the author may not be used to endorse or promote products // derived from this software without specific prior written permission. // // THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR // IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES // OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. // IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, // INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT // NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF // THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #ifndef LINEARLEXICON_INC #define LINEARLEXICON_INC #include "general.h" #include "LexiconInfo.h" #include "PhoneModels.h" #include "DecodingHMM.h" #include "SpeechHMM.h" namespace Torch { /** This class is essentially an array of DecodingHMM instances, representing the HMM's for each pronunciation we can recognise. The ordering of this array conforms to the ordering the LexInfo instance that is passed as a parameter to the constructor. 
@author Darren Moore (moore@idiap.ch) */ class LinearLexicon { public: LexiconInfo *lex_info ; PhoneModels *phone_models ; int n_models ; DecodingHMM **models ; int total_states ; /* Constructors / destructor */ /// Uses the phoneme DecodingHMM instances in 'phone_models_', /// and the phonetic transcription info for the pronunciations /// as defined in 'lex_info_' and creates a complete DecodingHMM /// instance for each pronunciation. LinearLexicon( LexiconInfo *lex_info_ , PhoneModels *phone_models_ ) ; /// Extracts LexiconInfo pointer from 'speech_hmm' then proceeds /// in the same way as the first constructor. LinearLexicon( SpeechHMM *speech_hmm , PhoneModels *phone_models_ ) ; virtual ~LinearLexicon() ; /* Methods */ /// Internal function. Both constructors call this to create the /// required data structures. void createLinearLexicon( LexiconInfo *lex_info_ , PhoneModels *phone_models_ ) ; /// Returns the number of states in a particular model including /// non-emitting states. int nStatesInModel( int model ) ; /// Calculates the emission probability for a state in a particular /// word model using the current input vector. Only checks for /// out of range input parameters in the debug version. real calcEmissionProb( int model , int state ) ; /// Returns the number of successor states, the successor states /// themselves and the associated log transition probabilities. /// Does not copy data - just returns pointers to the originals. 
void getSuccessorInfo( int model , int state , short *n_sucs , short **sucs , real **log_trans_probs ) ; #ifdef DEBUG void outputText() ; #endif }; } #endif torch3-3.1.orig/decoder/Makefile0000644000175000017500000000172710106445236016751 0ustar kalfakalfa00000000000000# get user and architecture specific options OS := $(shell uname -s) TORCHDIR := $(shell cd ..; pwd) include ../Makefile_options_$(OS) CC_FILES := $(wildcard *.cc) OBJS := $(foreach f,$(CC_FILES),$(OBJS_DIR)/$(patsubst %.cc,%.o,$(f))) all: $(LIBTORCH) $(LIBTORCH): $(OBJS) @echo "Archiving..." @$(AR) $(LIBTORCH) $(OBJS) $(OBJS_DIR)/%.o: %.cc @echo $< @$(CC) $(CFLAGS_$(MODE)) $(INCS) -o $@ -c $< distclean: @\rm -f .deps_* clean: @echo "Remove objects file and dependencies..." @\rm -Rf $(OBJS) $(LIBTORCH) @\rm -f .deps_$(VERSION_KEY) depend: @echo "Tracking dependencies..." @\rm -f .deps_$(VERSION_KEY) @for file in *.cc ; do printf "$(OBJS_DIR)/" >> .deps_$(VERSION_KEY); $(DEP) $(CFLAGS_$(MODE)) $(INCS) $$file >> .deps_$(VERSION_KEY); done .deps_$(VERSION_KEY): @echo ">>> Please do a 'make depend' <<<" exit 10 ifneq ($(MAKECMDGOALS),distclean) ifneq ($(MAKECMDGOALS),clean) ifneq ($(MAKECMDGOALS),depend) include .deps_$(VERSION_KEY) endif endif endif torch3-3.1.orig/decoder/PhoneModels.cc0000644000175000017500000010366610106445236020042 0ustar kalfakalfa00000000000000// Copyright (C) 2003--2004 Darren Moore (moore@idiap.ch) // // This file is part of Torch 3.1. // // All rights reserved. // // Redistribution and use in source and binary forms, with or without // modification, are permitted provided that the following conditions // are met: // 1. Redistributions of source code must retain the above copyright // notice, this list of conditions and the following disclaimer. // 2. Redistributions in binary form must reproduce the above copyright // notice, this list of conditions and the following disclaimer in the // documentation and/or other materials provided with the distribution. // 3. 
The name of the author may not be used to endorse or promote products // derived from this software without specific prior written permission. // // THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR // IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES // OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. // IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, // INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT // NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF // THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #include "Allocator.h" #include "PhoneModels.h" #include "DiagonalGMM.h" #include "log_add.h" #include "string_stuff.h" #include "SpeechMLP.h" #include "SpeechMLPDistr.h" namespace Torch { PhoneModels::PhoneModels( SpeechHMM *speech_hmm ) { short *emis_prob_indices ; if ( speech_hmm == NULL ) error("PhoneModels::PhoneModels(2) - speech_hmm is NULL\n") ; phone_info = speech_hmm->lexicon->phone_info ; n_models = speech_hmm->n_models ; models = NULL ; n_features = 0 ; n_emission_probs = 0 ; input_vecs_are_features = true ; curr_input_vec = NULL ; curr_emission_probs = NULL ; log_phone_priors = NULL ; log_phone_del_pen = 0.0 ; acoustic_scale_factor = 1.0 ; log_emission_prob_floor = 0.0 ; apply_pause_del_pen = false ; mlp = NULL ; models = (DecodingHMM **)Allocator::sysAlloc( n_models * sizeof(DecodingHMM *) ) ; log_phone_priors = (real *)Allocator::sysAlloc( n_models * sizeof(real) ) ; for ( int i=0 ; imodels[i]->n_states * sizeof(short) ) ; emis_prob_indices[0] = -1 ; emis_prob_indices[speech_hmm->models[i]->n_states - 1] = -1 ; for ( int j=1 ; j<(speech_hmm->models[i]->n_states - 1) ; j++ ) 
emis_prob_indices[j] = n_emission_probs++ ; models[i] = new DecodingHMM( speech_hmm->models[i] , emis_prob_indices ) ; log_phone_priors[i] = 0.0 ; free( emis_prob_indices ) ; } // The number of inputs for the Distribution of each state should be the number of features if ( (n_features = models[0]->states[1]->distribution->n_inputs) <= 0 ) error("PhoneModels::PhoneModels(2) - n_features <= 0\n") ; // Allocate memory to hold emission probs as they are calculated. curr_emission_probs = (real *)Allocator::sysAlloc( n_emission_probs * sizeof(real) ) ; for ( int i=0 ; in_features ; if ( mlp->n_mlp_outputs != n_emission_probs ) error("PhoneModels::PhoneModels - n_mlp_outputs not match n_emission_probs\n") ; // Now we need a Distribution that can be associated with each emitting // state of each phone in our phoneset. for ( int i=0 ; in_states)-1 ; j++ ) { models[i]->states[j]->distribution = new SpeechMLPDistr( mlp , &curr_emission_probs , models[i]->states[j]->emission_prob_vec_index , log_phone_priors ) ; } } } } // Check that we were able to determine the input vector size, and/or the // total number of emission probabilities. if ( (input_vecs_are_features==true) && (n_features<=0) ) error("PhoneModels::PhoneModels - cannot have n_features <= 0\n") ; if ( n_emission_probs <= 0 ) error("PhoneModels::PhoneModels - cannot have n_emission_probs <= 0\n") ; // Now prepare for the type of input vectors if ( input_vecs_are_features == true ) { // Allocate memory to hold emission probs as they are calculated. curr_emission_probs = (real *)Allocator::sysAlloc( n_emission_probs * sizeof(real) ) ; for ( int i=0 ; i= n_models) || (index < 0) ) error("PhoneModels::getModel - index out of range\n") ; return models[index] ; } void PhoneModels::setInputVector( real *input_vec ) { curr_input_vec = input_vec ; if ( input_vecs_are_features == true ) { // The current emission prob values are now out of date - reset. 
for ( int j=0 ; jframeLogProbability( 0 , curr_input_vec ) ; } } return curr_emission_probs[prob_vec_index] * acoustic_scale_factor ; } void PhoneModels::readModelsFromHTK( FILE *models_fd ) { // Loads a HTK definition file containing multiple HMM definitions. // Only supports limited format - each HMM MUST be exactly same format as // Fig 7.3 in HTK manual. Therefore each state distribution can only // be a DiagonalGMM. char line[20000] , curr_model_name[100] , *value=NULL ; int n_mixtures=0 , temp , phone_index=-1 , read_state=0 , curr_mixture=0 ; real curr_mixture_weight=0.0 , **log_trans=NULL ; Distribution **states=NULL ; short curr_state=0 , curr_emis_prob_index=0 , *emis_prob_indices=NULL , n_states=0 ; // Allocate memory for the models models = (DecodingHMM **)Allocator::sysAlloc( phone_info->n_phones * sizeof(DecodingHMM *) ) ; // read the HMM model data from the HTK-format text file n_models = 0 ; curr_emis_prob_index = 0 ; read_state = 0 ; n_emission_probs = 0 ; while ( fgets( line , 20000 , models_fd ) != NULL ) { if ( strstr(line,"~h") ) { if ( read_state != 1 ) error("PhoneModels::readModelsFromHTK - ~h out of order\n") ; if ( sscanf( line , "%*s \"%[^\"]" , curr_model_name ) != 1 ) error("PhoneModels::readModelsFromHTK - error extracting phone name\n") ; if ( (phone_index = phone_info->getIndex( curr_model_name )) < 0 ) error("PhoneModels::readModelsFromHTK - %s not in phone_info\n",curr_model_name) ; n_models++ ; n_states = 0 ; read_state = 2 ; } else { strtoupper( line ) ; if ( strstr( line , "" ) ) { if ( read_state != 0 ) error("PhoneModels::readModelsFromHTK - out of order\n") ; if( sscanf( line , "%*s %d" , &n_features ) != 1 ) error("PhoneModels::readModelsFromHTK - error reading n_features\n") ; read_state = 1 ; } else if ( strstr( line , "" ) ) { if ( read_state != 2 ) error("PhoneModels::readModelsFromHTK - out of order\n") ; read_state = 3 ; } else if ( strstr( line , "" ) ) { if ( read_state != 3 ) error("PhoneModels::readModelsFromHTK - 
out of order\n") ; if ( sscanf( line , "%*s %hd" , &n_states ) != 1 ) error("PhoneModels::readModelsFromHTK - error extracting n_states\n") ; n_emission_probs += (n_states-2) ; emis_prob_indices = (short *)Allocator::sysAlloc( n_states * sizeof(short) ) ; emis_prob_indices[0] = -1 ; emis_prob_indices[n_states-1] = -1 ; for ( short j=1 ; j<(n_states-1) ; j++ ) emis_prob_indices[j] = curr_emis_prob_index++ ; // allocate memory for the array of Diagonal GMM's and the transitions states = (Distribution **)Allocator::sysAlloc( n_states * sizeof(Distribution *) ) ; log_trans = (real **)Allocator::sysAlloc( n_states * sizeof(real *) ) ; for ( short j=0 ; j") ) { if ( read_state != 4 ) { printf("%s %d %d\n",curr_model_name,curr_state,curr_mixture); error("PhoneModels::readModelsFromHTK - out of order\n%s\n",line) ; } if ( sscanf( line , "%*s %hd" , &curr_state ) != 1 ) error("PhoneModels::readModelsFromHTK - error reading curr_state\n") ; curr_state-- ; // index from 0 if ( (curr_state<1) || (curr_state>=(n_states-1)) ) error("PhoneModels::readModelsFromHTK - invalid curr_state\n") ; // There might be a on the same line if ( strstr(line,"") ) { if ( sscanf( line , "%*s %*d %*s %d" , &n_mixtures ) != 1 ) error("PhoneModels::readModelsFromHTK - error reading n_mixtures\n") ; curr_mixture = -1 ; read_state = 7 ; } else read_state = 5 ; } else if ( strstr(line,"") ) { if ( read_state != 5 ) error("PhoneModels::readModelsFromHTK - out of order\n") ; if ( sscanf( line , "%*s %d" , &n_mixtures ) != 1 ) error("PhoneModels::readModelsFromHTK - error reading n_mixtures (2)\n") ; curr_mixture = -1 ; read_state = 7 ; } else if ( strstr(line,"") ) { if ( read_state != 7 ) error("PhoneModels::readModelsFromHTK - out of order\n%s\n",line) ; if ( states[curr_state] == NULL ) states[curr_state] = new DiagonalGMM( n_features, n_mixtures ) ; #ifdef USE_DOUBLE if ( sscanf( line , "%*s %*d %lf" , &curr_mixture_weight ) != 1 ) #else if ( sscanf( line , "%*s %*d %f" , &curr_mixture_weight ) != 1 
) #endif error("PhoneModels::readModelsFromHTK - error reading curr_mix_wt\n") ; curr_mixture++ ; if ( (curr_mixture<0) || (curr_mixture>=n_mixtures) ) error("PhoneModels::readModelsFromHTK - invalid curr_mixture\n") ; if ( curr_mixture_weight == 0.0 ) ((DiagonalGMM *)states[curr_state])->log_weights[curr_mixture] = LOG_ZERO ; else { ((DiagonalGMM *)states[curr_state])->log_weights[curr_mixture] = log(curr_mixture_weight) ; } read_state = 8 ; } else if ( strstr(line,"") ) { if ( (read_state != 5) && (read_state != 8) ) error("PhoneModels::readModelsFromHTK - out of order\n") ; // Check that the number of means matches the number of features if ( sscanf( line , "%*s %d" , &temp ) != 1 ) error("PhoneModels::readModelsFromHTK - error reading number of means\n") ; if ( temp != n_features ) { error("PhoneModels::readModelsFromHTK - %d not match n_features=%d\n", temp , n_features ) ; } // If the distribution has not already been created, then create it. if ( states[curr_state] == NULL ) states[curr_state] = new DiagonalGMM( n_features, n_mixtures, NULL ) ; if ( (fgets(line,20000,models_fd)) == NULL ) error("PhoneModels::loadHtkModels - error reading values\n") ; value = strtok( line , " " ) ; for ( short j=0 ; jmeans[curr_mixture][j]=(real)atof(value) ; value = strtok( NULL , " " ) ; } read_state++ ; } else if ( strstr(line,"") ) { if ( (read_state != 6) && (read_state != 9) ) error("PhoneModels::readModelsFromHTK - out of order\n") ; // Check that the number of variances matches the number of features if ( sscanf( line , "%*s %d" , &temp ) != 1 ) error("PhoneModels::readModelsFromHTK - error reading number of variances\n") ; if ( temp != n_features ) { error("PhoneModels::readModelsFromHTK - %d != n_features=%d\n" , temp , n_features ) ; } // If the distribution has not already been created, then create it. 
if ( states[curr_state] == NULL ) states[curr_state] = new DiagonalGMM( n_features, n_mixtures, NULL ) ; if ( (fgets(line,20000,models_fd)) == NULL ) error("PhoneModels::loadHtkModels - error reading values\n") ; value = strtok( line , " " ) ; ((DiagonalGMM *)states[curr_state])->sum_log_var_plus_n_obs_log_2_pi[curr_mixture]= n_features * LOG_2_PI ; for ( short j=0 ; jvar[curr_mixture][j] = (real)atof(value) ; ((DiagonalGMM *)states[curr_state])->minus_half_over_var[curr_mixture][j] = -0.5 / (real)atof(value) ; ((DiagonalGMM *)states[curr_state])->sum_log_var_plus_n_obs_log_2_pi[curr_mixture] += log( (real)atof(value) ) ; value = strtok( NULL , " " ) ; } ((DiagonalGMM *)states[curr_state])->sum_log_var_plus_n_obs_log_2_pi[curr_mixture] *= -0.5 ; if ( read_state == 6 ) read_state = 4 ; else if ( read_state == 9 ) { if ( curr_mixture == (n_mixtures-1) ) read_state = 4 ; else read_state = 7 ; } } else if ( strstr(line,"") ) { if ( read_state != 4 ) error("PhoneModels::readModelsFromHTK - out of order\n") ; if ( curr_state != (n_states-2) ) error("PhoneModels::readModelsFromHTK - not all states encountered\n") ; for ( short j=0 ; j values\n") ; value = strtok( line , " " ) ; for ( short k=0 ; kpause_index) || ( (phone_index == phone_info->pause_index) && (apply_pause_del_pen == true) ) ) { log_trans[j][k] += log_phone_del_pen ; } } } value = strtok( NULL , " " ) ; } } // create the DecodingHMM models[phone_index] = new DecodingHMM( n_states , states , log_trans , emis_prob_indices ) ; for ( short j=0 ; j") ) { if ( read_state != 10 ) error("PhoneModels::readModelsFromHTK - out of order\n") ; phone_index = -1 ; read_state = 1 ; } else if ( strstr( line , "" ) ) { // Ignore } else if ( strstr( line , "" ) ) { // Ignore } else error("PhoneModels::readModelsFromHTK - unrecognised line\n%s\n",line) ; } } if ( n_models != phone_info->n_phones ) error("PhoneModels::readModelsFromHTK - n_models n_phones mismatch\n") ; fclose( models_fd ) ; } void 
PhoneModels::readModelsFromNoway( FILE *models_fd ) { // The model definitions in the input file must be in the same order // as the phone name file listing. char line[20000] , phone_name[1000] ; int phone_id=0 , phone_index=-1 ; char *num=NULL ; short n_states=0 , n_sucs , sucs[100] , temp_suc , state_id , prob_index=0 ; real trans[100] , temp_tran ; // Read the number of models, and check against the expected number. fgets( line , 20000 , models_fd ) ; if ( sscanf( line , "%d" , &n_models ) != 1 ) error("PhoneModels::readModelsFromNoway - error reading n_models\n") ; if ( n_models != phone_info->n_phones ) error("PhoneModels::readModelsFromNoway - n_models n_phones mismatch\n") ; // Allocate memory for the phone models and phone names models = (DecodingHMM **)Allocator::sysAlloc( n_models * sizeof(DecodingHMM *) ) ; for ( int i=0 ; i