@inproceedings{a469810f82ea4a7394e915801f740351,
title = "Leaving the Span",
abstract = "We discuss a simple sparse linear problem that is hard to learn with any algorithm that uses a linear combination of the training instances as its weight vector. The hardness holds even if we allow the learner to embed the instances into any higher-dimensional feature space (and use a kernel function to define the dot product between the embedded instances). These algorithms are inherently limited by the fact that after seeing k instances only a weight space of dimension k can be spanned. Our hardness result is surprising because the same problem can be efficiently learned using the exponentiated gradient (EG) algorithm: now the component-wise logarithms of the weights are essentially a linear combination of the training instances. This algorithm enforces additional constraints on the weights (all must be non-negative and sum to one), and in some cases these constraints alone force the rank of the weight space to grow as fast as 2^k.",
author = "Warmuth, {Manfred K.} and Vishwanathan, {S. V. N.}",
year = "2005",
doi = "10.1007/11503415_25",
language = "English",
isbn = "3540265562",
series = "Lecture Notes in Computer Science (including subseries Lecture Notes in Artificial Intelligence and Lecture Notes in Bioinformatics)",
publisher = "Springer Verlag",
pages = "366--381",
booktitle = "Learning Theory - 18th Annual Conference on Learning Theory, COLT 2005, Proceedings",
address = "Germany",
note = "18th Annual Conference on Learning Theory, COLT 2005 - Learning Theory ; Conference date: 27-06-2005 Through 30-06-2005",
}
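
The abstract contrasts learners whose weight vector stays in the span of the seen instances with the exponentiated gradient (EG) algorithm, whose component-wise log-weights play that role instead. The following minimal Python sketch (not from the paper; the squared loss, learning rate, and data are illustrative assumptions) shows the two update rules side by side: the gradient-descent update adds a multiple of each instance to w, while the EG update multiplies, renormalizes, and thereby keeps the weights non-negative and summing to one.

import numpy as np

def span_restricted_step(w, x, y, eta=0.05):
    # Plain (stochastic) gradient descent on the squared loss: the update is a
    # multiple of x, so after k steps w lies in the span of the k instances seen.
    grad = 2.0 * (w.dot(x) - y) * x
    return w - eta * grad

def eg_step(w, x, y, eta=0.05):
    # Exponentiated gradient update on the squared loss: a multiplicative update
    # followed by renormalization, so the weights stay non-negative and sum to
    # one, and log(w) changes by an additive multiple of x.
    grad = 2.0 * (w.dot(x) - y) * x
    w_new = w * np.exp(-eta * grad)
    return w_new / w_new.sum()

if __name__ == "__main__":
    rng = np.random.default_rng(0)
    n = 8
    target = np.zeros(n)
    target[0] = 1.0                      # a sparse target weight vector
    w_gd = np.zeros(n)                   # gradient descent starts at the origin
    w_eg = np.full(n, 1.0 / n)           # EG starts at the uniform distribution
    for _ in range(200):
        x = rng.standard_normal(n)
        y = target.dot(x)
        w_gd = span_restricted_step(w_gd, x, y)
        w_eg = eg_step(w_eg, x, y)
    print("GD weights:", np.round(w_gd, 3))
    print("EG weights:", np.round(w_eg, 3))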