Below is a simple demonstration of my issue. When I execute "Evaluate" I get an exception stating Schema mismatch for score column 'Score': expected known-size vector of Single, got Vector<Single>
The tutorial use a unknown-size vector so it should work?
https://learn.microsoft.com/en-us/dotnet/machine-learning/tutorials/iris-clustering
If I try with a fixed size score (decorating with a [VectorType(3)]
attribute) I instead get an Operation is not valid due to the current state of the object
exception (I assume it should be 3 since I ask for three clusters and the score should be three distances)
What am I doing wrong?
public class SampleInputModel
{
[VectorType(3)]
public float SimpleVector { get; set; }
[ColumnName("PredictedLabel")]
public uint PredictedClusterId;
[ColumnName("Score")]
public float[] Distances;
}
[Fact]
public void Sample()
{
var mlContext = new MLContext(seed: 1);
var random = new Random().NextDoubleSequence();
var dummyData = new SampleInputModel[100];
for (int i = 0; i < 100; i++)
{
dummyData[i] = new SampleInputModel { SimpleVector = new float[] { (float)random.ElementAt(i), (float)random.ElementAt(i), (float)random.ElementAt(i) } };
}
var trainingData = mlContext.Data.LoadFromEnumerable<SampleInputModel>(dummyData.Take(50));
var pipeline = mlContext.Transforms.Concatenate("Features", "SimpleVector");
pipeline.Append(mlContext.Clustering.Trainers.KMeans("Features", numberOfClusters: 3));
var trainedModel = pipeline.Fit(trainingData);
var testData = mlContext.Data.LoadFromEnumerable<SampleInputModel>(dummyData.TakeLast(50));
var predictions = trainedModel.Transform(testData);
var metrics = mlContext.Clustering.Evaluate(predictions, scoreColumnName: "Score", featureColumnName: "Features");
}