efcore icon indicating copy to clipboard operation
efcore copied to clipboard

EF Core global query filter based on nullable property resulting in NullReferenceException

Open DaveSenn opened this issue 3 years ago • 1 comments

Description

Queries using the below global filter result in a NullReferenceException. It looks like EF always evaluates the second condition AllowedTenants.Contains( x.TenantId ) even if the first condition is true.

Include your code

public interface ITest
{
    IReadOnlyCollection<Int64>? GetTenants();
}

public sealed class Temp : IEfTenantProvider
{
    private readonly IArgosAuthorizationService _argosAuthorizationService;

    public IReadOnlyCollection<Int64>? GetTenants() =>
        null;
}

public class MyContextFactory : IDbContextFactory<MyContext>
{
    private readonly IDbContextFactory<MyContext> _pooledFactory;
    private readonly IReadOnlyCollection<Int64>? _allowedTenants;

    public MyContextFactory( IDbContextFactory<MyContext> pooledFactory, ITest tenant)
    {
        _pooledFactory = pooledFactory;
        _allowedTenants = tenant.GetTenants();
    }

    public MyContext CreateDbContext()
    {
        var context = _pooledFactory.CreateDbContext();
        context.AllowedTenants = _allowedTenants;
        return context;
    }
}

public class MyContext : DbContext
{
    public virtual DbSet<SomeTable> SomeTable { get; set; } = null!;

    public MyContext( DbContextOptions<MyContext> options )
        : base( options )
    {
    }

    public IReadOnlyCollection<Int64>? AllowedTenants { get; set; }
    protected override void OnModelCreating( ModelBuilder modelBuilder )
    {
        modelBuilder.Entity<SomeTable>().HasQueryFilter( x => AllowedTenants == null || AllowedTenants.Contains( x.TenantId ) );
    }
}

Include stack traces

System.NullReferenceException
  HResult=0x80004003
  Message=Object reference not set to an instance of an object.
  Source=Microsoft.EntityFrameworkCore.Relational
  StackTrace:
   at Microsoft.EntityFrameworkCore.Query.SqlNullabilityProcessor.<VisitIn>g__ProcessInExpressionValues|29_0(SqlExpression valuesExpression, Boolean extractNullValues)
   at Microsoft.EntityFrameworkCore.Query.SqlNullabilityProcessor.VisitIn(InExpression inExpression, Boolean allowOptimizedExpansion, Boolean& nullable)
   at Microsoft.EntityFrameworkCore.Query.SqlNullabilityProcessor.Visit(SqlExpression sqlExpression, Boolean allowOptimizedExpansion, Boolean preserveColumnNullabilityInformation, Boolean& nullable)
   at Microsoft.EntityFrameworkCore.Query.SqlNullabilityProcessor.VisitSqlBinary(SqlBinaryExpression sqlBinaryExpression, Boolean allowOptimizedExpansion, Boolean& nullable)
   at Microsoft.EntityFrameworkCore.Query.SqlNullabilityProcessor.Visit(SqlExpression sqlExpression, Boolean allowOptimizedExpansion, Boolean preserveColumnNullabilityInformation, Boolean& nullable)
   at Microsoft.EntityFrameworkCore.Query.SqlNullabilityProcessor.Visit(SqlExpression sqlExpression, Boolean allowOptimizedExpansion, Boolean& nullable)
   at Microsoft.EntityFrameworkCore.Query.SqlNullabilityProcessor.Visit(SelectExpression selectExpression)
   at Microsoft.EntityFrameworkCore.Query.SqlNullabilityProcessor.Visit(TableExpressionBase tableExpressionBase)
   at Microsoft.EntityFrameworkCore.Query.SqlNullabilityProcessor.Visit(TableExpressionBase tableExpressionBase)
   at Microsoft.EntityFrameworkCore.Query.SqlNullabilityProcessor.Visit(SelectExpression selectExpression)
   at Microsoft.EntityFrameworkCore.Query.SqlNullabilityProcessor.Visit(TableExpressionBase tableExpressionBase)
   at Microsoft.EntityFrameworkCore.Query.SqlNullabilityProcessor.Visit(TableExpressionBase tableExpressionBase)
   at Microsoft.EntityFrameworkCore.Query.SqlNullabilityProcessor.Visit(SelectExpression selectExpression)
   at Microsoft.EntityFrameworkCore.Query.SqlNullabilityProcessor.Process(SelectExpression selectExpression, IReadOnlyDictionary`2 parameterValues, Boolean& canCache)
   at Microsoft.EntityFrameworkCore.Query.RelationalParameterBasedSqlProcessor.Optimize(SelectExpression selectExpression, IReadOnlyDictionary`2 parametersValues, Boolean& canCache)
   at Microsoft.EntityFrameworkCore.SqlServer.Query.Internal.SqlServerParameterBasedSqlProcessor.Optimize(SelectExpression selectExpression, IReadOnlyDictionary`2 parametersValues, Boolean& canCache)
   at Microsoft.EntityFrameworkCore.Query.Internal.RelationalCommandCache.GetRelationalCommandTemplate(IReadOnlyDictionary`2 parameters)
   at Microsoft.EntityFrameworkCore.Internal.RelationCommandCacheExtensions.RentAndPopulateRelationalCommand(RelationalCommandCache relationalCommandCache, RelationalQueryContext queryContext)
   at Microsoft.EntityFrameworkCore.Query.RelationalShapedQueryCompilingExpressionVisitor.ShaperProcessingExpressionVisitor.<<PopulateSplitIncludeCollectionAsync>g__InitializeReaderAsync|63_0>d`2.MoveNext()
   at Microsoft.EntityFrameworkCore.Storage.ExecutionStrategy.<>c__DisplayClass33_0`2.<<ExecuteAsync>b__0>d.MoveNext()
   at Microsoft.EntityFrameworkCore.Storage.ExecutionStrategy.<ExecuteImplementationAsync>d__34`2.MoveNext()
   at Microsoft.EntityFrameworkCore.Storage.ExecutionStrategy.<ExecuteImplementationAsync>d__34`2.MoveNext()
   at Microsoft.EntityFrameworkCore.Storage.ExecutionStrategy.<ExecuteAsync>d__33`2.MoveNext()
   at Microsoft.EntityFrameworkCore.Query.RelationalShapedQueryCompilingExpressionVisitor.ShaperProcessingExpressionVisitor.<PopulateSplitIncludeCollectionAsync>d__63`2.MoveNext()
   at Microsoft.EntityFrameworkCore.Query.RelationalShapedQueryCompilingExpressionVisitor.ShaperProcessingExpressionVisitor.<TaskAwaiter>d__69.MoveNext()
   at Microsoft.EntityFrameworkCore.Query.Internal.SplitQueryingEnumerable`1.AsyncEnumerator.<MoveNextAsync>d__19.MoveNext()
   at Microsoft.EntityFrameworkCore.Query.ShapedQueryCompilingExpressionVisitor.<SingleAsync>d__14`1.MoveNext()
   at Microsoft.EntityFrameworkCore.Query.ShapedQueryCompilingExpressionVisitor.<SingleAsync>d__14`1.MoveNext()

Include provider and version information

EF Core 6.0.8 SQL Server 2019

Microsoft Visual Studio Professional 2022 Version 17.3.2 VisualStudio.17.Release/17.3.2+32819.101 Microsoft .NET Framework Version 4.8.04084

Installed Version: Professional

Visual C++ 2022 00476-80000-00000-AA450 Microsoft Visual C++ 2022

ASP.NET and Web Tools 17.3.376.3011 ASP.NET and Web Tools

Azure App Service Tools v3.0.0 17.3.376.3011 Azure App Service Tools v3.0.0

Azure Functions and Web Jobs Tools 17.3.376.3011 Azure Functions and Web Jobs Tools

C# Tools 4.3.0-3.22412.4+c97184bafab9a34d61e85f1c1ef34f25283ce9ba C# components used in the IDE. Depending on your project type and settings, a different version of the compiler may be used.

Common Azure Tools 1.10 Provides common services for use by Azure Mobile Services and Microsoft Azure Tools.

Entity Framework Core Power Tools 2.5 Adds useful design-time EF Core DbContext features to the Visual Studio Solution Explorer context menu.

Microsoft JVM Debugger 1.0 Provides support for connecting the Visual Studio debugger to JDWP compatible Java Virtual Machines

Node.js Tools 1.5.40629.1 Commit Hash:3f5cc0329815af3ffb948f08857446d206a9af36 Adds support for developing and debugging Node.js apps in Visual Studio

NuGet Package Manager 6.3.0 NuGet Package Manager in Visual Studio. For more information about NuGet, visit https://docs.nuget.org/

Razor (ASP.NET Core) 17.0.0.2232702+e1d654e792aa2fe6646a6935bcca80ff0aff4387 Provides languages services for ASP.NET Core Razor.

SQL Server Data Tools 17.0.62207.04100 Microsoft SQL Server Data Tools

Switch Startup Project 4.2.76 Provides a toolbar dropdown box to switch between startup projects.

Test Adapter for Boost.Test 1.0 Enables Visual Studio's testing tools with unit tests written for Boost.Test. The use terms and Third Party Notices are available in the extension installation directory.

Test Adapter for Google Test 1.0 Enables Visual Studio's testing tools with unit tests written for Google Test. The use terms and Third Party Notices are available in the extension installation directory.

TypeScript Tools 17.0.10701.2001 TypeScript Tools for Microsoft Visual Studio

Visual Basic Tools 4.3.0-3.22412.4+c97184bafab9a34d61e85f1c1ef34f25283ce9ba Visual Basic components used in the IDE. Depending on your project type and settings, a different version of the compiler may be used.

Visual F# Tools 17.1.0-beta.22363.4+1b94f89d4d1f41f20f9be73c76f4b229d4e49078 Microsoft Visual F# Tools

Visual Studio IntelliCode 2.2 AI-assisted development for Visual Studio.

VsChromium 0.9.39 Collection of tools to help contributing code to the Chromium project.

VSColorOutput64 2022.2 Color output for build and debug windows - https://mike-ward.net/vscoloroutput

DaveSenn avatar Aug 31 '22 21:08 DaveSenn

@DaveSenn are you finding that EF logs the generated SQL before it executes? I'm getting the same outcome with a slightly different scenario - while I do have a global filter, it's not on a nullable field; I'm applying a more complex operation with the EF.Property<TProperty>(object, string) helper, like this:

switch (employerNumber)
{
    case AustralianBusinessNumber abn:
        ApplyPredicate(client => 
            EF.Property<string>(client, "Identifier") == abn.Value && 
            (EF.Property<string?>(client, "BranchNumber") == abn.BranchNumber || EF.Property<string?>(client, "BranchNumber") == null));
        break;
    case WithholdingPayerNumber wpn:
        ApplyPredicate(client => 
            EF.Property<string>(client, "Identifier") == wpn.Value);
        break;
}

BranchNumber is the only nullable column on my table (and the only nullable field on my entity model), but I can't see the SQL being generated since it only happens in production - that is, I've not been able to reproduce it yet locally.

I am currently using EF Core 6.0.3.

davidkeaveny avatar Sep 21 '22 09:09 davidkeaveny

Not sure if this is the same cause, but I also ran into a NullReferenceException in the SqlNullabilityProcessor.

My query is roughly like this:

	private async Task<Dictionary<string, List<string>>> GetData(string? userId, IEnumerable<string> ids)
	{
		var groups = await _context.MyData
			.AsNoTracking()
			.Where(x => x.UserId != userId)
			.Where(x => ids.Contains(x.DataId!))
			.GroupBy(x=> x.DataId!)
			.Select(grp => new { DataId = grp.Key, UserIds = grp.Select(x=> x.UserId!) })
			.ToListAsync();

		return groups.ToDictionary(grp => grp.DataId, grp => grp.UserIds.ToList());
	}

We're using Group -> Select -> ToList -> ToDictionary instead of just ToDictionary because the query failed to translate otherwise. This query has been working previously, up until we upgraded to .NET 7 and also updated EF packages.

Now, regardless of whether there might be something wrong with this query, our model, or whatever else, I'm going to say that there's definitely ALSO a bug in EF Core when processing this. Why? Because it shouldn't be possible for me to cause a NullReferenceException inside of EF's code... if there's something wrong with the query, it should say THAT instead.

So, looking at the EF code, the null reference is coming from this function in SqlNullabilityProcessor.cs when attempting to process the ids.Contains part of my query:

        (SqlConstantExpression ProcessedValuesExpression, List<object?> ProcessedValuesList, bool HasNullValue)
            ProcessInExpressionValues(SqlExpression valuesExpression, bool extractNullValues)
        {
            var inValues = new List<object?>();
            var hasNullValue = false;
            RelationalTypeMapping? typeMapping = null;

            IEnumerable? values = null;
            if (valuesExpression is SqlConstantExpression sqlConstant)
            {
                typeMapping = sqlConstant.TypeMapping;
                values = (IEnumerable)sqlConstant.Value!;
            }
            else if (valuesExpression is SqlParameterExpression sqlParameter)
            {
                DoNotCache();
                typeMapping = sqlParameter.TypeMapping;
                values = (IEnumerable?)ParameterValues[sqlParameter.Name];
                if (values == null)
                {
                    throw new NullReferenceException();
                }
            }

            foreach (var value in values!) // <-- values is null

The ! on values is wrong because values is in fact null. At the very least this should be replaced with some null check and specific exception with more information.

So why is it null? This happens because valuesExpression is neither of the expected types, instead it is a ConcreteColumnExpression. And how did we get here? From VisitIn:

        // for relational null semantics we don't need to extract null values from the array
        if (UseRelationalNulls
            || !(inExpression.Values is SqlConstantExpression || inExpression.Values is SqlParameterExpression))
        {
            var (valuesExpression, valuesList, _) = ProcessInExpressionValues(inExpression.Values!, extractNullValues: false);

This if statement is specifically calling this only when it has the wrong types. It basically says "if inExpression is not one of the types expected by ProcessInExpressionValues, we'll just go ahead and call it anyway and cause a crash."

It seems to me that one of these things should be changed:

  • VisitIn should change the conditions for calling ProcessInExpressionValues
  • ProcessInExpressionValues should handle more expression types
  • ProcessInExpressionValues should have a more specific exception for unexpected expression types

Note that this is in addition to anything that might actually be wrong with my query that might be preventing it from being translated... I can't tell if there are any other issues because of this bug.

excelkobayashi avatar Jan 09 '23 23:01 excelkobayashi

So why is it null? This happens because valuesExpression is neither of the expected types, instead it is a ConcreteColumnExpression.

Are you sure about that, how are you seeing that? valuesExpression should correspond to ids variable in your query (the instance on which Contains is being called). SQL databases in general do not support a column as a set of values for an IN statement.

I'm going to say that there's definitely ALSO a bug in EF Core when processing this. Why? Because it shouldn't be possible for me to cause a NullReferenceException inside of EF's code...

Take a look 2 rows above the code you just referred to:

if (values == null)
{
    throw new NullReferenceException();
}

EF intentionally throws a NullReferenceException when the parameter (ids in this case) is null; this simulates what would happen if you ran the LINQ query against in-memory objects with LINQ to Objects.

Can you please check to confirm whether ids is null when you execute your query? If not, can you please submit a minimal, runnable code sample that produces this?

roji avatar Jan 09 '23 23:01 roji

Are you sure about that, how are you seeing that? valuesExpression should correspond to ids variable in your query (the instance on which Contains is being called). SQL databases in general do not support a column as a set of values for an IN statement.

I have the debugger stopped on it. It's a column on a select where the projection eventually resolves it to this parameter, it just hasn't been simplified to reference the parameter directly. This is what I meant by when I was saying that there might be other issues preventing the query from translating fully.

Can you please check to confirm whether ids is null when you execute your query?

The collection is absolutely not null, nor does it contain any null values. I've tested with multiple different kinds of inputs including a List or HashSet with 0, 1 or many items.

Everything works fine if the list has 0 items, but crashes otherwise.

If not, can you please submit a minimal, runnable code sample that produces this?

Maybe later when I have time... right now I have to figure out how to work around this problem.

excelkobayashi avatar Jan 09 '23 23:01 excelkobayashi

If not, can you please submit a minimal, runnable code sample that produces this?

Maybe later when I have time... right now I have to figure out how to work around this problem

If you can put together a repro, we can investigate and probably advise on a workaround. That really is the best way to move forward here.

roji avatar Jan 10 '23 00:01 roji

Reproduced. Looks like query splitting is a prerequisite.

Program.cs:

using Microsoft.EntityFrameworkCore;
using Microsoft.Extensions.DependencyInjection;

namespace QueryTest;

internal static class Program
{
	private static async Task Main()
	{
		using ServiceProvider services = Setup.CreateServices();

		using(IServiceScope seedScope = services.CreateScope())
		{
			MyContext context = seedScope.ServiceProvider.GetRequiredService<MyContext>();
			await Setup.Seed(context);
		}

		string userId = "1";
		string[] valueIds = new[] { "A", "B" };

		using(IServiceScope scope1 = services.CreateScope())
		{
			MyContext context = scope1.ServiceProvider.GetRequiredService<MyContext>();
			_ = await context.MyData.FindOtherUserValues1(userId, valueIds);
		}

		using(IServiceScope scope2 = services.CreateScope())
		{
			MyContext context = scope2.ServiceProvider.GetRequiredService<MyContext>();
			_ = await context.MyData.FindOtherUserValues2(userId, valueIds);
		}
	}
}

internal record MyData(string UserId, string? ValueId, int Id = 0) { }

internal static class Setup
{
	private static readonly IReadOnlyList<string> _seedUserIds = new string[] { "1", "2", "3" };
	private static readonly IReadOnlyList<string?> _seedValueIds = new string?[] { "A", "B", "C", null };

	public static ServiceProvider CreateServices()
	{
		return new ServiceCollection()
			.AddDbContext<MyContext>()
			.BuildServiceProvider(true);
	}

	public static async Task Seed(MyContext context)
	{
		_ = await context.Database.EnsureDeletedAsync();
		_ = await context.Database.EnsureCreatedAsync();

		foreach(string userId in _seedUserIds)
		{
			foreach(string? valueId in _seedValueIds)
			{
				MyData data = new(userId, valueId);
				_ = context.MyData.Add(data);
			}
		}

		_ = await context.SaveChangesAsync();
	}
}

internal class MyContext : DbContext
{
	required public DbSet<MyData> MyData { get; init; }

	protected override void OnConfiguring(DbContextOptionsBuilder optionsBuilder)
	{
		_ = optionsBuilder.UseSqlServer(@$"Server=(localdb)\mssqllocaldb;Database={nameof(QueryTest)};ConnectRetryCount=0",
			options => options.UseQuerySplittingBehavior(QuerySplittingBehavior.SplitQuery));
	}
}

internal static class Queries
{
	public static async Task<IReadOnlyDictionary<string, IReadOnlyList<string>>> FindOtherUserValues1(this IQueryable<MyData> source, string? excludeUserId, IEnumerable<string> includeValueIds, CancellationToken cancellationToken = default)
	{
		IQueryable<MyData> dataQuery = source
			.AsNoTracking()
			.Where(md => md.UserId != excludeUserId)
			.Where(md => includeValueIds.Contains(md.ValueId!));

		List<MyData> matches = await dataQuery.ToListAsync(cancellationToken);

		var groupOnClient = matches
			.GroupBy(md => md.ValueId!)
			.Select(grp => new { ValueId = grp.Key, UserIds = grp.Select(md => md.UserId!) });

		// No crash
		return groupOnClient.ToDictionary(grp => grp.ValueId, grp => (IReadOnlyList<string>)grp.UserIds.ToList());
	}

	public static async Task<IReadOnlyDictionary<string, IReadOnlyList<string>>> FindOtherUserValues2(this IQueryable<MyData> source, string? excludeUserId, IEnumerable<string> includeValueIds, CancellationToken cancellationToken = default)
	{
		IQueryable<MyData> dataQuery = source
			.AsNoTracking()
			.Where(md => md.UserId != excludeUserId)
			.Where(md => includeValueIds.Contains(md.ValueId!));

		var groupOnServer = dataQuery
			.GroupBy(md => md.ValueId!)
			.Select(grp => new { ValueId = grp.Key, UserIds = grp.Select(md => md.UserId!) });

		// Crashes here
		var groups = await groupOnServer.ToListAsync(cancellationToken);

		return groups.ToDictionary(grp => grp.ValueId, grp => (IReadOnlyList<string>)grp.UserIds.ToList());
	}
}

QueryTest.csproj:

<Project Sdk="Microsoft.NET.Sdk">

  <PropertyGroup>
    <OutputType>Exe</OutputType>
    <TargetFramework>net7.0</TargetFramework>
    <ImplicitUsings>enable</ImplicitUsings>
    <Nullable>enable</Nullable>
  </PropertyGroup>

  <ItemGroup>
    <PackageReference Include="Microsoft.EntityFrameworkCore.SqlServer" Version="7.0.2" />
  </ItemGroup>

</Project>

excelkobayashi avatar Jan 10 '23 21:01 excelkobayashi

For the record, the reason we were doing Select > ToListAsync > ToDictionary instead of just ToDictionaryAsync is because the ToDictionaryAsync query wouldn't translate, but this version appeared to work for a while...

I suspect that either something changed to make this version not translate either, or else it just only fails in certain circumstances and we hadn't noticed until now.

I noticed that the query actually does work if you do one of these:

  • Disable query splitting
  • Pass an empty list for includeValueIds
  • Do the grouping on the client (as demonstrated in the sample code)

excelkobayashi avatar Jan 10 '23 21:01 excelkobayashi

@excelkobayashi thanks for the quality repro - I can confirm I can see the bug and am preparing the fix.

roji avatar Jan 10 '23 23:01 roji

@excelkobayashi on second thought, your issue doesn't seem the same as the original one - opened #30022 to track this.

roji avatar Jan 10 '23 23:01 roji