FSharp.Data icon indicating copy to clipboard operation
FSharp.Data copied to clipboard

CsvFile.Load does not work correctly with slow input streams

Open peterzeller opened this issue 7 years ago • 1 comment

When using CsvFile.Load with Console.OpenStandardInput() as input, only the first line is read into the CsvFile. When a file is piped into stdin instead, all lines are read.

Here is a failing unit test, which shows the problem without relying on stdin:


/// Regression test: CsvFile.Load must return every row even when the
/// underlying stream hands out fewer bytes per Read call than were requested.
[<Test>]
let ``Should work with slow streams`` () =
  let input = "a,10\nb,20\nc,30\nd,40\ne,50\n"B
  // Upper bound on the number of bytes returned by a single Read call.
  // Change this to read more bytes at once.
  let chunkSize = 1
  // Creates a read-only, non-seekable stream over `input` that returns at
  // most `chunkSize` bytes per Read call.
  let myStream() =
    let mutable position = 0
    { new Stream () with
        member this.CanRead = true
        member this.CanSeek = false
        member this.CanWrite = false
        member this.Length
          with get() = failwith "not supported"
        member this.Position
          with get() = int64 position
          and set(value: int64): unit = failwith "not supported"
        member this.Flush() = failwith "not supported"
        member this.Read(buffer: byte[], offset: int, count: int): int =
          // Short read: never more than requested, never more than chunkSize,
          // never past the end of input; 0 once the input is exhausted.
          let available = input.Length - position
          let n = max 0 (min count (min chunkSize available))
          if n > 0 then
            Array.blit input position buffer offset n
            position <- position + n
          n
        member this.Seek(offset: int64, origin: SeekOrigin): int64 = failwith "not supported"
        member this.SetLength(len: int64): unit = failwith "not supported"
        member this.Write(buffer: byte[], offset: int, count: int): unit = failwith "not supported"
    }

  // Projects each row to its first two columns so the results can be compared structurally.
  let toList(rows: seq<CsvRow>) = rows |> Seq.map (fun r -> (r.[0], r.[1])) |> Seq.toList

  // read directly from the stream:
  let actual = FSharp.Data.CsvFile.Load(myStream(), hasHeaders = false).Cache().Rows |> toList

  // first read stream into string and then read:
  let expected =
    use streamReader = new StreamReader(myStream())
    use myStreamAsString = new StringReader(streamReader.ReadToEnd())
    // toList forces all rows before the readers are disposed at the end of this scope.
    FSharp.Data.CsvFile.Load(myStreamAsString, hasHeaders = false).Cache().Rows |> toList

  printfn "actual = %A" actual
  printfn "expected = %A" expected
  actual |> should equal expected

This prints:

 actual = [("a", "10")]
 expected = [("a", "10"); ("b", "20"); ("c", "30"); ("d", "40"); ("e", "50")]

Tested with 9caffdbea5ef847b6d227ce358b282f28b1315fa on dotnet 2.1.401.

peterzeller avatar Aug 28 '18 13:08 peterzeller

That is really odd, did you debug to try to figure out what's the cause?

ovatsus avatar Oct 14 '18 11:10 ovatsus