Class: Google::Cloud::Bigquery::External::CsvSource

Inherits:
DataSource
  • Object
show all
Defined in:
lib/google/cloud/bigquery/external.rb

Overview

CsvSource

CsvSource is a subclass of DataSource and represents a CSV external data source, such as a file in Google Cloud Storage or Google Drive, that can be queried directly even though the data is not stored in BigQuery. Instead of loading or streaming the data, this object references the external data source.

Examples:

require "google/cloud/bigquery"

bigquery = Google::Cloud::Bigquery.new

csv_url = "gs://bucket/path/to/data.csv"
csv_table = bigquery.external csv_url do |csv|
  csv.autodetect = true
  csv.skip_leading_rows = 1
end

data = bigquery.query "SELECT * FROM my_ext_table",
                      external: { my_ext_table: csv_table }

data.each do |row|
  puts row[:name]
end

Instance Method Summary collapse

Methods inherited from DataSource

#autodetect, #autodetect=, #avro?, #backup?, #bigtable?, #compression, #compression=, #csv?, #format, #ignore_unknown, #ignore_unknown=, #json?, #max_bad_records, #max_bad_records=, #sheets?, #urls

Instance Method Details

#delimiter ⇒ String

The separator for fields in a CSV file.

Examples:

require "google/cloud/bigquery"

bigquery = Google::Cloud::Bigquery.new

csv_url = "gs://bucket/path/to/data.csv"
csv_table = bigquery.external csv_url do |csv|
  csv.delimiter = "|"
end

csv_table.delimiter #=> "|"

Returns:

  • (String)


794
795
796
# File 'lib/google/cloud/bigquery/external.rb', line 794

# Reads the field separator from the CSV options of the wrapped API object.
#
# @return [String] the value held in csv_options.field_delimiter
def delimiter
  csv_opts = @gapi.csv_options
  csv_opts.field_delimiter
end

#delimiter=(new_delimiter) ⇒ Object

Set the separator for fields in a CSV file.

Examples:

require "google/cloud/bigquery"

bigquery = Google::Cloud::Bigquery.new

csv_url = "gs://bucket/path/to/data.csv"
csv_table = bigquery.external csv_url do |csv|
  csv.delimiter = "|"
end

csv_table.delimiter #=> "|"

Parameters:

  • new_delimiter (String)

    New delimiter value



815
816
817
818
# File 'lib/google/cloud/bigquery/external.rb', line 815

# Stores a new field separator in the CSV options of the wrapped API object.
# frozen_check! (defined elsewhere) runs first; presumably it rejects changes
# to a frozen source -- confirm against its definition.
#
# @param new_delimiter [String] New delimiter value
def delimiter= new_delimiter
  frozen_check!
  csv_opts = @gapi.csv_options
  csv_opts.field_delimiter = new_delimiter
end

#encoding ⇒ String

The character encoding of the data.

Examples:

require "google/cloud/bigquery"

bigquery = Google::Cloud::Bigquery.new

csv_url = "gs://bucket/path/to/data.csv"
csv_table = bigquery.external csv_url do |csv|
  csv.encoding = "UTF-8"
end

csv_table.encoding #=> "UTF-8"

Returns:

  • (String)


705
706
707
# File 'lib/google/cloud/bigquery/external.rb', line 705

# Reads the character encoding from the CSV options of the wrapped API object.
#
# @return [String] the value held in csv_options.encoding
def encoding
  csv_opts = @gapi.csv_options
  csv_opts.encoding
end

#encoding=(new_encoding) ⇒ Object

Set the character encoding of the data.

Examples:

require "google/cloud/bigquery"

bigquery = Google::Cloud::Bigquery.new

csv_url = "gs://bucket/path/to/data.csv"
csv_table = bigquery.external csv_url do |csv|
  csv.encoding = "UTF-8"
end

csv_table.encoding #=> "UTF-8"

Parameters:

  • new_encoding (String)

    New encoding value



726
727
728
729
# File 'lib/google/cloud/bigquery/external.rb', line 726

# Stores a new character encoding in the CSV options of the wrapped API
# object. frozen_check! (defined elsewhere) runs first; presumably it rejects
# changes to a frozen source -- confirm against its definition.
#
# @param new_encoding [String] New encoding value
def encoding= new_encoding
  frozen_check!
  csv_opts = @gapi.csv_options
  csv_opts.encoding = new_encoding
end

#fields ⇒ Object

The fields of the schema.



975
976
977
# File 'lib/google/cloud/bigquery/external.rb', line 975

# Convenience accessor that forwards to the schema object.
#
# @return [Object] whatever schema.fields returns
def fields
  current_schema = schema
  current_schema.fields
end

#headers ⇒ Object

The names of the columns in the schema.



982
983
984
# File 'lib/google/cloud/bigquery/external.rb', line 982

# Convenience accessor that forwards to the schema object.
#
# @return [Object] whatever schema.headers returns
def headers
  current_schema = schema
  current_schema.headers
end

#iso8859_1? ⇒ Boolean

Checks if the character encoding of the data is "ISO-8859-1".

Examples:

require "google/cloud/bigquery"

bigquery = Google::Cloud::Bigquery.new

csv_url = "gs://bucket/path/to/data.csv"
csv_table = bigquery.external csv_url do |csv|
  csv.encoding = "ISO-8859-1"
end

csv_table.encoding #=> "ISO-8859-1"
csv_table.iso8859_1? #=> true

Returns:

  • (Boolean)


773
774
775
# File 'lib/google/cloud/bigquery/external.rb', line 773

# Reports whether the configured encoding is exactly "ISO-8859-1".
#
# @return [Boolean] result of comparing #encoding with "ISO-8859-1"
def iso8859_1?
  current_encoding = encoding
  current_encoding == "ISO-8859-1"
end

#jagged_rows ⇒ Boolean

Indicates if BigQuery should accept rows that are missing trailing optional columns.

Examples:

require "google/cloud/bigquery"

bigquery = Google::Cloud::Bigquery.new

csv_url = "gs://bucket/path/to/data.csv"
csv_table = bigquery.external csv_url do |csv|
  csv.jagged_rows = true
end

csv_table.jagged_rows #=> true

Returns:

  • (Boolean)


616
617
618
# File 'lib/google/cloud/bigquery/external.rb', line 616

# Reads the allow_jagged_rows flag from the CSV options of the wrapped API
# object.
#
# @return [Boolean] the value held in csv_options.allow_jagged_rows
def jagged_rows
  csv_opts = @gapi.csv_options
  csv_opts.allow_jagged_rows
end

#jagged_rows=(new_jagged_rows) ⇒ Object

Set whether BigQuery should accept rows that are missing trailing optional columns.

Examples:

require "google/cloud/bigquery"

bigquery = Google::Cloud::Bigquery.new

csv_url = "gs://bucket/path/to/data.csv"
csv_table = bigquery.external csv_url do |csv|
  csv.jagged_rows = true
end

csv_table.jagged_rows #=> true

Parameters:

  • new_jagged_rows (Boolean)

    New jagged_rows value



638
639
640
641
# File 'lib/google/cloud/bigquery/external.rb', line 638

# Stores the allow_jagged_rows flag in the CSV options of the wrapped API
# object. frozen_check! (defined elsewhere) runs first; presumably it rejects
# changes to a frozen source -- confirm against its definition.
#
# @param new_jagged_rows [Boolean] New jagged_rows value
def jagged_rows= new_jagged_rows
  frozen_check!
  csv_opts = @gapi.csv_options
  csv_opts.allow_jagged_rows = new_jagged_rows
end

#quote ⇒ String

The value that is used to quote data sections in a CSV file.

Examples:

require "google/cloud/bigquery"

bigquery = Google::Cloud::Bigquery.new

csv_url = "gs://bucket/path/to/data.csv"
csv_table = bigquery.external csv_url do |csv|
  csv.quote = "'"
end

csv_table.quote #=> "'"

Returns:

  • (String)


837
838
839
# File 'lib/google/cloud/bigquery/external.rb', line 837

# Reads the quote character from the CSV options of the wrapped API object.
#
# @return [String] the value held in csv_options.quote
def quote
  csv_opts = @gapi.csv_options
  csv_opts.quote
end

#quote=(new_quote) ⇒ Object

Set the value that is used to quote data sections in a CSV file.

Examples:

require "google/cloud/bigquery"

bigquery = Google::Cloud::Bigquery.new

csv_url = "gs://bucket/path/to/data.csv"
csv_table = bigquery.external csv_url do |csv|
  csv.quote = "'"
end

csv_table.quote #=> "'"

Parameters:

  • new_quote (String)

    New quote value



858
859
860
861
# File 'lib/google/cloud/bigquery/external.rb', line 858

# Stores a new quote character in the CSV options of the wrapped API object.
# frozen_check! (defined elsewhere) runs first; presumably it rejects changes
# to a frozen source -- confirm against its definition.
#
# @param new_quote [String] New quote value
def quote= new_quote
  frozen_check!
  csv_opts = @gapi.csv_options
  csv_opts.quote = new_quote
end

#quoted_newlines ⇒ Boolean

Indicates if BigQuery should allow quoted data sections that contain newline characters in a CSV file.

Examples:

require "google/cloud/bigquery"

bigquery = Google::Cloud::Bigquery.new

csv_url = "gs://bucket/path/to/data.csv"
csv_table = bigquery.external csv_url do |csv|
  csv.quoted_newlines = true
end

csv_table.quoted_newlines #=> true

Returns:

  • (Boolean)


661
662
663
# File 'lib/google/cloud/bigquery/external.rb', line 661

# Reads the allow_quoted_newlines flag from the CSV options of the wrapped
# API object.
#
# @return [Boolean] the value held in csv_options.allow_quoted_newlines
def quoted_newlines
  csv_opts = @gapi.csv_options
  csv_opts.allow_quoted_newlines
end

#quoted_newlines=(new_quoted_newlines) ⇒ Object

Set whether BigQuery should allow quoted data sections that contain newline characters in a CSV file.

Examples:

require "google/cloud/bigquery"

bigquery = Google::Cloud::Bigquery.new

csv_url = "gs://bucket/path/to/data.csv"
csv_table = bigquery.external csv_url do |csv|
  csv.quoted_newlines = true
end

csv_table.quoted_newlines #=> true

Parameters:

  • new_quoted_newlines (Boolean)

    New quoted_newlines value



683
684
685
686
# File 'lib/google/cloud/bigquery/external.rb', line 683

# Stores the allow_quoted_newlines flag in the CSV options of the wrapped API
# object. frozen_check! (defined elsewhere) runs first; presumably it rejects
# changes to a frozen source -- confirm against its definition.
#
# @param new_quoted_newlines [Boolean] New quoted_newlines value
def quoted_newlines= new_quoted_newlines
  frozen_check!
  csv_opts = @gapi.csv_options
  csv_opts.allow_quoted_newlines = new_quoted_newlines
end

#schema(replace: false) {|schema| ... } ⇒ Google::Cloud::Bigquery::Schema

The schema for the data.

Examples:

require "google/cloud/bigquery"

bigquery = Google::Cloud::Bigquery.new

csv_url = "gs://bucket/path/to/data.csv"
csv_table = bigquery.external csv_url do |csv|
  csv.schema do |schema|
    schema.string "name", mode: :required
    schema.string "email", mode: :required
    schema.integer "age", mode: :required
    schema.boolean "active", mode: :required
  end
end

Parameters:

  • replace (Boolean)

    Whether to replace the existing schema with the new schema. If true, the fields will replace the existing schema. If false, the fields will be added to the existing schema. The default value is false.

Yields:

  • (schema)

    a block for setting the schema

Yield Parameters:

  • schema (Schema)

    the object accepting the schema

Returns:

  • (Google::Cloud::Bigquery::Schema)

935
936
937
938
939
940
941
942
943
944
# File 'lib/google/cloud/bigquery/external.rb', line 935

# Returns the schema object for this source, building it on first use.
#
# @param replace [Boolean] when true, the cached schema is swapped for a
#   fresh Schema.from_gapi result (after frozen_check!, defined elsewhere)
# @yield [schema] the schema object, if a block is given
# @return [Schema] the value of @schema after any block has run
def schema replace: false
  # Build the cached schema from the API object's schema on first access.
  @schema = Schema.from_gapi(@gapi.schema) unless @schema

  if replace
    frozen_check!
    @schema = Schema.from_gapi
  end

  current = @schema
  # A frozen source hands out a frozen schema as well.
  current.freeze if frozen?
  yield current if block_given?

  @schema
end

#schema=(new_schema) ⇒ Object

Set the schema for the data.

Examples:

require "google/cloud/bigquery"

bigquery = Google::Cloud::Bigquery.new

# Build the schema up front so it can be assigned to the external source.
csv_schema = bigquery.schema do |schema|
  schema.string "name", mode: :required
  schema.string "email", mode: :required
  schema.integer "age", mode: :required
  schema.boolean "active", mode: :required
end

csv_url = "gs://bucket/path/to/data.csv"
csv_table = bigquery.external csv_url
# Assign the prepared schema through the schema= writer.
csv_table.schema = csv_schema

Parameters:

  • new_schema (Schema)

    The schema object.



967
968
969
970
# File 'lib/google/cloud/bigquery/external.rb', line 967

# Replaces the cached schema object with the one supplied.
# frozen_check! (defined elsewhere) runs first; presumably it rejects changes
# to a frozen source -- confirm against its definition.
#
# @param new_schema [Schema] The schema object.
def schema= new_schema
  frozen_check!

  @schema = new_schema
end

#skip_leading_rows ⇒ Integer

The number of rows at the top of a CSV file that BigQuery will skip when reading the data.

Examples:

require "google/cloud/bigquery"

bigquery = Google::Cloud::Bigquery.new

csv_url = "gs://bucket/path/to/data.csv"
csv_table = bigquery.external csv_url do |csv|
  csv.skip_leading_rows = 1
end

csv_table.skip_leading_rows #=> 1

Returns:

  • (Integer)


881
882
883
# File 'lib/google/cloud/bigquery/external.rb', line 881

# Reads the number of leading rows to skip from the CSV options of the
# wrapped API object.
#
# @return [Integer] the value held in csv_options.skip_leading_rows
def skip_leading_rows
  csv_opts = @gapi.csv_options
  csv_opts.skip_leading_rows
end

#skip_leading_rows=(row_count) ⇒ Object

Set the number of rows at the top of a CSV file that BigQuery will skip when reading the data.

Examples:

require "google/cloud/bigquery"

bigquery = Google::Cloud::Bigquery.new

csv_url = "gs://bucket/path/to/data.csv"
csv_table = bigquery.external csv_url do |csv|
  csv.skip_leading_rows = 1
end

csv_table.skip_leading_rows #=> 1

Parameters:

  • row_count (Integer)

    New skip_leading_rows value



903
904
905
906
# File 'lib/google/cloud/bigquery/external.rb', line 903

# Stores the number of leading rows to skip in the CSV options of the wrapped
# API object. frozen_check! (defined elsewhere) runs first; presumably it
# rejects changes to a frozen source -- confirm against its definition.
#
# @param row_count [Integer] New skip_leading_rows value
def skip_leading_rows= row_count
  frozen_check!
  csv_opts = @gapi.csv_options
  csv_opts.skip_leading_rows = row_count
end

#utf8? ⇒ Boolean

Checks if the character encoding of the data is "UTF-8". This is the default.

Examples:

require "google/cloud/bigquery"

bigquery = Google::Cloud::Bigquery.new

csv_url = "gs://bucket/path/to/data.csv"
csv_table = bigquery.external csv_url do |csv|
  csv.encoding = "UTF-8"
end

csv_table.encoding #=> "UTF-8"
csv_table.utf8? #=> true

Returns:

  • (Boolean)


750
751
752
753
# File 'lib/google/cloud/bigquery/external.rb', line 750

# Reports whether the data is treated as UTF-8.
#
# @return [Boolean] true when #encoding is nil or equals "UTF-8"
def utf8?
  # An unset encoding counts as UTF-8.
  encoding.nil? || encoding == "UTF-8"
end