# Uploading data

### **Create a new dataset**

```r
library(redivis)

# Reference a dataset under a user account.
# To reference a dataset under an organization instead:
# dataset <- redivis$organization("Demo organization")$dataset("some dataset")
owner <- redivis$user("your-username")
dataset <- owner$dataset("some dataset")

# Allowed values for public_access_level:
# "none", "overview", "metadata", "sample", "data"
dataset$create(public_access_level = "overview")
```

### **Create a table and upload data**

```r
library(redivis)

# Reference the "next" version of the dataset
dataset <- redivis$user("user_name")$dataset("dataset_name", version = "next")

# Create a table on the dataset. Datasets may have multiple tables
table <- dataset$table("Table name")$create(description = "Some description")

# Upload a file to the table.
# You can create multiple uploads per table, in which case they'll be appended together.
upload <- table$upload()$create(
  "./data.csv",             # Path to file, data.frame, raw vector, etc
  type = "delimited",       # Inferred from file extension if not provided
  remove_on_fail = TRUE,    # Remove the upload if a failure occurs
  wait_for_finish = TRUE,   # Wait for the upload to finish processing
  raise_on_fail = TRUE      # Raise an error on failure
)
```

### **Upload non-tabular (unstructured) files**

```r
library(redivis)
dataset <- redivis$user("user_name")$dataset("dataset_name", version = "next")

# Non-tabular files must be uploaded to file index tables
table <- dataset$table("my_files")$create(is_file_index = TRUE)

# Upload all contents in a directory
table$add_files(directory = "/path/to/directory/")

# Upload specific files. Each entry is a list with either a `path` or `data`.
table$add_files(files = list(
  list(path = "/path/to/file.png"), # File name will be "file.png"
  list(path = "/path/to/other/file.png", name = "other_file.png"), # File name will be "other_file.png"
  list(data = "Hello world", name = "hello_world.txt"), # Data can be a string or raw vector
  list(data = url("http://example.com"), name = "example_com.html") # Data can be a connection
))
```

### **Upload data from an external source**

```r
# Assuming we get a reference to the table the same as above...

upload <- table$upload("data.csv")

# Describe the external source via transfer_specification rather than passing data directly
upload$create(
  transfer_specification = list(
    sourceType = "gcs", # One of gcs, s3, bigQuery, url, redivis
    sourcePath = "my-bucket/path-to-my-file.csv",
    # sourcePath = "https://example.com/data-file", (for sourceType == "url")
    # sourcePath = "project_name.dataset_name.table_name", (for sourceType == "bigQuery")
    # sourcePath = "owner_name.dataset_or_workflow_name.table_name", (for sourceType == "redivis")
    identity = "my_email@example.com" # The email associated with the data source
  )
)
```

### **Stream data to an upload**

```r
library(redivis)

dataset <- redivis$user("user_name")$dataset("dataset_name", version = "next")
table <- dataset$table("table_name")

# Providing a schema with the initial request is optional (but recommended).
# If not set, schema will be inferred based on the first batch of rows.
schema <- list(
  list(name = "var1", type = "string"),
  list(name = "var2", type = "integer"),
  list(name = "var3", type = "dateTime")
)

# Construct a data.frame to send (or alternatively, a stringified JSON array of objects)
rows <- data.frame(
  var1 = c("hello", "world"),
  var2 = c(1, 2),
  var3 = c(NA, "2020-01-01T00:00:00.123")
)

upload <- table$upload(name = "my_stream")

# Create, or get a reference to an existing upload named "my_stream"
upload$create(type = "stream", schema = schema, if_not_exists = TRUE)

insert_response <- upload$insert_rows(rows)

# See REST API / uploads / insertRows
print(insert_response)
```

### **Release a new version**

```r
library(redivis)

# Reference the "next" version of the dataset, then release it
owner <- redivis$user("username")
dataset <- owner$dataset("some dataset", version = "next")
dataset$release()
```

### **Create a subsequent version on an existing dataset**

```r
library(redivis)

dataset <- redivis$user("your-username")$dataset("some dataset")

# dataset$create_next_version will throw an error if a "next" version already exists,
# unless the ignore_if_exists argument is provided
dataset <- dataset$create_next_version(ignore_if_exists = TRUE)
table <- dataset$table("table name")

# By default, all new data is appended to the previous version of a table.
# If you'd like to replace the previous data, update the upload_merge_strategy.
table$update(upload_merge_strategy = "replace")

upload <- table$upload("data.csv")$create(
  "./data.csv",             # Path to file, data.frame, raw vector, etc
  # All additional params are optional; default values are shown here
  type = "delimited",       # One of stream, delimited, csv, ndjson, avro, parquet, orc, xls, xlsx, dta, sas7bdat, sav
  skip_bad_records = FALSE,
  has_header_row = TRUE,    # Only relevant for csv, xls(x)
  remove_on_fail = TRUE,    # Remove the upload if a failure occurs
  wait_for_finish = TRUE,   # Wait for the upload to finish processing
  raise_on_fail = TRUE,     # Raise an error on failure

  # The following are only relevant for delimited files:
  allow_quoted_newlines = FALSE, # Allow newlines within cells. Setting to TRUE will substantially reduce ingest performance.
  quote_character = '"',         # The character used to escape delimiters within cells. Generally a double quote in compliant CSVs.
  delimiter = NULL               # For delimited files, explicitly set the delimiter, otherwise the delimiter will be automatically inferred.
)

# When all uploads have finished, release the next version
dataset$release()
```


---

# Agent Instructions: Querying This Documentation

If you need additional information that is not directly available in this page, you can query the documentation dynamically by asking a question.

Perform an HTTP GET request on the current page URL with the `ask` query parameter:

```
GET https://docs.redivis.com/api/client-libraries/redivis-r/examples/uploading-data.md?ask=<question>
```

The question should be specific, self-contained, and written in natural language.
The response will contain a direct answer to the question and relevant excerpts and sources from the documentation.

Use this mechanism when the answer is not explicitly present in the current page, you need clarification or additional context, or you want to retrieve related documentation sections.
