StructField( "author" , StringType(), True ),
StructField( "description" , StringType(), True ),
StructField( "genre" , StringType(), True ),
StructField( "price" , DoubleType(), True ),
StructField( "publish_date" , StringType(), True ),
StructField( "title" , StringType(), True )])
# Read books.xml through the 'xml' data source, using 'book' as the row tag
# and the explicit customSchema for the resulting DataFrame.
df = (
    spark.read
    .format('xml')
    .options(rowTag='book')
    .load('books.xml', schema=customSchema)
)

# Keep only the "author" and "_id" columns and write them back out as XML,
# with 'book' as the row tag and 'books' as the root tag.
(
    df.select("author", "_id")
    .write
    .format('xml')
    .options(rowTag='book', rootTag='books')
    .save('newbooks.xml')
)
R API
%r
# Load books.xml through the "xml" source, using "book" as the row tag.
df <- loadDF("books.xml", source = "xml", rowTag = "book")

# No tag options are passed on write, so `rootTag` is set to "ROWS" and
# `rowTag` is set to "ROW". Any existing output is overwritten.
saveDF(df, path = "newbooks.xml", source = "xml", mode = "overwrite")
%r
# Explicit schema for the book rows: every column is a string except
# `price`, which is a double.
customSchema <- structType(
  structField("_id", type = "string"),
  structField("author", type = "string"),
  structField("description", type = "string"),
  structField("genre", type = "string"),
  structField("price", type = "double"),
  structField("publish_date", type = "string"),
  structField("title", type = "string")
)

# Load books.xml through the "xml" source with the schema above,
# using "book" as the row tag.
df <- loadDF("books.xml", source = "xml", schema = customSchema, rowTag = "book")

# No tag options are passed on write, so `rootTag` is set to "ROWS" and
# `rowTag` is set to "ROW". Any existing output is overwritten.
saveDF(df, path = "newbooks.xml", source = "xml", mode = "overwrite")
Read XML with Row Validation
val df = spark.read
.option( "rowTag" , "book" )
.option( "rowValidationXSDPath" , xsdPath)