Julia DataFrames & Data Science
Work with tabular data using DataFrames.jl.
Creating DataFrames
using DataFrames
# Column-wise construction: every keyword argument becomes one column
df = DataFrame(;
    name = ["Alice", "Bob", "Charlie"],
    age = [25, 30, 35],
    score = [85.5, 92.0, 78.5],
)

# Construction from a Dict whose keys are column names and whose values are
# the column vectors
data = Dict("name" => ["Alice", "Bob"], "age" => [25, 30])
df2 = DataFrame(data)
Basic Operations
# Peek at the two leading rows, then at the two trailing rows
first(df, 2)
last(df, 2)

# Table shape as a (row count, column count) tuple
(nrow(df), ncol(df))

# Working with a single column
df.age          # property syntax
df[!, "age"]    # indexing syntax for the same column
5 .+ df.score   # element-wise arithmetic; the result is a new vector
Filtering and Subsetting
# Filter rows: keep those whose age exceeds 25. The `column => predicate` form
# passes each value of `age` straight to the predicate instead of building a
# row object for every row.
filter(:age => >(25), df)

# Using @subset macro
using DataFramesMeta
# `@subset` evaluates each condition against whole columns, so the comparisons
# have to be broadcast (`.>`). A bare `:age > 25` would compare a vector with a
# number and throw a MethodError. Several conditions are combined with AND.
@subset(df, :age .> 25, :score .> 80)

# Select columns: keep only `name` and `age`
select(df, [:name, :age])
Grouping and Aggregation
using Statistics

# Split the rows by age, then reduce each group to its mean score
gdf = groupby(df, :age)
combine(gdf, :score => mean => :score_mean)

# The same pipeline written with Chain.jl; `_` marks where the previous
# result is passed in, and the output column is named avg_score
using Chain
@chain df begin
    groupby(_, :age)
    combine(_, :score => mean => :avg_score)
end
Reading/Writing Data
using CSV
# Load the contents of data.csv into a DataFrame
data = CSV.read("data.csv", DataFrame)

# Save df to output.csv, using the piping form of CSV.write
df |> CSV.write("output.csv")
Data Visualization
using Plots, StatsPlots
# Scatter plot of score against age. `@df df` lets the plotting call refer to
# columns of `df` by symbol; `group=:name` draws one series per distinct name.
@df df scatter(:age, :score, group=:name)
# Histogram of the values in the score column
@df df histogram(:score)