Giorgio... beware of using cbind to form data frames from vectors.
It is inefficient in its use of memory, doesn't set column names, and
will convert all columns to character if any of the vectors being
combined is of character type. Below are three revamps of this
example.
Using cbind with data frames as input fixes most of these problems,
but you are still better off using the "data.frame" function in most
cases.
#--- base R, simplified
# Builds one "long" data frame `df` with columns x, value and variable:
# three curves (y1, y2, y3) plus their pointwise average, stacked row-wise.
x <- seq_len( 100 )
y1 <- x * x
df1 <- data.frame( x = x, value = y1, stringsAsFactors = FALSE )
# scalar automatically repeated to number of rows
df1$variable <- "y1"
y2 <- y1 + 1500
df2 <- data.frame( x = x, value = y2, stringsAsFactors = FALSE )
df2$variable <- "y2"
y3 <- y1 + 6000
df3 <- data.frame( x = x, value = y3, stringsAsFactors = FALSE )
df3$variable <- "y3"
avg <- ( y1 + y2 + y3 ) / 3
# only x and value here; the label column is added on the next line, exactly
# as for df1..df3, so nothing from outside this script is needed
df4 <- data.frame( x = x, value = avg, stringsAsFactors = FALSE )
df4$variable <- "average"
df <- rbind( df1, df2, df3, df4 )
# the factor levels define the order in which colours and linetypes are
# matched to the curves when plotting
df$variable <- factor( df$variable
                     , levels = c( "y1", "y2", "y3", "average" ) )
# this is the data to start with ggplot()
df
library(ggplot2)
# this example you made goes to all the effort to put the data into one
# data frame, and then fails to make use of the automatic legend creation
# feature of ggplot
# row indices of the "average" curve; computed here so the plot does not
# rely on a `sel` object left over from some earlier session
sel <- which( df$variable == "average" )
# first layer: the individual curves with default line settings;
# second layer: the average overlaid as a thin dashed blue line.
# The styling is given as constants rather than aes() mappings, which is
# why no legend is generated.
# NOTE: recent ggplot2 releases deprecate `size` for lines in favour of
# `linewidth`; `size` is kept for compatibility with older versions.
ggplot( data = df[ -sel, ]
      , aes( x = x, y = value, group = variable ) ) +
  geom_line() +
  geom_line( data = df[ sel, ]
           , mapping = aes( x = x, y = value, group = variable )
           , size = 0.5, linetype = "dashed", color = "blue" )
# Easier to interpret: colour, linetype and size are all mapped to
# `variable`, so ggplot builds the legend itself. The manual scales share
# the title "Curve" and list their values in the order of the factor levels
# of `variable`.
# NOTE: recent ggplot2 releases deprecate `size` for lines in favour of
# `linewidth`; `size` is kept for compatibility with older versions.
ggplot(
  data = df,
  mapping = aes(
    x = x,
    y = value,
    colour = variable,
    linetype = variable,
    size = variable
  )
) +
  geom_line() +
  scale_colour_manual(
    name = "Curve",
    values = c( "red", "green", "blue", "black" )
  ) +
  scale_linetype_manual(
    name = "Curve",
    values = c( rep( 1, 3 ), 2 )
  ) +
  scale_size_manual(
    name = "Curve",
    values = c( rep( 1, 3 ), 0.5 )
  )
#----- base R data manipulation, a little more sophisticated
library( ggplot2 )
# Start from a one-column data frame and derive the dependent variables
# inside it. (There are better ways to generate y1, y2, ... style
# variables in general.)
df0 <- data.frame( x = seq_len( 100 ) )
df0 <- within( df0, {
  y1 <- x * x
  y2 <- y1 + 1500
  y3 <- y1 + 6000
} )
# rowMeans() accepts any number of columns
df0$average <- rowMeans( df0[ c( "y1", "y2", "y3" ) ] )
# inspect the "wide" layout: one row per x, one column per curve
df0
# Convert "wide" to "long" with base R's reshape(). Its arguments are a bit
# hard to remember (compare with the dplyr/tidyr example): `varying` names
# the columns to stack, `v.names` the column receiving their values, and
# `times`/`timevar` the labels and the column that stores them.
vars <- c( "y1", "y2", "y3", "average" )
df <- reshape(
  df0,
  direction = "long",
  idvar = "x",
  varying = vars,
  times = vars,
  timevar = "variable",
  v.names = "value"
)
# Turn the character labels into a factor; the level order is the order in
# which colours and linetypes are specified in the plot.
df$variable <- factor( df$variable, levels = vars )
# Colour, linetype and size are all driven by the same column and the three
# manual scales carry the same title, which makes ggplot merge them into a
# single legend. Scale values follow the order of the factor levels of
# `variable`.
ggplot(
  data = df,
  mapping = aes(
    x = x,
    y = value,
    colour = variable,
    linetype = variable,
    size = variable
  )
) +
  geom_line() +
  scale_colour_manual(
    name = "Curve",
    values = c( "red", "green", "blue", "black" )
  ) +
  scale_linetype_manual(
    name = "Curve",
    values = c( rep( 1, 3 ), 2 )
  ) +
  scale_size_manual(
    name = "Curve",
    values = c( rep( 1, 3 ), 0.5 )
  )
#---- Nonstandard syntax from dplyr, easier to remember and use on the fly
# but requires some more contributed packages
library( ggplot2 )
library( dplyr )
library( tidyr )
# dplyr "pipes" data from one function to the next
# read about it in the vignettes for the "dplyr" and "magrittr" packages
# (the leading-%>% layout only parses because each pipeline is wrapped in
# parentheses)
df0 <- ( data.frame( x = seq_len( 100 ) )
       %>% mutate( y1 = x * x
                 , y2 = y1 + 1500
                 , y3 = y1 + 6000
                 )
       )
# make a note of all names except the first column in this case
vars <- names( df0 )[ -1 ]
# if you need to refer to the whole dataset in the pipeline of functions,
# the "." refers to the data frame as it exists at that point.
df0 <- ( df0
       %>% mutate( average = rowMeans( .[ , vars ] ) )
       )
# all names with average too
allvars <- names( df0 )[ -1 ]
# "pivot_longer" stacks all columns except x into a "value" column, with
# their names as labels in a "variable" column (it replaces the superseded
# "gather"); then make the variable column into a factor with the specified
# sequence of levels. Rows are expected to come out grouped by x rather
# than by curve, which makes no difference to a line plot.
df <- ( df0
      %>% pivot_longer( cols = -x
                      , names_to = "variable"
                      , values_to = "value"
                      )
      %>% mutate( variable = factor( variable, levels = allvars ) )
      )
# Per-curve colours, linetypes and sizes, listed in the order of the levels
# of "variable": one entry for each name in `vars`, followed by a final
# entry for the average curve (black, linetype 2, size 0.5).
colv <- append( rainbow( length( vars ) ), "black" )
lntypv <- c( rep( 1, times = length( vars ) ), 2 )
szv <- c( rep( 1, times = length( vars ) ), 0.5 )
# All three aesthetics map to `variable` and all three scales share the
# title "Curve".
ggplot(
  data = df,
  mapping = aes(
    x = x,
    y = value,
    colour = variable,
    linetype = variable,
    size = variable
  )
) +
  geom_line() +
  scale_colour_manual( name = "Curve", values = colv ) +
  scale_linetype_manual( name = "Curve", values = lntypv ) +
  scale_size_manual( name = "Curve", values = szv )
On Fri, 25 Dec 2015, Giorgio Garziano wrote:
Hi Marna,
here is another example that should appear more similar to your scenario
than my previous one.
# Original example: each curve is built as its own three-column data frame
# (x, value, variable) and the pieces are stacked with rbind().
x <- seq(1:100)
y1 <- x*x
# label vector: one "y1" per row
g1 <- rep("y1", 100)
# cbind() of the two numeric vectors, converted to a data frame
df1 <- as.data.frame(cbind(x, y1), stringsAsFactors=FALSE)
# append the label vector as a third column
df1 <- as.data.frame(cbind(df1, g1))
# common column names so that the pieces can be row-bound later
colnames(df1)<- c("x", "value", "variable")
# second curve: y1 shifted up by 1500, built the same way
y2 <- y1+1500
g2 <- rep("y2", 100)
df2 <- as.data.frame(cbind(x, y2), stringsAsFactors=FALSE)
df2 <- as.data.frame(cbind(df2, g2))
colnames(df2)<- c("x", "value", "variable")
# third curve: y1 shifted up by 6000
y3 <- y1+6000
g3 <- rep("y3", 100)
df3 <- as.data.frame(cbind(x, y3), stringsAsFactors=FALSE)
df3 <- as.data.frame(cbind(df3, g3))
colnames(df3)<- c("x", "value", "variable")
# pointwise average of the three curves, labelled "average"
avg <- (y1+y2+y3)/3
df4 <- as.data.frame(cbind(x, avg))
g4 <- rep("average", 100)
df4 <- as.data.frame(cbind(df4, g4))
colnames(df4) <- c("x", "value", "variable")
# stack the four pieces into one long data frame
df <- data.frame(rbind(df1, df2, df3, df4))
# this is the data to start with ggplot()
df
# the df rows where the average value is stored
sel <- which(df[,"variable"]=="average")
library(ggplot2)
# first layer: all rows except the average, one line per value of
# "variable"; second layer: the average rows drawn as a dashed blue line.
# The styling is passed as constants outside aes().
ggplot(data = df[-sel,], aes(x=x, y=value, group=variable)) + geom_line() +
geom_line(data = df[sel,], aes(x=x, y=value, group=variable), size=0.5,
linetype="dashed", color="blue")
Merry Christmas,
--
GG
[[alternative HTML version deleted]]
______________________________________________
R-help@r-project.org mailing list -- To UNSUBSCRIBE and more, see
https://stat.ethz.ch/mailman/listinfo/r-help
PLEASE do read the posting guide http://www.R-project.org/posting-guide.html
and provide commented, minimal, self-contained, reproducible code.
---------------------------------------------------------------------------
Jeff Newmiller The ..... ..... Go Live...
DCN:<jdnew...@dcn.davis.ca.us> Basics: ##.#. ##.#. Live Go...
Live: OO#.. Dead: OO#.. Playing
Research Engineer (Solar/Batteries O.O#. #.O#. with
/Software/Embedded Controllers) .OO#. .OO#. rocks...1k
______________________________________________
R-help@r-project.org mailing list -- To UNSUBSCRIBE and more, see
https://stat.ethz.ch/mailman/listinfo/r-help
PLEASE do read the posting guide http://www.R-project.org/posting-guide.html
and provide commented, minimal, self-contained, reproducible code.