Commit 50300cdb authored by MichelJuillard's avatar MichelJuillard
Browse files

starting io.jl

parent ce62b64b
Pipeline #5864 passed with stage
in 25 seconds
......@@ -17,7 +17,7 @@ struct Period <: AbstractPeriod
- `Semester`,
- `Quarter`,
- `Month`,
- ` Week`,
- `Week`,
- `Day`, and
- `Undated` (no frequency)
"""
......
"""
AbstractPeriodToken
A token used in parsing or formatting a period string. Each subtype must
define the tryparsenext and format methods.
"""
abstract type AbstractPeriodToken end
"""
tryparsenext(tok::AbstractPeriodToken, str::String, i::Int, len::Int)
`tryparsenext` parses for the `tok` token in `str` starting at index `i`.
`len` is the length of the string.
If parsing succeeds, returns a tuple of 2 elements `(res, idx)`, where:
* `res` is the result of the parsing.
* `idx::Int`, is the index _after_ the index at which parsing ended.
"""
function tryparsenext end
### Token types ###
struct PeriodPart{letter} <: AbstractPeriodToken
width::Int
fixed::Bool
end
@inline min_width(d::PeriodPart) = d.fixed ? d.width : 1
@inline max_width(d::PeriodPart) = d.fixed ? d.width : 0
function _show_content(io::IO, d::PeriodPart{c}) where c
for i = 1:d.width
print(io, c)
end
end
function Base.show(io::IO, d::PeriodPart{c}) where c
print(io, "PeriodPart(")
_show_content(io, d)
print(io, ")")
end
### Parse tokens
for c in "ysqmwd"
@eval begin
@inline function tryparsenext(d::PeriodPart{$c}, str, i, len)
return tryparsenext_base10(str, i, len, min_width(d), max_width(d))
end
end
end
for (tok, fn) in zip("uU", Any[monthabbr_to_value, monthname_to_value, dayabbr_to_value, dayname_to_value])
@eval @inline function tryparsenext(d::PeriodPart{$tok}, str, i, len, locale)
next = tryparsenext_word(str, i, len, locale, max_width(d))
next === nothing && return nothing
word, i = next
val = $fn(word, locale)
val == 0 && return nothing
return val, i
end
end
### Format tokens
for (c, fn) in zip("ysqmwd", Any[year, semester, quarter, month, week])
@eval function format(io, d::PeriodPart{$c}, p)
print(io, string($fn(p), base = 10, pad = d.width))
end
end
for (tok, fn) in zip("uU", Any[monthabbr, monthname])
@eval function format(io, d::PeriodPart{$tok}, p)
print(io, $fn(month(p)))
end
end
### Delimiters
struct Delim{T, length} <: AbstractPeriodToken
d::T
end
Delim(d::T) where {T<:AbstractChar} = Delim{T, 1}(d)
Delim(d::String) = Delim{String, length(d)}(d)
@inline function tryparsenext(d::Delim{<:AbstractChar, N}, str, i::Int, len) where N
for j = 1:N
i > len && return nothing
next = iterate(str, i)
@assert next !== nothing
c, i = next
c != d.d && return nothing
end
return true, i
end
@inline function tryparsenext(d::Delim{String, N}, str, i::Int, len) where N
i1 = i
i2 = firstindex(d.d)
for j = 1:N
if i1 > len
return nothing
end
next1 = iterate(str, i1)
@assert next1 !== nothing
c1, i1 = next1
next2 = iterate(d.d, i2)
@assert next2 !== nothing
c2, i2 = next2
if c1 != c2
return nothing
end
end
return true, i1
end
@inline function format(io, d::Delim, dt, locale)
print(io, d.d)
end
function _show_content(io::IO, d::Delim{<:AbstractChar, N}) where N
if d.d in keys(CONVERSION_SPECIFIERS)
for i = 1:N
print(io, '\\', d.d)
end
else
for i = 1:N
print(io, d.d)
end
end
end
function _show_content(io::IO, d::Delim)
for c in d.d
if c in keys(CONVERSION_SPECIFIERS)
print(io, '\\')
end
print(io, c)
end
end
function Base.show(io::IO, d::Delim)
print(io, "Delim(")
_show_content(io, d)
print(io, ")")
end
### DateFormat construction
abstract type DayOfWeekToken end # special addition to Period types
# Map conversion specifiers or character codes to tokens.
# Note: Allow addition of new character codes added by packages
const CONVERSION_SPECIFIERS = Dict{Char, Type}(
'y' => Year,
's' => Semester,
'q' => Quarter,
'm' => Month,
'u' => Month,
'U' => Month,
'w' => Week,
'd' => Day,
)
# Default values are needed when a conversion specifier is used in a DateFormat for parsing
# and we have reached the end of the input string.
# Note: Allow `Any` value as a default to support extensibility
const CONVERSION_DEFAULTS = IdDict{Type, Any}(
Year => Int64(1),
Semester => Int64(1),
Quarter => Int64(1),
Month => Int64(1),
Week => Int64(0),
Day => Int64(1),
)
# Specifies the required fields in order to parse a TimeType
# Note: Allows for addition of new TimeTypes
const CONVERSION_TRANSLATIONS = IdDict{Type, Any}(
Date => (Year, Month, Day),
)
"""
DateFormat(format::AbstractString, locale="english") -> DateFormat
Construct a date formatting object that can be used for parsing date strings or
formatting a date object as a string. The following character codes can be used to construct the `format`
string:
| Code | Matches | Comment |
|:-----------|:----------|:-------------------------------------------------------------|
| `y` | 1996, 96 | Returns year of 1996, 0096 |
| `s` | 1, 2 | Returns semester 1 or 2 |
| `q` | 1, ..., 4 | Returns quarter 1, 2, 3, 4 |
| `m` | 1, 01 | Matches 1 or 2-digit months |
| `u` | Jan | Matches abbreviated months according to the `locale` keyword |
| `U` | January | Matches full month names according to the `locale` keyword |
| `w` | 1,..., 53 | Returns week 01 to 53 |
| `d` | 1, 01 | Matches 1 or 2-digit days |
| `yyyymmdd` | 19960101 | Matches fixed-width year, month, and day |
Characters not listed above are normally treated as delimiters between slots.
For example a `dt` string of "1996-Q1" would have a `format` string like
"y-Qq". If you need to use a code character as a delimiter you can escape it using
backslash. The date "1995y01m" would have the format "y\\ym\\m".
Creating a PeriodFormat object is expensive. Whenever possible, create it once and use it many times
or try the [`periodformat""`](@ref @periodformat_str) string macro. Using this macro creates the PeriodFormat
object once at macro expansion time and reuses it later. There are also several [pre-defined formatters](@ref
Common-Period-Formatters), listed later.
See [`DateTime`](@ref) and [`format`](@ref) for how to use a DateFormat object to parse and write Date strings
respectively.
"""
function DateFormat(f::AbstractString)
tokens = AbstractDateToken[]
prev = ()
prev_offset = 1
letters = String(collect(keys(CONVERSION_SPECIFIERS)))
for m in eachmatch(Regex("(?<!\\\\)([\\Q$letters\\E])\\1*"), f)
tran = replace(f[prev_offset:prevind(f, m.offset)], r"\\(.)" => s"\1")
if !isempty(prev)
letter, width = prev
typ = CONVERSION_SPECIFIERS[letter]
push!(tokens, PeriodPart{letter}(width, isempty(tran)))
end
if !isempty(tran)
push!(tokens, Delim(length(tran) == 1 ? first(tran) : tran))
end
letter = f[m.offset]
width = length(m.match)
prev = (letter, width)
prev_offset = m.offset + width
end
tran = replace(f[prev_offset:lastindex(f)], r"\\(.)" => s"\1")
if !isempty(prev)
letter, width = prev
typ = CONVERSION_SPECIFIERS[letter]
push!(tokens, PeriodPart{letter}(width, false))
end
if !isempty(tran)
push!(tokens, Delim(length(tran) == 1 ? first(tran) : tran))
end
tokens_tuple = (tokens...,)
return DateFormat{Symbol(f),typeof(tokens_tuple)}(tokens_tuple, locale)
end
function DateFormat(f::AbstractString, locale::AbstractString)
DateFormat(f, LOCALES[locale])
end
function Base.show(io::IO, df::DateFormat)
print(io, "dateformat\"")
for t in df.tokens
_show_content(io, t)
end
print(io, '"')
end
Base.Broadcast.broadcastable(x::DateFormat) = Ref(x)
"""
dateformat"Y-m-d H:M:S"
Create a [`DateFormat`](@ref) object. Similar to `DateFormat("Y-m-d H:M:S")`
but creates the DateFormat object once during macro expansion.
See [`DateFormat`](@ref) for details about format specifiers.
"""
macro dateformat_str(str)
DateFormat(str)
end
# Standard formats
"""
Dates.ISODateTimeFormat
Describes the ISO8601 formatting for a date and time. This is the default value for `Dates.format`
of a `DateTime`.
# Example
```jldoctest
julia> Dates.format(DateTime(2018, 8, 8, 12, 0, 43, 1), ISODateTimeFormat)
"2018-08-08T12:00:43.001"
```
"""
const ISODateTimeFormat = DateFormat("yyyy-mm-dd\\THH:MM:SS.s")
default_format(::Type{DateTime}) = ISODateTimeFormat
"""
Dates.ISODateFormat
Describes the ISO8601 formatting for a date. This is the default value for `Dates.format` of a `Date`.
# Example
```jldoctest
julia> Dates.format(Date(2018, 8, 8), ISODateFormat)
"2018-08-08"
```
"""
const ISODateFormat = DateFormat("yyyy-mm-dd")
default_format(::Type{Date}) = ISODateFormat
"""
Dates.ISOTimeFormat
Describes the ISO8601 formatting for a time. This is the default value for `Dates.format` of a `Time`.
# Example
```jldoctest
julia> Dates.format(Time(12, 0, 43, 1), ISOTimeFormat)
"12:00:43.001"
```
"""
const ISOTimeFormat = DateFormat("HH:MM:SS.s")
default_format(::Type{Time}) = ISOTimeFormat
"""
Dates.RFC1123Format
Describes the RFC1123 formatting for a date and time.
# Example
```jldoctest
julia> Dates.format(DateTime(2018, 8, 8, 12, 0, 43, 1), RFC1123Format)
"Wed, 08 Aug 2018 12:00:43"
```
"""
const RFC1123Format = DateFormat("e, dd u yyyy HH:MM:SS")
### API
const Locale = Union{DateLocale, String}
"""
DateTime(dt::AbstractString, format::AbstractString; locale="english") -> DateTime
Construct a `DateTime` by parsing the `dt` date time string following the
pattern given in the `format` string (see [`DateFormat`](@ref) for syntax).
!!! note
This method creates a `DateFormat` object each time it is called. It is recommended
that you create a [`DateFormat`](@ref) object instead and use that as the second
argument to avoid performance loss when using the same format repeatedly.
# Example
```jldoctest
julia> DateTime("2020-01-01", "yyyy-mm-dd")
2020-01-01T00:00:00
julia> a = ("2020-01-01", "2020-01-02");
julia> [DateTime(d, dateformat"yyyy-mm-dd") for d ∈ a] # preferred
2-element Vector{DateTime}:
2020-01-01T00:00:00
2020-01-02T00:00:00
```
"""
function DateTime(dt::AbstractString, format::AbstractString; locale::Locale=ENGLISH)
return parse(DateTime, dt, DateFormat(format, locale))
end
"""
DateTime(dt::AbstractString, df::DateFormat=ISODateTimeFormat) -> DateTime
Construct a `DateTime` by parsing the `dt` date time string following the
pattern given in the [`DateFormat`](@ref) object, or $ISODateTimeFormat if omitted.
Similar to `DateTime(::AbstractString, ::AbstractString)` but more efficient when
repeatedly parsing similarly formatted date time strings with a pre-created
`DateFormat` object.
"""
DateTime(dt::AbstractString, df::DateFormat=ISODateTimeFormat) = parse(DateTime, dt, df)
"""
Date(d::AbstractString, format::AbstractString; locale="english") -> Date
Construct a `Date` by parsing the `d` date string following the pattern given
in the `format` string (see [`DateFormat`](@ref) for syntax).
!!! note
This method creates a `DateFormat` object each time it is called. It is recommended
that you create a [`DateFormat`](@ref) object instead and use that as the second
argument to avoid performance loss when using the same format repeatedly.
# Example
```jldoctest
julia> Date("2020-01-01", "yyyy-mm-dd")
2020-01-01
julia> a = ("2020-01-01", "2020-01-02");
julia> [Date(d, dateformat"yyyy-mm-dd") for d ∈ a] # preferred
2-element Vector{Date}:
2020-01-01
2020-01-02
```
"""
function Date(d::AbstractString, format::AbstractString; locale::Locale=ENGLISH)
parse(Date, d, DateFormat(format, locale))
end
"""
Date(d::AbstractString, df::DateFormat=ISODateFormat) -> Date
Construct a `Date` by parsing the `d` date string following the
pattern given in the [`DateFormat`](@ref) object, or $ISODateFormat if omitted.
Similar to `Date(::AbstractString, ::AbstractString)` but more efficient when
repeatedly parsing similarly formatted date strings with a pre-created
`DateFormat` object.
"""
Date(d::AbstractString, df::DateFormat=ISODateFormat) = parse(Date, d, df)
"""
Time(t::AbstractString, format::AbstractString; locale="english") -> Time
Construct a `Time` by parsing the `t` time string following the pattern given
in the `format` string (see [`DateFormat`](@ref) for syntax).
!!! note
This method creates a `DateFormat` object each time it is called. It is recommended
that you create a [`DateFormat`](@ref) object instead and use that as the second
argument to avoid performance loss when using the same format repeatedly.
# Example
```jldoctest
julia> Time("12:34pm", "HH:MMp")
12:34:00
julia> a = ("12:34pm", "2:34am");
julia> [Time(d, dateformat"HH:MMp") for d ∈ a] # preferred
2-element Vector{Time}:
12:34:00
02:34:00
```
"""
function Time(t::AbstractString, format::AbstractString; locale::Locale=ENGLISH)
parse(Time, t, DateFormat(format, locale))
end
"""
Time(t::AbstractString, df::DateFormat=ISOTimeFormat) -> Time
Construct a `Time` by parsing the `t` date time string following the
pattern given in the [`DateFormat`](@ref) object, or $ISOTimeFormat if omitted.
Similar to `Time(::AbstractString, ::AbstractString)` but more efficient when
repeatedly parsing similarly formatted time strings with a pre-created
`DateFormat` object.
"""
Time(t::AbstractString, df::DateFormat=ISOTimeFormat) = parse(Time, t, df)
@generated function format(io::IO, dt::TimeType, fmt::DateFormat{<:Any,T}) where T
N = fieldcount(T)
quote
ts = fmt.tokens
loc = fmt.locale
Base.@nexprs $N i -> format(io, ts[i], dt, loc)
end
end
function format(dt::TimeType, fmt::DateFormat, bufsize=12)
# preallocate to reduce resizing
io = IOBuffer(Vector{UInt8}(undef, bufsize), read=true, write=true)
format(io, dt, fmt)
String(io.data[1:io.ptr - 1])
end
"""
format(dt::TimeType, format::AbstractString; locale="english") -> AbstractString
Construct a string by using a `TimeType` object and applying the provided `format`. The
following character codes can be used to construct the `format` string:
| Code | Examples | Comment |
|:-----------|:----------|:-------------------------------------------------------------|
| `y` | 6 | Numeric year with a fixed width |
| `Y` | 1996 | Numeric year with a minimum width |
| `m` | 1, 12 | Numeric month with a minimum width |
| `u` | Jan | Month name shortened to 3-chars according to the `locale` |
| `U` | January | Full month name according to the `locale` keyword |
| `d` | 1, 31 | Day of the month with a minimum width |
| `H` | 0, 23 | Hour (24-hour clock) with a minimum width |
| `M` | 0, 59 | Minute with a minimum width |
| `S` | 0, 59 | Second with a minimum width |
| `s` | 000, 500 | Millisecond with a minimum width of 3 |
| `e` | Mon, Tue | Abbreviated days of the week |
| `E` | Monday | Full day of week name |
The number of sequential code characters indicate the width of the code. A format of
`yyyy-mm` specifies that the code `y` should have a width of four while `m` a width of two.
Codes that yield numeric digits have an associated mode: fixed-width or minimum-width.
The fixed-width mode left-pads the value with zeros when it is shorter than the specified
width and truncates the value when longer. Minimum-width mode works the same as fixed-width
except that it does not truncate values longer than the width.
When creating a `format` you can use any non-code characters as a separator. For example to
generate the string "1996-01-15T00:00:00" you could use `format`: "yyyy-mm-ddTHH:MM:SS".
Note that if you need to use a code character as a literal you can use the escape character
backslash. The string "1996y01m" can be produced with the format "yyyy\\ymm\\m".
"""
function format(dt::TimeType, f::AbstractString; locale::Locale=ENGLISH)
format(dt, DateFormat(f, locale))
end
# show
function Base.print(io::IO, dt::DateTime)
str = if millisecond(dt) == 0
format(dt, dateformat"YYYY-mm-dd\THH:MM:SS", 19)
else
format(dt, dateformat"YYYY-mm-dd\THH:MM:SS.sss", 23)
end
print(io, str)
end
function Base.print(io::IO, dt::Date)
# don't use format - bypassing IOBuffer creation
# saves a bit of time here.
y,m,d = yearmonthday(value(dt))
yy = y < 0 ? @sprintf("%05i", y) : lpad(y, 4, "0")
mm = lpad(m, 2, "0")
dd = lpad(d, 2, "0")
print(io, "$yy-$mm-$dd")
end
for date_type in (:Date, :DateTime)
# Human readable output (i.e. "2012-01-01")
@eval Base.show(io::IO, ::MIME"text/plain", dt::$date_type) = print(io, dt)
# Parsable output (i.e. Date("2012-01-01"))
@eval Base.show(io::IO, dt::$date_type) = print(io, typeof(dt), "(\"", dt, "\")")
# Parsable output will have type info displayed, thus it is implied
@eval Base.typeinfo_implicit(::Type{$date_type}) = true
end
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment