From 1478b9706661c0500da92f483d9a6d876ab4c771 Mon Sep 17 00:00:00 2001 From: syldium Date: Sat, 13 Apr 2024 19:48:39 +0200 Subject: [PATCH 01/11] [ci skip] Preview the analyzer rework --- Cargo.lock | 9 +- analyzer/Cargo.toml | 2 + analyzer/src/dependency.rs | 104 - analyzer/src/diagnostic.rs | 210 -- analyzer/src/engine.rs | 101 - analyzer/src/environment.rs | 141 - analyzer/src/environment/symbols.rs | 472 ---- analyzer/src/{types => }/hir.rs | 175 +- analyzer/src/hoist.rs | 691 +++++ analyzer/src/importer.rs | 88 - analyzer/src/imports.rs | 148 - analyzer/src/lib.rs | 368 +-- analyzer/src/module.rs | 552 ++++ analyzer/src/name.rs | 130 - analyzer/src/reef.rs | 99 - analyzer/src/relations.rs | 243 -- analyzer/src/steps.rs | 42 - analyzer/src/steps/collect.rs | 1258 --------- analyzer/src/steps/resolve.rs | 1370 --------- analyzer/src/steps/resolve/diagnostics.rs | 99 - analyzer/src/steps/resolve/import.rs | 203 -- analyzer/src/steps/resolve/symbol.rs | 134 - analyzer/src/steps/shared_diagnostics.rs | 35 - analyzer/src/steps/typing.rs | 3086 --------------------- analyzer/src/steps/typing/assign.rs | 236 -- analyzer/src/steps/typing/bounds.rs | 185 -- analyzer/src/steps/typing/coercion.rs | 331 --- analyzer/src/steps/typing/exploration.rs | 278 -- analyzer/src/steps/typing/function.rs | 1057 ------- analyzer/src/steps/typing/iterable.rs | 216 -- analyzer/src/steps/typing/lower.rs | 132 - analyzer/src/steps/typing/magic.rs | 66 - analyzer/src/steps/typing/structure.rs | 429 --- analyzer/src/steps/typing/view.rs | 112 - analyzer/src/symbol.rs | 211 ++ analyzer/src/types.rs | 68 - analyzer/src/types/builtin.rs | 318 --- analyzer/src/types/ctx.rs | 100 - analyzer/src/types/engine.rs | 307 -- analyzer/src/types/operator.rs | 21 - analyzer/src/types/ty.rs | 204 -- analyzer/src/typing.rs | 1846 ++++++++++++ analyzer/src/typing/assign.rs | 148 + analyzer/src/typing/function.rs | 18 + analyzer/src/typing/lower.rs | 102 + analyzer/src/typing/registry.rs | 86 + analyzer/src/typing/schema.rs | 64 + analyzer/src/typing/shell.rs | 149 + analyzer/src/typing/user.rs | 205 ++ analyzer/src/typing/variable.rs | 242 ++ analyzer/tests/collect_debug.rs | 422 +-- ast/src/function.rs | 10 + ast/src/use.rs | 2 +- ast/src/variable.rs | 2 +- cli/src/cli.rs | 169 +- cli/src/disassemble.rs | 1 - cli/src/main.rs | 127 +- cli/src/pipeline.rs | 202 +- cli/src/repl.rs | 154 +- cli/src/report.rs | 259 +- cli/src/std.rs | 83 - compiler/src/bytecode.rs | 24 +- compiler/src/context.rs | 81 +- compiler/src/emit.rs | 84 +- compiler/src/emit/identifier.rs | 62 - compiler/src/emit/invoke.rs | 94 +- compiler/src/emit/iterable.rs | 101 +- compiler/src/emit/jump.rs | 3 +- compiler/src/emit/native.rs | 305 +- compiler/src/emit/structure.rs | 7 +- compiler/src/externals.rs | 35 - compiler/src/lib.rs | 426 +-- compiler/src/locals.rs | 86 +- compiler/src/structure.rs | 13 +- compiler/src/type.rs | 32 +- context/src/source.rs | 1 + parser/src/lib.rs | 23 +- 77 files changed, 5290 insertions(+), 14409 deletions(-) delete mode 100644 analyzer/src/dependency.rs delete mode 100644 analyzer/src/diagnostic.rs delete mode 100644 analyzer/src/engine.rs delete mode 100644 analyzer/src/environment.rs delete mode 100644 analyzer/src/environment/symbols.rs rename analyzer/src/{types => }/hir.rs (52%) create mode 100644 analyzer/src/hoist.rs delete mode 100644 analyzer/src/importer.rs delete mode 100644 analyzer/src/imports.rs create mode 100644 analyzer/src/module.rs delete mode 100644 analyzer/src/name.rs delete mode 100644 analyzer/src/reef.rs 
delete mode 100644 analyzer/src/relations.rs delete mode 100644 analyzer/src/steps.rs delete mode 100644 analyzer/src/steps/collect.rs delete mode 100644 analyzer/src/steps/resolve.rs delete mode 100644 analyzer/src/steps/resolve/diagnostics.rs delete mode 100644 analyzer/src/steps/resolve/import.rs delete mode 100644 analyzer/src/steps/resolve/symbol.rs delete mode 100644 analyzer/src/steps/shared_diagnostics.rs delete mode 100644 analyzer/src/steps/typing.rs delete mode 100644 analyzer/src/steps/typing/assign.rs delete mode 100644 analyzer/src/steps/typing/bounds.rs delete mode 100644 analyzer/src/steps/typing/coercion.rs delete mode 100644 analyzer/src/steps/typing/exploration.rs delete mode 100644 analyzer/src/steps/typing/function.rs delete mode 100644 analyzer/src/steps/typing/iterable.rs delete mode 100644 analyzer/src/steps/typing/lower.rs delete mode 100644 analyzer/src/steps/typing/magic.rs delete mode 100644 analyzer/src/steps/typing/structure.rs delete mode 100644 analyzer/src/steps/typing/view.rs create mode 100644 analyzer/src/symbol.rs delete mode 100644 analyzer/src/types.rs delete mode 100644 analyzer/src/types/builtin.rs delete mode 100644 analyzer/src/types/ctx.rs delete mode 100644 analyzer/src/types/engine.rs delete mode 100644 analyzer/src/types/operator.rs delete mode 100644 analyzer/src/types/ty.rs create mode 100644 analyzer/src/typing.rs create mode 100644 analyzer/src/typing/assign.rs create mode 100644 analyzer/src/typing/function.rs create mode 100644 analyzer/src/typing/lower.rs create mode 100644 analyzer/src/typing/registry.rs create mode 100644 analyzer/src/typing/schema.rs create mode 100644 analyzer/src/typing/shell.rs create mode 100644 analyzer/src/typing/user.rs create mode 100644 analyzer/src/typing/variable.rs delete mode 100644 cli/src/std.rs delete mode 100644 compiler/src/emit/identifier.rs delete mode 100644 compiler/src/externals.rs diff --git a/Cargo.lock b/Cargo.lock index 54ff2e64..08029f20 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -21,6 +21,7 @@ dependencies = [ "indexmap", "parser", "pretty_assertions", + "thiserror", ] [[package]] @@ -1276,18 +1277,18 @@ dependencies = [ [[package]] name = "thiserror" -version = "1.0.56" +version = "1.0.58" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d54378c645627613241d077a3a79db965db602882668f9136ac42af9ecb730ad" +checksum = "03468839009160513471e86a034bb2c5c0e4baae3b43f79ffc55c4a5427b3297" dependencies = [ "thiserror-impl", ] [[package]] name = "thiserror-impl" -version = "1.0.56" +version = "1.0.58" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "fa0faa943b50f3db30a20aa7e265dbc66076993efed8463e8de414e5d06d3471" +checksum = "c61f3ba182994efc43764a46c018c347bc492c79f024e705f46567b418f6d4f7" dependencies = [ "proc-macro2", "quote", diff --git a/analyzer/Cargo.toml b/analyzer/Cargo.toml index de3c5264..3cc6396d 100644 --- a/analyzer/Cargo.toml +++ b/analyzer/Cargo.toml @@ -8,8 +8,10 @@ edition = "2021" [dependencies] ast = { path = "../ast" } context = { path = "../context" } +parser = { path = "../parser" } enum-assoc = "1.0.0" indexmap = "2.0.2" +thiserror = "1.0.58" [dev-dependencies] parser = { path = "../parser" } diff --git a/analyzer/src/dependency.rs b/analyzer/src/dependency.rs deleted file mode 100644 index c593e739..00000000 --- a/analyzer/src/dependency.rs +++ /dev/null @@ -1,104 +0,0 @@ -use std::collections::{HashMap, HashSet}; -use std::hash::Hash; - -/// A directed graph of dependencies. 
-#[derive(Debug, Clone, PartialEq)] -pub struct Dependencies -where - N: Eq + Hash, -{ - /// The nodes of the graph. - top: HashMap>, -} - -impl Dependencies -where - N: Eq + Hash, -{ - /// Explicitly adds a new node to the graph. - pub fn add_node(&mut self, node: N) { - self.top.entry(node).or_default(); - } - - /// Adds a new directed dependency from `from` to `to`. - /// - /// If the nodes do not exist, they are implicitly added. - pub fn add_dependency(&mut self, from: N, to: N) { - self.top.entry(from).or_default().push(to); - } -} - -impl Default for Dependencies -where - N: Eq + Hash, -{ - fn default() -> Self { - Self { - top: HashMap::new(), - } - } -} - -/// Gets an ordered list of nodes such that all dependencies are before the node. -/// -/// The order is such that if `A` depends on `B`, then `B` will be before `A` in the list. -pub fn topological_sort(dependencies: &Dependencies) -> Vec -where - N: Eq + Hash + Copy, -{ - let dep_count = dependencies.top.len(); - let mut sorted = Vec::with_capacity(dep_count); - let mut visited = HashSet::with_capacity(dep_count); - let mut stack = Vec::new(); - for node in dependencies.top.keys() { - if visited.contains(node) { - continue; - } - stack.push(*node); - while let Some(node) = stack.pop() { - if visited.insert(node) { - stack.push(node); - if let Some(dependencies) = dependencies.top.get(&node) { - for dependency in dependencies { - if !visited.contains(dependency) { - stack.push(*dependency); - } - } - } - } else { - sorted.push(node); - } - } - } - //FIXME: can contain duplicates - sorted.dedup(); - sorted -} - -#[cfg(test)] -mod tests { - use super::*; - - #[test] - fn any_topological_sort() { - let mut dependencies = Dependencies::default(); - dependencies.add_node(0); - dependencies.add_node(1); - dependencies.add_node(2); - let mut res = topological_sort(&dependencies); - res.sort(); - assert_eq!(res, vec![0, 1, 2]); - } - - #[test] - fn order_topological_sort() { - let mut dependencies = Dependencies::default(); - dependencies.add_dependency(0, 1); - dependencies.add_dependency(0, 2); - dependencies.add_dependency(1, 2); - dependencies.add_dependency(1, 3); - dependencies.add_dependency(2, 3); - let sorted = topological_sort(&dependencies); - assert_eq!(sorted, vec![3, 2, 1, 0]); - } -} diff --git a/analyzer/src/diagnostic.rs b/analyzer/src/diagnostic.rs deleted file mode 100644 index 4cc430cc..00000000 --- a/analyzer/src/diagnostic.rs +++ /dev/null @@ -1,210 +0,0 @@ -use enum_assoc::Assoc; - -use crate::reef::ReefId; -use context::source::SourceSegment; - -use crate::relations::SourceId; - -#[non_exhaustive] -#[derive(PartialEq, Debug, Assoc, Clone, Copy)] -#[func(pub fn code(&self) -> u16)] -#[func(pub fn critical(&self) -> bool { true })] -pub enum DiagnosticID { - #[assoc(code = 1)] - UnsupportedFeature, - - /// An import could not be resolved - #[assoc(code = 2)] - ImportResolution, - - /// A symbol is unknown as it could not be resolved - #[assoc(code = 3)] - UnknownSymbol, - - /// A symbol is invalid as it cannot be accessed in any way - /// (for example, a symbol into a function, or in a variable) - #[assoc(code = 4)] - InvalidSymbol, - - /// A symbol path is invalid by its structure - /// (for example, the path `reef::foo::reef` is invalid because the last `reef` would targets the current reef) - #[assoc(code = 5)] - InvalidSymbolPath, - - /// There is a `use` statement between two expressions, - /// `use` needs to be declared before any expressions in an environment. 
- #[assoc(code = 6)] - UseBetweenExprs, - - /// A `use` statement is shadowed as the symbol it imports has been imported again below - #[assoc(code = 7)] - #[assoc(critical = false)] - ShadowedImport, - - /// A symbol have the same fully qualified name (its name with its module's name prepended) - /// as another module - #[assoc(code = 8)] - SymbolConflictsWithModule, - - /// A type annotation refers to an unknown type. - #[assoc(code = 9)] - UnknownType, - - /// A type annotation is not matching the expected type. - #[assoc(code = 10)] - TypeMismatch, - - /// A type annotation is missing, and cannot be inferred. - #[assoc(code = 11)] - CannotInfer, - - /// Occurs when a `continue` or `break` directive is declared outside of a loop. - #[assoc(code = 12)] - InvalidBreakOrContinue, - - /// A type cannot be casted to another type. - #[assoc(code = 13)] - IncompatibleCast, - - /// A named method is unknown or does not match the expected signature. - #[assoc(code = 14)] - UnknownMethod, - - /// A variable is being reassigned, but it is not mutable. - #[assoc(code = 15)] - CannotReassign, - - /// A function does not have a definition. - /// - /// Only internals reefs that are defined natively can omit an implementation. - #[assoc(code = 16)] - NoFunctionDefinition, - - /// An incorrect number of type arguments was provided. - #[assoc(code = 17)] - InvalidTypeArguments, - - /// An assignment operator was used on a non-place expression. - #[assoc(code = 18)] - InvalidAssignment, - - /// A field access was done on a value that is not a structure - #[assoc(code = 19)] - InvalidFieldAccess, -} - -/// Observations are labels in a code snippet that are used to explain a [`Diagnostic`]. -/// -/// They can contain a message to explain the role of this specific snippet. -#[derive(Clone, PartialEq, Debug)] -pub struct Observation { - /// The location where this observation applies - pub location: SourceLocation, - /// An optional help string to complete the observation - pub message: Option, -} - -impl Observation { - /// Creates an observation that underlines an erroneous location. - /// - /// Prefer adding a label to explain the observation. - pub fn new(location: SourceLocation) -> Self { - Self { - location, - message: None, - } - } - - /// Creates an observation on an erroneous location. - pub fn here( - source: SourceId, - reef: ReefId, - segment: SourceSegment, - message: impl Into, - ) -> Self { - Self { - location: SourceLocation::new(source, reef, segment), - message: Some(message.into()), - } - } - - /// Creates a contextual observation. - pub fn context( - source: SourceId, - reef: ReefId, - segment: SourceSegment, - message: impl Into, - ) -> Self { - Self { - location: SourceLocation::new(source, reef, segment), - message: Some(message.into()), - } - } -} - -/// A location in a source code. -#[derive(Debug, Clone, PartialEq, Eq, Hash)] -pub struct SourceLocation { - pub source: SourceId, - pub reef: ReefId, - pub segment: SourceSegment, -} - -impl SourceLocation { - /// Creates a new source location. - pub fn new(source: SourceId, reef: ReefId, segment: SourceSegment) -> Self { - Self { - source, - reef, - segment, - } - } -} - -/// The structure of a diagnostic. 
-#[derive(PartialEq, Debug)] -pub struct Diagnostic { - /// The diagnostic identifier - pub identifier: DiagnosticID, - /// The overall message of this diagnostic - pub global_message: String, - /// Some observations to explain the diagnostic - pub observations: Vec, - /// Any tips to help the user understand and eventually fix the raised issue. - pub helps: Vec, -} - -impl Diagnostic { - pub fn new(id: DiagnosticID, msg: impl Into) -> Self { - Self { - identifier: id, - global_message: msg.into(), - observations: Vec::new(), - helps: Vec::new(), - } - } - - pub fn with_observation(mut self, o: Observation) -> Self { - self.observations.push(o); - self - } - - pub fn with_observations>( - mut self, - observations: I, - ) -> Self { - self.observations.extend(observations); - self - } - - pub fn with_help(mut self, help: impl Into) -> Self { - self.helps.push(help.into()); - self - } -} - -impl From<(SourceId, ReefId, SourceSegment)> for Observation { - fn from((source, reef, segment): (SourceId, ReefId, SourceSegment)) -> Self { - Self::new(SourceLocation::new(source, reef, segment)) - } -} diff --git a/analyzer/src/engine.rs b/analyzer/src/engine.rs deleted file mode 100644 index 8e967a33..00000000 --- a/analyzer/src/engine.rs +++ /dev/null @@ -1,101 +0,0 @@ -use ast::Expr; -use context::source::ContentId; - -use crate::environment::Environment; -use crate::name::Name; -use crate::relations::SourceId; - -/// Owns references to the global AST and its environments. -#[derive(Debug, Default)] -pub struct Engine<'a> { - /// The engine has the ownership of the AST. - #[allow(clippy::vec_box)] - // Box is used to ensure that the reference behind is still valid after vector's realloc - asts: Vec>, - - /// Associates a module id to the corresponding environment. - /// - /// Those are origins of symbols that are available locally in the environment, - /// which may also be the source of unresolved symbols, tracked in the Relations. - pub(crate) origins: Vec<(ContentId, &'a Expr, Option)>, -} - -impl<'a> Engine<'a> { - /// Takes ownership of an expression and returns a reference to it. - pub fn take(&mut self, ast: Expr) -> &'a Expr { - self.asts.push(Box::new(ast)); - unsafe { - // SAFETY: Assume for now that expressions are never removed from the engine. - // The reference behind Box does not change and is valid for the lifetime of the engine. - std::mem::transmute::<&Expr, &'a Expr>(self.asts.last().unwrap()) - } - } - - ///Returns an iterator over environments contained in engine - pub fn environments(&self) -> impl Iterator { - self.origins - .iter() - .enumerate() - .filter_map(|(id, (_, _, env))| env.as_ref().map(|env| (SourceId(id), env))) - } - - /// Adds a new origin to the engine and returns its given id. - /// - /// A call to this method must be followed by a call to [`Engine::attach`] with the same id - /// after the environment has been built. - pub fn track(&mut self, content_id: ContentId, ast: &'a Expr) -> SourceId { - let id = self.origins.len(); - self.origins.push((content_id, ast, None)); - SourceId(id) - } - - /// Attaches an environment to an origin if the origin does not already have an attached environment. - pub fn attach(&mut self, id: SourceId, env: Environment) -> &mut Environment { - debug_assert!( - self.origins[id.0].2.is_none(), - "Could not attach environment to a source that is already attached" - ); - self.origins[id.0].2.replace(env); - self.origins[id.0].2.as_mut().unwrap() - } - - ///Finds an environment by its fully qualified name. 
- pub fn find_environment_by_name(&self, name: &Name) -> Option<(SourceId, &Environment)> { - self.origins - .iter() - .enumerate() - .find(|(_, (_, _, env))| env.as_ref().map(|env| &env.fqn == name).unwrap_or(false)) - .and_then(|(idx, (_, _, env))| env.as_ref().map(|env| (SourceId(idx), env))) - } - - pub fn get_expression(&self, id: SourceId) -> Option<&Expr> { - self.origins.get(id.0).map(|(_, expr, _)| *expr) - } - - /// Gets an environment by its identifier. - pub fn get_environment(&self, id: SourceId) -> Option<&Environment> { - self.origins.get(id.0).and_then(|(_, _, env)| env.as_ref()) - } - - /// Gets an environment by its identifier. - pub fn get_environment_mut(&mut self, id: SourceId) -> Option<&mut Environment> { - self.origins - .get_mut(id.0) - .and_then(|(_, _, env)| env.as_mut()) - } - - /// Gets the number of origins in the engine. - pub fn len(&self) -> usize { - self.origins.len() - } - - /// Returns `true` does not contain any origin. - pub fn is_empty(&self) -> bool { - self.origins.is_empty() - } - - /// Gets the id of the AST tree that was used to create the given origin. - pub fn get_original_content(&self, id: SourceId) -> Option { - self.origins.get(id.0).map(|(content_id, _, _)| *content_id) - } -} diff --git a/analyzer/src/environment.rs b/analyzer/src/environment.rs deleted file mode 100644 index 6e8959a2..00000000 --- a/analyzer/src/environment.rs +++ /dev/null @@ -1,141 +0,0 @@ -//! The type environment of the analyzer. -//! -//! An environment maps local variable names to their type and keep tracks of scopes. -//! The same variable name can be accessed in different scopes, and can have different type in -//! different stack frames. For example: -//! -//! ```code -//! { -//! // The variable `n` doesn't exist yet. -//! val n = 9; // Create a new variable `n` with type `int`. -//! // In this frame, the variable `n` of type `int` is in scope. -//! { -//! // The variable `n` exists, and refers to the variable in the outer scope. -//! val n = "9"; // Create a new variable `n` with type `any` that shadows the outer `n`. -//! echo $n; -//! // In this frame, the variable `n` of type `any` is in scope. -//! } -//! // In this frame, the variable `n` of type `int` is in scope. -//! echo $n; -//! } -//! ``` - -use std::collections::HashMap; - -use context::source::{SourceSegment, SourceSegmentHolder}; -use symbols::Symbols; - -use crate::name::Name; -use crate::relations::{SourceId, SymbolRef}; - -pub mod symbols; - -/// An environment. -/// The Environment contains the defined types, variables, structure and function definitions of a certain scope. -/// It can have dependencies over other environments. -#[derive(Debug, Clone)] -pub struct Environment { - /// The source object id of the parent environment, if the environment is nested. - pub parent: Option, - - /// Whether the environment is directly executable. - pub is_script: bool, - - ///Fully qualified name of the environment - pub fqn: Name, - - /// The variables that are declared in the environment. - pub symbols: Symbols, - - /// A mapping of expression segments to symbols. - pub definitions: HashMap, - - /// A mapping of expression segments to their declaring environment. 
- pub declarations: HashMap, -} - -impl Environment { - pub fn script(name: Name) -> Self { - Self { - parent: None, - is_script: true, - fqn: name, - symbols: Symbols::default(), - definitions: HashMap::new(), - declarations: HashMap::new(), - } - } - - pub fn fork(&self, source_id: SourceId, name: &str) -> Environment { - let env_fqn = self.fqn.child(name); - - Self { - parent: Some(source_id), - is_script: false, - fqn: env_fqn, - symbols: Symbols::default(), - definitions: HashMap::new(), - declarations: HashMap::new(), - } - } - - pub fn begin_scope(&mut self) { - self.symbols.begin_scope(); - } - - pub fn end_scope(&mut self) { - self.symbols.end_scope(); - } - - /// Gets an iterator over the direct inner environment identifiers. - pub fn iter_direct_inner_environments(&self) -> impl Iterator + '_ { - self.declarations.values().copied() - } - - /// Tests if the position of the declaration of a symbol is important. - /// - /// If the declaration order is important in the host environment, this requires that symbol - /// resolution must be done immediately after the child environment is collected. It does - /// mean that all the symbols referenced in the declaration and in this environment must be - /// declared before. If not, symbol resolution happens after the whole environment is collected, - /// and the symbol can be resolved in any order. - pub fn has_strict_declaration_order(&self) -> bool { - !self.is_script - } - - /// Adds an annotation to any segment. - pub fn annotate(&mut self, segment: &impl SourceSegmentHolder, symbol: SymbolRef) { - self.definitions.insert(segment.segment(), symbol); - } - - /// Maps the declaring environment of a segment. - pub fn bind_source(&mut self, segment: &impl SourceSegmentHolder, source: SourceId) { - self.declarations.insert(segment.segment(), source); - } - - /// Iterates over the segments that maps to a symbol. - pub fn list_definitions(&self) -> impl Iterator { - self.definitions.iter() - } - - /// Gets a symbol from the environment. - pub fn get_raw_symbol(&self, segment: SourceSegment) -> Option { - self.definitions.get(&segment).copied() - } - - /// Gets the declaring environment id of a segment. - pub fn get_raw_env(&self, segment: SourceSegment) -> Option { - self.declarations.get(&segment).copied() - } - - /// Finds the local segments that references a symbol. 
- pub fn find_references(&self, symbol_declaration: SymbolRef) -> Vec { - let mut references = Vec::new(); - for (segment, symbol_reference) in &self.definitions { - if symbol_reference == &symbol_declaration { - references.push(segment.clone()); - } - } - references - } -} diff --git a/analyzer/src/environment/symbols.rs b/analyzer/src/environment/symbols.rs deleted file mode 100644 index 7253b45b..00000000 --- a/analyzer/src/environment/symbols.rs +++ /dev/null @@ -1,472 +0,0 @@ -use std::collections::hash_map::Entry; -use std::collections::HashMap; -use std::fmt::{Display, Formatter}; - -use ast::r#use::InclusionPathItem; -use context::source::{SourceSegment, SourceSegmentHolder}; - -use crate::engine::Engine; -use crate::name::Name; -use crate::reef::{Externals, ReefId}; -use crate::relations::{LocalId, RelationId}; - -/// Information over the declared type of a variable -#[derive(Debug, Copy, Clone, PartialEq, Eq)] -pub enum SymbolInfo { - /// The symbol is a regular variable - Variable, - /// The symbol is a function declaration - Function, - /// The symbol is a type - Type, - - /// A magic symbol - Magic(MagicSymbolKind), -} - -#[derive(Debug, Copy, Clone, Eq, PartialEq, Hash)] -pub enum MagicSymbolKind { - /// This magic symbol refers to the program's arguments - ProgramArguments, -} - -#[derive(Debug, Eq, PartialEq, Hash)] -pub enum SymbolPathItem { - Reef(SourceSegment), - Symbol(String, SourceSegment), -} - -impl SourceSegmentHolder for SymbolPathItem { - fn segment(&self) -> SourceSegment { - match self { - SymbolPathItem::Reef(s) => s.clone(), - SymbolPathItem::Symbol(_, s) => s.clone(), - } - } -} - -impl Display for SymbolInfo { - fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result { - match self { - SymbolInfo::Variable => write!(f, "variable"), - SymbolInfo::Function => write!(f, "function"), - SymbolInfo::Type => write!(f, "type"), - SymbolInfo::Magic(MagicSymbolKind::ProgramArguments) => write!(f, "program arguments"), - } - } -} - -#[derive(Debug, Copy, Clone, Hash, PartialEq, Eq)] -pub enum SymbolRegistry { - /// type symbols - Types, - /// Variable and functions symbols - Objects, - /// Specific magic variable - Magic(MagicSymbolKind), -} - -impl SymbolRegistry { - /// returns true if the given symbol info is part of this registry - pub(crate) fn accepts(self, kind: SymbolInfo) -> bool { - match self { - SymbolRegistry::Types => matches!(kind, SymbolInfo::Type), - SymbolRegistry::Objects => matches!(kind, SymbolInfo::Variable | SymbolInfo::Function), - SymbolRegistry::Magic(pr) => matches!(kind, SymbolInfo::Magic(pl) if pr == pl), - } - } -} - -/// A symbol location is the result of the resolution of a sequence of [SymbolPathItem] (see [SymbolLocation::compute]). -#[derive(Debug, Eq, PartialEq, Hash, Clone)] -pub struct SymbolLocation { - /// The resolved name, can be relative only if [`is_current_reef_explicit`] is set to false. - pub name: Name, - /// If set to true, this location is an absolute location pointing to the current reef. - pub is_current_reef_explicit: bool, -} - -impl SymbolLocation { - /// constructs an absolute symbol location, pointing to the current reef - pub fn in_current_reef(fqn: Name) -> Self { - Self { - name: fqn, - is_current_reef_explicit: true, - } - } - - /// constructs a a symbol location, without specifying that the given name is absolute or relative, and - /// if this location targets the current reef. 
- pub fn unspecified(name: Name) -> Self { - Self { - name, - is_current_reef_explicit: false, - } - } - - /// Computes a symbol location from a given slice of [InclusionPathItem], - /// returning `Err(Vec)` if the path input contains invalid items, where the vector's segments are - /// the invalid item segments. - /// - /// A path is invalid if it contains any non-heading [InclusionPathItem::Reef] item. - /// If the [`must_be_relative`] flag is set, the path must not contain any [InclusionPathItem::Reef] to be valid. - /// - /// The function can also fail if the `must_be_relative` - pub fn compute(path: &[InclusionPathItem]) -> Result> { - let current_reef = path - .first() - .is_some_and(|f| matches!(f, InclusionPathItem::Reef(_))); - - let mut path_it = path.iter(); - - if current_reef && path.len() > 1 { - path_it.next(); - } - - let mut parts = Vec::new(); - let mut bad_segments = Vec::new(); - for it in path_it { - match it { - InclusionPathItem::Reef(seg) => bad_segments.push(seg.clone()), - InclusionPathItem::Symbol(ident) => parts.push(ident.to_string()), - } - } - - if !bad_segments.is_empty() { - return Err(bad_segments); - } - - Ok(Self { - name: Name::from(parts), - is_current_reef_explicit: current_reef, - }) - } -} - -pub fn resolve_loc<'a, 'e>( - loc: &SymbolLocation, - current: &'a Engine<'e>, - externals: &'a Externals<'e>, -) -> Option<(&'a Engine<'e>, ReefId)> { - if loc.is_current_reef_explicit { - Some((current, externals.current)) - } else { - let reef_name = loc.name.root(); - externals - .get_reef_by_name(reef_name) - .map(|(reef, id)| (&reef.engine, id)) - } -} - -/// A collection of variables -#[derive(Debug, Clone, Default)] -pub struct Symbols { - /// Locals declarations - locals: Locals, - - /// Relations with external variables. - /// The key is the variable Names, where value is the concerned symbol registry, - /// with relation to another external environment symbols. - externals: HashMap, -} - -impl Symbols { - /// Creates a new named local variable. - pub fn declare_local(&mut self, name: String, ty: SymbolInfo) -> LocalId { - self.locals.declare(name, ty) - } - - /// Creates a new magic variable - pub fn declare_magic(&mut self, ty: MagicSymbolKind) -> LocalId { - self.locals.declare_magic(ty) - } - - pub fn find_magic(&self, ty: MagicSymbolKind) -> Option { - self.locals.find_magic(ty) - } - - /// Returns the local variable associated with the id - pub fn get(&self, id: LocalId) -> Option<&Symbol> { - self.locals.vars.get(id.0) - } - - /// Returns an entry for the given external symbol name relation - pub fn external(&mut self, loc: SymbolLocation) -> Entry { - self.externals.entry(loc) - } - - /// Finds the local identifier associated with an already known name. - /// - /// The lookup uses the current scope, which is frequently updated during the collection phase. - /// That's the main reason why this method should be used in pair the variable capture - /// resolution, immediately after the closure is observed and inertly populated. - pub fn find_reachable(&self, name: &str, registry: SymbolRegistry) -> Option { - self.locals.position_reachable_local(name, registry) - } - - /// Finds the local exported symbol associated with an already known name. - /// - /// Exported symbols are always declared in the outermost scope, and should be checked only - /// after the whole environment is collected. 
- pub fn find_exported(&self, name: &str, registry: SymbolRegistry) -> Option { - self.locals - .vars - .iter() - .rev() - .position(|sym| sym.name == name && sym.is_exported() && registry.accepts(sym.ty)) - .map(|idx| LocalId(self.locals.vars.len() - 1 - idx)) - } - - /// Lists all local variables, in the order they are declared. - /// - /// This exposes their current state, which is frequently updated. - /// Use [`Symbols::find_reachable`] to lookup any variable during the collection phase, - /// or [`Symbols::find_exported`] to lookup an exported variable after the collection phase. - pub fn all(&self) -> &[Symbol] { - &self.locals.vars - } - - /// returns an iterator over all variables, with their local identifier - pub fn iter(&self) -> impl Iterator { - self.locals - .vars - .iter() - .enumerate() - .map(|(i, v)| (LocalId(i), v)) - } - - /// Return the amount of locals presents - pub fn len(&self) -> usize { - self.locals.vars.len() - } - - pub fn is_empty(&self) -> bool { - self.locals.vars.is_empty() - } - - /// Iterates over all the exported symbols, local to the environment. - pub fn exported_symbols(&self) -> impl Iterator { - //consider for now that all local vars of the outermost scope are exported - self.locals - .vars - .iter() - .enumerate() - .filter(|(_, var)| var.is_exported()) - .map(|(id, var)| (LocalId(id), var)) - } - - /// Iterates over all the global symbol ids, with their corresponding name. - pub fn external_symbols(&self) -> impl Iterator { - self.externals.iter().map(|(loc, sym)| (loc, *sym)) - } - - /// Finds the name of an external symbol. - /// - /// This returns the name only if the global object comes from this environment. - pub fn find_external_symbol_name(&self, object_id: RelationId) -> Option<&SymbolLocation> { - self.externals - .iter() - .find_map(|(name, id)| (*id == object_id).then_some(name)) - } - - pub fn begin_scope(&mut self) { - self.locals.begin_scope(); - } - - pub fn end_scope(&mut self) { - self.locals.end_scope(); - } -} - -#[derive(Debug, Clone, Default)] -struct Locals { - /// The actual list of seen and unique variables. - vars: Vec, - - /// The current depth of the scope. - /// - /// The first scope is 0. - current_depth: usize, -} - -impl Locals { - /// Adds a new symbol and binds it to the current scope. - fn declare(&mut self, name: String, ty: SymbolInfo) -> LocalId { - let id = self.vars.len(); - self.vars.push(Symbol { - name, - depth: self.current_depth as isize, - ty, - }); - LocalId(id) - } - - fn declare_magic(&mut self, ty: MagicSymbolKind) -> LocalId { - let id = self.vars.len(); - self.vars.push(Symbol { - name: String::default(), - depth: -1, //exported - ty: SymbolInfo::Magic(ty), - }); - LocalId(id) - } - - pub(crate) fn find_magic(&self, ty: MagicSymbolKind) -> Option { - self.vars - .iter() - .position(|var| var.ty == SymbolInfo::Magic(ty)) - .map(LocalId) - } - - /// Moves into a new scope. - /// - /// # Panics - /// This method panics if the maximum number of scopes has been reached. - fn begin_scope(&mut self) { - self.current_depth = (self.current_depth as isize) - .checked_add(1) - .expect("Too many scopes") as usize; - } - - /// Moves out of the current scope. - /// - /// This method marks all the variables that are not reachable anymore. - /// - /// # Panics - /// This method panics if the current scope is already the root scope. 
- fn end_scope(&mut self) { - self.vars - .iter_mut() - .rev() - .take_while(|var| var.depth == self.current_depth as isize) - .for_each(|var| { - var.depth = -var.depth; - }); - - self.current_depth = self - .current_depth - .checked_sub(1) - .expect("Cannot end the root scope"); - } - - /// Looks up a variable by name that is reachable from the current scope. - fn lookup_reachable_local(&self, name: &str, registry: SymbolRegistry) -> Option<&Symbol> { - self.vars - .iter() - .rev() - .find(|var| var.depth >= 0 && var.name == name && registry.accepts(var.ty)) - } - - /// Gets the variable id from the current scope. - fn position_reachable_local(&self, name: &str, registry: SymbolRegistry) -> Option { - self.vars - .iter() - .rev() - .position(|var| var.depth >= 0 && var.name == name && registry.accepts(var.ty)) - .map(|idx| LocalId(self.vars.len() - 1 - idx)) - } -} - -#[derive(Debug, Clone, PartialEq)] -pub struct Symbol { - /// The name identifier of the symbol. - pub name: String, - - /// Additional information about the symbol - pub ty: SymbolInfo, - - /// The depth of the symbol. - /// - /// This is used to keep track if the variable is still reachable during the first - /// pass of the analyzer. The value is positive if the symbol's scope has not ended - /// yet. If it is out of scope, the value is negative, with the absolute value being - /// the depth of the scope where the variable was declared. - depth: isize, -} - -impl Symbol { - /// Creates a new symbol. - /// - /// This convenience method accepts negative values as depths, which are the internal - /// representations of unreachable variables. - pub fn scoped(name: String, depth: isize) -> Self { - Self { - name, - depth, - ty: SymbolInfo::Variable, - } - } - - /// Returns `true` if the variable can be accessed externally, without being - /// captured. 
- pub const fn is_exported(&self) -> bool { - self.depth == -1 - } -} - -#[cfg(test)] -mod tests { - use super::*; - - #[test] - fn access_by_name() { - let mut locals = Locals::default(); - locals.declare("foo".to_owned(), SymbolInfo::Variable); - locals.begin_scope(); - locals.declare("bar".to_owned(), SymbolInfo::Variable); - assert_eq!( - locals.lookup_reachable_local("foo", SymbolRegistry::Objects), - Some(&Symbol::scoped("foo".to_owned(), 0)) - ); - - assert_eq!( - locals.lookup_reachable_local("bar", SymbolRegistry::Objects), - Some(&Symbol::scoped("bar".to_owned(), 1)) - ); - - assert_eq!( - locals.lookup_reachable_local("bar", SymbolRegistry::Types), - None - ); - } - - #[test] - fn access_out_of_scope() { - let mut locals = Locals::default(); - locals.begin_scope(); - locals.declare("bar".to_owned(), SymbolInfo::Variable); - locals.end_scope(); - assert_eq!( - locals.lookup_reachable_local("bar", SymbolRegistry::Objects), - None - ); - locals.begin_scope(); - assert_eq!( - locals.lookup_reachable_local("bar", SymbolRegistry::Objects), - None - ); - } - - #[test] - fn shadow_nested() { - let mut locals = Locals::default(); - locals.declare("foo".to_owned(), SymbolInfo::Variable); - locals.begin_scope(); - locals.begin_scope(); - locals.declare("foo".to_owned(), SymbolInfo::Variable); - assert_eq!( - locals.lookup_reachable_local("foo", SymbolRegistry::Objects), - Some(&Symbol::scoped("foo".to_owned(), 2)) - ); - locals.end_scope(); - assert_eq!( - locals.lookup_reachable_local("foo", SymbolRegistry::Objects), - Some(&Symbol::scoped("foo".to_owned(), 0)) - ); - locals.end_scope(); - assert_eq!( - locals.lookup_reachable_local("foo", SymbolRegistry::Objects), - Some(&Symbol::scoped("foo".to_owned(), 0)) - ); - } -} diff --git a/analyzer/src/types/hir.rs b/analyzer/src/hir.rs similarity index 52% rename from analyzer/src/types/hir.rs rename to analyzer/src/hir.rs index 512e9e7c..7b590cfd 100644 --- a/analyzer/src/types/hir.rs +++ b/analyzer/src/hir.rs @@ -1,46 +1,51 @@ -use crate::reef::ReefId; +use crate::typing::registry::{FunctionId, SchemaId}; +use crate::typing::user::{TypeId, ERROR_TYPE, UNIT_TYPE, UNKNOWN_TYPE}; +use crate::typing::variable::{LocalEnvironment, LocalId, Var}; +use crate::Reef; use ast::call::{RedirFd, RedirOp}; use ast::value::LiteralValue; -use context::source::{SourceSegment, SourceSegmentHolder}; +use context::source::Span; +use std::collections::hash_map::Values; +use std::collections::HashMap; +use std::path::PathBuf; +use std::rc::Rc; -use crate::relations::{LocalId, ResolvedSymbol, SourceId}; -use crate::types::engine::{FunctionId, StructureId}; -use crate::types::ty::TypeRef; -use crate::types::ERROR; - -#[derive(Clone, Copy, Debug, PartialEq, Hash, Eq)] -pub enum Var { - Local(LocalId), - External(ResolvedSymbol), -} - -/// A type checked expression attached to a source segment. 
#[derive(Clone, Debug, PartialEq)] pub struct TypedExpr { pub kind: ExprKind, - pub ty: TypeRef, - pub segment: SourceSegment, + pub ty: TypeId, + pub span: Span, } -impl SourceSegmentHolder for TypedExpr { - fn segment(&self) -> SourceSegment { - self.segment.clone() +impl TypedExpr { + pub fn noop(span: Span) -> Self { + Self { + kind: ExprKind::Noop, + span, + ty: UNIT_TYPE, + } + } + + pub fn error(span: Span) -> Self { + Self { + kind: ExprKind::Noop, + span, + ty: ERROR_TYPE, + } } } #[derive(Clone, Debug, PartialEq)] pub struct FieldAccess { pub object: Box, - pub structure: StructureId, - pub structure_reef: ReefId, + pub structure: SchemaId, pub field: LocalId, } #[derive(Clone, Debug, PartialEq)] pub struct FieldAssign { pub object: Box, - pub structure: StructureId, - pub structure_reef: ReefId, + pub structure: SchemaId, pub field: LocalId, pub new_value: Box, } @@ -53,7 +58,7 @@ pub struct LocalAssignment { #[derive(Clone, Debug, PartialEq)] pub struct Declaration { - pub identifier: LocalId, + pub identifier: Var, pub value: Option>, } @@ -64,12 +69,6 @@ pub struct Conditional { pub otherwise: Option>, } -#[derive(Clone, Debug, PartialEq)] -pub struct Convert { - pub inner: Box, - pub into: TypeRef, -} - #[derive(Clone, Debug, PartialEq)] pub struct Loop { pub condition: Option>, @@ -99,7 +98,7 @@ pub struct RangeFor { pub receiver: LocalId, /// The type of the receiver. - pub receiver_type: TypeRef, + pub receiver_type: TypeId, /// The range of values that will be iterated over. pub iterable: TypedExpr, @@ -119,9 +118,7 @@ pub struct ConditionalFor { #[derive(Clone, Debug, PartialEq)] pub struct FunctionCall { pub arguments: Vec, - pub reef: ReefId, pub function_id: FunctionId, - pub source_id: Option, } #[derive(Clone, Debug, PartialEq)] @@ -170,7 +167,6 @@ pub enum ExprKind { Conditional(Conditional), ConditionalLoop(Loop), ForLoop(ForLoop), - Convert(Convert), ProcessCall(Vec), FunctionCall(FunctionCall), MethodCall(MethodCall), @@ -186,18 +182,111 @@ pub enum ExprKind { } impl TypedExpr { - /// Creates a no-op expression that describes an error. - pub(crate) fn error(segment: SourceSegment) -> Self { + pub(crate) fn is_ok(&self) -> bool { + !self.is_err() + } + + pub(crate) fn is_err(&self) -> bool { + matches!(self.ty, UNKNOWN_TYPE | ERROR_TYPE) + } +} + +/// A unit of code. +pub struct Chunk { + /// The fully qualified name to access this chunk. + pub fqn: PathBuf, + + pub function: Option, + + /// The expression that this chunk represents. + pub expr: TypedExpr, + + pub locals: LocalEnvironment, +} + +/// A group of [`Chunk`]s that were defined in the same context (usually a file). +pub struct Module { + fqn: PathBuf, + chunks: Vec, + pub(crate) exports: HashMap, TypeId>, +} + +impl Module { + pub(crate) fn new(fqn: PathBuf) -> Self { Self { - kind: ExprKind::Noop, - ty: ERROR, - segment, + fqn, + chunks: Vec::new(), + exports: HashMap::new(), + } + } + + pub(crate) fn enter_namespace(&mut self, name: &str) { + self.fqn.push(name); + } + + pub(crate) fn exit_namespace(&mut self) { + self.fqn.pop(); + } + + pub(crate) fn add( + &mut self, + function: Option, + expr: TypedExpr, + locals: LocalEnvironment, + ) { + self.chunks.push(Chunk { + fqn: self.fqn.clone(), + function, + expr, + locals, + }); + } +} + +impl Reef { + pub fn group_by_content(&self) -> ContentIterator { + ContentIterator { + inner: self.hir.values(), } } +} + +pub type NamedExports = HashMap, TypeId>; + +/// A group of chunks that were defined in the same content. 
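(The `EncodableContent` grouping that this comment introduces is defined just below.) As a hedged illustration of how a backend might walk that grouping: the `analyzer::...` import paths and the `emit` callback below are assumptions for the sketch, not part of this patch; only `Reef::group_by_content`, `Chunk` and `EncodableContent` come from the hunk above.

```rust
// Hedged sketch only: drive the per-content grouping produced by the new hir module.
use analyzer::hir::Chunk;
use analyzer::Reef;

fn lower_reef(reef: &Reef, mut emit: impl FnMut(&Chunk)) {
    for content in reef.group_by_content() {
        // Every chunk except the last one is a function hoisted out of the same file...
        for function in content.functions {
            emit(function);
        }
        // ...and `split_last` in the iterator keeps the script body as the `main` chunk.
        emit(content.main);
    }
}
```

Keeping the script body as the final chunk of each `Module` lets a consumer emit all function chunks before the entry point without any extra sorting.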
+#[derive(Copy, Clone)] +pub struct EncodableContent<'a> { + /// The main chunk of this content. + pub main: &'a Chunk, + + /// The functions that this content provides. + pub functions: &'a [Chunk], + + /// The exports that this content provides. + pub exports: &'a NamedExports, +} + +pub struct ContentIterator<'a> { + inner: Values<'a, PathBuf, Module>, +} + +impl<'a> Iterator for ContentIterator<'a> { + type Item = EncodableContent<'a>; - /// Sets the type of the expression to [`crate::types::ty::Type::Error`]. - pub(crate) fn poison(mut self) -> Self { - self.ty = ERROR; - self + fn next(&mut self) -> Option { + self.inner.next().map( + |Module { + fqn: _, + chunks, + exports, + }| { + let (main, functions) = chunks.split_last().unwrap(); + EncodableContent { + main, + functions, + exports, + } + }, + ) } } diff --git a/analyzer/src/hoist.rs b/analyzer/src/hoist.rs new file mode 100644 index 00000000..9487fd17 --- /dev/null +++ b/analyzer/src/hoist.rs @@ -0,0 +1,691 @@ +use crate::module::{Export, ModuleTree, ModuleView}; +use crate::symbol::{SymbolRegistry, SymbolTable, UndefinedSymbol}; +use crate::typing::function::{Function, FunctionKind}; +use crate::typing::schema::Schema; +use crate::typing::user::{ + lookup_builtin_type, TypeId, UserType, ERROR_TYPE, STRING_TYPE, UNIT_TYPE, +}; +use crate::typing::{Parameter, TypeChecker, TypeErrorKind}; +use crate::{Reef, SourceLocation, TypeError}; +use ast::function::{FunctionDeclaration, FunctionParameter}; +use ast::r#struct::{StructDeclaration, StructImpl}; +use ast::r#type::Type; +use ast::r#use::{Import, ImportedSymbol, InclusionPathItem}; +use ast::variable::TypedVariable; +use ast::Expr; +use context::source::{SourceSegmentHolder, Span}; +use parser::Root; +use std::collections::HashMap; +use std::ffi::OsString; +use std::path::{Path, PathBuf}; + +/// Places functions and types at the top level of the symbol table. +/// +/// Those symbols may be used before they are declared, so their name needs to be forward declared, +/// and partially known ahead of time. For instance, the following code should be valid, even though +/// `Bar` is used before it is declared: +/// ```text +/// struct Foo { +/// bar: Bar, +/// } +/// struct Bar {} +/// ``` +/// +/// This step performs multiple passes where types are added to the symbol table, and then their +/// fields are added to the type. A third pass is done to hoist functions, now that all parameters +/// and return types may be known. 
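To make the pass ordering concrete, here is a small self-contained sketch of the same forward-declaration idea. The `ToyStruct` and `toy_hoist` names are illustrative only; the real passes below operate on `Schema`s, the `SymbolTable` and the `TypeChecker`.

```rust
use std::collections::HashMap;

/// A stand-in for a parsed `struct` declaration: (field name, type name) pairs.
struct ToyStruct<'a> {
    name: &'a str,
    fields: Vec<(&'a str, &'a str)>,
}

/// Resolves every field type to the index of the struct that declares it,
/// even when that struct appears later in the source.
fn toy_hoist<'a>(decls: &[ToyStruct<'a>]) -> Result<HashMap<&'a str, Vec<usize>>, String> {
    // Pass 1: register every type name first, so declaration order is irrelevant.
    let ids: HashMap<&str, usize> = decls
        .iter()
        .enumerate()
        .map(|(id, decl)| (decl.name, id))
        .collect();
    // Pass 2: with all names known, field types can now be resolved.
    decls
        .iter()
        .map(|decl| {
            let fields = decl
                .fields
                .iter()
                .map(|(_, ty)| {
                    ids.get(ty)
                        .copied()
                        .ok_or_else(|| format!("unknown type `{ty}`"))
                })
                .collect::<Result<Vec<_>, String>>()?;
            Ok((decl.name, fields))
        })
        .collect()
}

fn main() {
    // `Foo` uses `Bar` before `Bar` is declared, exactly like the doc example above.
    let decls = [
        ToyStruct { name: "Foo", fields: vec![("bar", "Bar")] },
        ToyStruct { name: "Bar", fields: vec![] },
    ];
    assert!(toy_hoist(&decls).is_ok());
}
```

The `hoist_files` function that follows applies the same principle across files: it records which modules each file `requires`, orders them topologically, and reports a `CircularDependency` error when no such order exists.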
+pub(super) fn hoist_files( + foreign: &HashMap, + reef: &mut Reef, + checker: &mut TypeChecker, +) -> HoistingResult { + let mut errors = Vec::::new(); + let mut graph = HashMap::new(); + for (path, root) in &reef.files { + let mut table = SymbolTable::new(path.clone()); + let mut exports = reef.exports.take_exports(path); // This is not problematic if the export is not found, but it shouldn't happen + let modules = ModuleView::new(&reef.exports, foreign); + let mut deps = Dependencies { + modules, + requires: Vec::new(), + }; + hoist_type_names(root, checker, &mut table, &mut exports); + hoist_functions( + root, + checker, + &mut table, + &mut exports, + &mut deps, + &mut errors, + ); + graph.insert(path.clone(), deps.requires); + reef.exports.insert(path, exports); + reef.symbols.insert(path.clone(), table); + } + + let mut sorted: Vec = Vec::new(); + let mut frontier: Vec = graph + .keys() + .filter(|module| graph.values().all(|deps| !deps.contains(module))) + .cloned() + .collect(); + while let Some(module) = frontier.pop() { + sorted.push(module.clone()); + if let Some(requires) = graph.remove(&module) { + for require in requires { + if !graph.values().any(|deps| deps.contains(&require)) { + frontier.push(require); + } + } + } + } + sorted.reverse(); + + if !graph.is_empty() { + let mut cycle = graph + .drain() + .map(|(path, _)| path) + .collect::>(); + cycle.sort(); + let source = cycle.pop().expect("at least one item"); + errors.push(TypeError::new( + TypeErrorKind::CircularDependency { cycle }, + SourceLocation::new(source, Span::default()), + )); + } + + HoistingResult { errors, sorted } +} + +pub(crate) struct HoistingResult { + pub(crate) errors: Vec, + pub(crate) sorted: Vec, +} + +struct Dependencies<'a> { + modules: ModuleView<'a>, + requires: Vec, // TODO: use the entire path instead +} + +fn hoist_type_names( + root: &Root, + checker: &mut TypeChecker, + table: &mut SymbolTable, + exports: &mut [Export], +) { + for expr in &root.expressions { + if let Expr::StructDeclaration(StructDeclaration { + name, + segment: span, + .. 
+ }) = expr + { + let schema = checker + .registry + .define_schema(Schema::new(name.value.to_string())); + let ty = checker.types.alloc(UserType::Parametrized { + schema, + params: Vec::new(), + }); + table.insert_local(name.to_string(), ty, span.clone(), SymbolRegistry::Type); + if let Some(export) = exports + .iter_mut() + .find(|export| export.name == name.value && export.registry == SymbolRegistry::Type) + { + export.ty = ty; + } + } + } +} + +fn hoist_functions( + root: &Root, + checker: &mut TypeChecker, + table: &mut SymbolTable, + exports: &mut [Export], + deps: &mut Dependencies, + errors: &mut Vec, +) { + for expr in &root.expressions { + match expr { + Expr::FunctionDeclaration(fn_decl) => { + hoist_fn_decl(fn_decl, None, checker, table, exports, errors); + } + Expr::StructDeclaration(struct_decl) => { + hoist_struct_decl(struct_decl, checker, table, errors); + } + Expr::Impl(impl_decl) => { + hoist_impl_decl(impl_decl, checker, table, exports, errors); + } + Expr::Use(import) => { + match &import.import { + Import::Symbol(ImportedSymbol { + path, + alias, + segment: span, + }) => { + let (last, rest) = path.split_last().expect("at least one item"); + if let Some(module) = deps.modules.get_direct(rest) { + for export in &module.exports { + if export.name == last.name() { + table.insert_remote( + alias + .as_ref() + .map(|alias| alias.value.as_str()) + .unwrap_or(last.name()) + .to_owned(), + span.clone(), + export, + ); + if export.registry == SymbolRegistry::Variable { + // FIXME: only work in 'reef::path::path::variable' case + let path = rest + .iter() + .skip_while(|item| { + matches!(item, InclusionPathItem::Reef(_)) + }) + .map(|item| item.name()) + .collect::(); + deps.requires.push(path); + } + } + } + } + } + Import::AllIn(path, _) => todo!(), + Import::Environment(_) => {} + Import::List(list) => { + todo!() + } + } + } + _ => {} + } + } +} + +struct CurrentType { + current_ty: TypeId, + current_generics: Vec, +} + +fn hoist_fn_decl( + FunctionDeclaration { + name, + type_parameters, + parameters, + return_type, + .. 
+ }: &FunctionDeclaration, + current_ty: Option, + checker: &mut TypeChecker, + table: &mut SymbolTable, + exports: &mut [Export], + errors: &mut Vec, +) { + table.enter_scope(); + let (current_ty, mut generic_variables) = match current_ty { + Some(CurrentType { + current_ty, + current_generics, + }) => (Some(current_ty), current_generics), + None => (None, Vec::new()), + }; + generic_variables.extend(type_parameters.iter().map(|param| { + checker + .types + .alloc(UserType::GenericVariable(param.name.to_string())) + })); + for (name, ty) in type_parameters.iter().zip(generic_variables.iter()) { + table.insert_local( + name.name.to_string(), + *ty, + name.segment(), + SymbolRegistry::Type, + ); + } + let param_types = parameters + .iter() + .map(|param| { + let ty = match check_parameter_type(current_ty, param, table, checker) { + Ok(ty) => ty, + Err(err) => { + errors.push(err.into_general(&table.path)); + ERROR_TYPE + } + }; + Parameter { + ty, + span: param.segment(), + } + }) + .collect::>(); + let return_type = match return_type + .as_ref() + .map(|ty| check_type(ty, table, checker)) + { + Some(Ok(ty)) => ty, + Some(Err(err)) => { + errors.push(err.into_general(&table.path)); + ERROR_TYPE + } + None => UNIT_TYPE, + }; + let mut fqn = table.path.clone(); + fqn.push(name.value.to_string()); + let function = checker.registry.define_function(Function { + declared_at: table.path.clone(), + fqn, + generic_variables, + param_types, + return_type, + kind: FunctionKind::Function, + }); + let function_type = checker.types.alloc(UserType::Function(function)); + match current_ty { + Some(current_ty) => { + let UserType::Parametrized { schema, .. } = checker.types[current_ty] else { + panic!( + "the current type should be a struct, got {:?}", + checker.types[current_ty] + ); + }; + let Schema { + ref mut methods, .. + } = checker.registry[schema]; + methods.insert(name.to_string(), function); + } + None => table.insert_local( + name.to_string(), + function_type, + name.segment(), + SymbolRegistry::Function, + ), + }; + if let Some(Export { ty, .. }) = exports + .iter_mut() + .find(|export| export.name == *name.value && export.registry == SymbolRegistry::Function) + { + *ty = function_type; + } +} + +fn hoist_struct_decl( + StructDeclaration { + name, + parameters, + fields, + .. + }: &StructDeclaration, + checker: &mut TypeChecker, + table: &mut SymbolTable, + errors: &mut Vec, +) { + let ty = table + .get(name.value.as_str(), SymbolRegistry::Type) + .expect("the type should be in the table") + .ty; + let UserType::Parametrized { schema, .. } = checker.types[ty] else { + panic!("the type should have a schema"); + }; + let generics = parameters + .iter() + .map(|param| { + checker + .types + .alloc(UserType::GenericVariable(param.name.to_string())) + }) + .collect::>(); + table.enter_scope(); + for (name, ty) in parameters.iter().zip(generics.iter()) { + table.insert_local( + name.name.to_string(), + *ty, + name.segment(), + SymbolRegistry::Type, + ); + } + let fields_types = fields + .iter() + .map(|field| { + let ty = match check_type(&field.tpe, table, checker) { + Ok(ty) => ty, + Err(err) => { + errors.push(err.into_general(&table.path)); + ERROR_TYPE + } + }; + ( + field.name.to_string(), + Parameter { + ty, + span: field.tpe.segment(), + }, + ) + }) + .collect::>(); + let Schema { + ref mut generic_variables, + ref mut fields, + .. 
+ } = checker.registry[schema]; + generic_variables.extend(generics); + fields.extend(fields_types); + table.exit_scope(); +} + +fn hoist_impl_decl( + StructImpl { + type_parameters, + impl_type, + functions, + .. + }: &StructImpl, + checker: &mut TypeChecker, + table: &mut SymbolTable, + exports: &mut [Export], + errors: &mut Vec, +) { + table.enter_scope(); + let generic_variables = type_parameters + .iter() + .map(|param| { + checker + .types + .alloc(UserType::GenericVariable(param.name.to_string())) + }) + .collect::>(); + for (name, ty) in type_parameters.iter().zip(generic_variables.iter()) { + table.insert_local( + name.name.to_string(), + *ty, + name.segment(), + SymbolRegistry::Type, + ); + } + let impl_ty = match check_type(impl_type, table, checker) { + Ok(ty) => { + if let UserType::Parametrized { .. } = checker.types[ty] { + ty + } else { + errors.push(TypeError::new( + TypeErrorKind::CannotImplPrimitive, + SourceLocation::new(table.path.clone(), impl_type.segment()), + )); + ERROR_TYPE + } + } + Err(err) => { + errors.push(err.into_general(&table.path)); + ERROR_TYPE + } + }; + if impl_ty.is_ok() { + for function in functions { + let current = CurrentType { + current_ty: impl_ty, + current_generics: generic_variables.clone(), + }; + hoist_fn_decl(function, Some(current), checker, table, exports, errors); + } + } + table.exit_scope(); +} + +fn check_parameter_type<'a>( + current_ty: Option, + param: &'a FunctionParameter, + table: &'a SymbolTable, + checker: &mut TypeChecker, +) -> Result> { + match param { + FunctionParameter::Named(ty) => check_type_sig(ty, table, checker), + FunctionParameter::Variadic(Some(ty), _) => check_type(ty, table, checker), + FunctionParameter::Variadic(None, _) => Ok(STRING_TYPE), + FunctionParameter::Slf(_) => { + current_ty.ok_or(InvalidType::SelfOutsideImpl(param.segment())) + } + } +} + +fn check_type_sig<'a>( + sig: &'a TypedVariable, + table: &SymbolTable, + checker: &mut TypeChecker, +) -> Result> { + if let Some(ty) = &sig.ty { + check_type(ty, table, checker) + } else { + Err(InvalidType::MissingType(sig.segment())) + } +} + +fn check_type<'a>( + ty: &'a Type, + table: &SymbolTable, + checker: &mut TypeChecker, +) -> Result> { + let Type::Parametrized(ty) = ty else { + return Err(InvalidType::MissingType(ty.segment())); + }; + let [path] = ty.path.as_slice() else { + return Err(InvalidType::MissingType(ty.segment.clone())); + }; + let params = ty + .params + .iter() + .map(|param| check_type(param, table, checker)) + .collect::, _>>()?; + let name = path.name(); + let ty = match lookup_builtin_type(name) { + Some(ty) => ty, + None => match table.lookup(name, SymbolRegistry::Type) { + Ok(symbol) => symbol.ty, + Err(inner) => { + return Err(InvalidType::UnknownType { + name, + span: ty.segment.clone(), + inner, + }); + } + }, + }; + match checker.types[ty] { + UserType::Parametrized { schema, .. 
} => Ok(if params.is_empty() { + ty + } else { + checker + .types + .alloc(UserType::Parametrized { schema, params }) + }), + _ => Ok(ty), + } +} + +pub(super) enum InvalidType<'a> { + SelfOutsideImpl(Span), + MissingType(Span), + UnknownType { + name: &'a str, + span: Span, + inner: UndefinedSymbol, + }, +} + +impl InvalidType<'_> { + pub(super) fn into_general(self, path: &Path) -> TypeError { + match self { + Self::SelfOutsideImpl(span) => TypeError::new( + TypeErrorKind::UnexpectedSelfParameter, + SourceLocation::new(path.to_owned(), span), + ), + Self::MissingType(span) => TypeError::new( + TypeErrorKind::MissingType, + SourceLocation::new(path.to_owned(), span), + ), + Self::UnknownType { name, span, inner } => TypeError::new( + TypeErrorKind::UndefinedSymbol { + name: name.to_owned(), + expected: SymbolRegistry::Type, + found: match inner { + UndefinedSymbol::NotFound => None, + UndefinedSymbol::WrongRegistry(symbol) => Some(symbol), + }, + }, + SourceLocation::new(path.to_owned(), span), + ), + } + } +} + +#[cfg(test)] +mod tests { + use super::*; + use crate::module::import_multi; + use crate::MemoryFilesystem; + use std::ffi::OsString; + use std::path::PathBuf; + + fn hoist_files(fs: MemoryFilesystem, entrypoint: &str) -> Vec { + let mut reef = Reef::new(OsString::from("test")); + assert_eq!( + import_multi(&mut reef, &fs, entrypoint), + [], + "no import errors should be found" + ); + super::hoist_files(&mut HashMap::new(), &mut reef, &mut TypeChecker::default()).errors + } + + fn hoist(source: &str) -> Vec { + let fs = MemoryFilesystem::new(HashMap::from([(PathBuf::from("main.msh"), source)])); + hoist_files(fs, "main.msh") + } + + fn hoist_multi(sources: [(PathBuf, &str); N]) -> Vec { + let entrypoint = sources + .first() + .expect("at least one source") + .0 + .display() + .to_string(); + hoist_files( + MemoryFilesystem::from_iter(sources.into_iter()), + &entrypoint, + ) + } + + #[test] + fn function_use_builtin_type() { + let errors = hoist("fun a(x: Int);"); + assert_eq!(errors, []); + } + + #[test] + fn function_use_unknown_type() { + let errors = hoist("fun a(x: Foo);"); + assert_eq!( + errors, + [TypeError::new( + TypeErrorKind::UndefinedSymbol { + name: "Foo".to_string(), + expected: SymbolRegistry::Type, + found: None, + }, + SourceLocation::new(PathBuf::from("main.msh"), 9..12), + )] + ); + } + + #[test] + fn function_use_type_below() { + let errors = hoist("fun a(x: Foo); struct Foo {}"); + assert_eq!(errors, []); + } + + #[test] + fn self_outside_impl() { + let errors = hoist("fun a(self);"); + assert_eq!( + errors, + [TypeError::new( + TypeErrorKind::UnexpectedSelfParameter, + SourceLocation::new(PathBuf::from("main.msh"), 6..10), + )] + ); + } + + #[test] + fn generic_field_struct() { + let errors = hoist("struct Box[T] { value: T }"); + assert_eq!(errors, []); + } + + #[test] + fn dont_share_t() { + let errors = hoist("struct Foo[T] {}\nfun test() -> T;"); + assert_eq!( + errors, + [TypeError::new( + TypeErrorKind::UndefinedSymbol { + name: "T".to_owned(), + expected: SymbolRegistry::Type, + found: None, + }, + SourceLocation::new(PathBuf::from("main.msh"), 31..32) + )] + ); + } + + #[test] + fn import_type() { + let errors = hoist_multi([ + (PathBuf::from("main"), "use reef::foo::Foo; fun a(x: Foo);"), + (PathBuf::from("foo"), "struct Foo {}"), + ]); + assert_eq!(errors, []); + } + + #[test] + fn report_variable_cycle() { + let errors = hoist_multi([ + (PathBuf::from("main"), "use reef::test::bar\nval expose"), + (PathBuf::from("test"), "use 
reef::main::expose\nval bar"), + ]); + assert_eq!( + errors, + [TypeError::new( + TypeErrorKind::CircularDependency { + cycle: vec![PathBuf::from("main")], + }, + SourceLocation::new(PathBuf::from("test"), 0..0) + )] + ); + } + + #[test] + fn no_variable_init_order() { + let errors = hoist_multi([ + ( + PathBuf::from("foo"), + "use reef::math::bar as baz +val foo = 9 +val bar = $baz", + ), + ( + PathBuf::from("math"), + "use reef::foo::foo +val foo = $foo +val bar = 4", + ), + ]); + assert_eq!( + errors, + [TypeError::new( + TypeErrorKind::CircularDependency { + cycle: vec![PathBuf::from("foo")], + }, + SourceLocation::new(PathBuf::from("math"), 0..0) + )] + ); + } +} diff --git a/analyzer/src/importer.rs b/analyzer/src/importer.rs deleted file mode 100644 index 9eb8f461..00000000 --- a/analyzer/src/importer.rs +++ /dev/null @@ -1,88 +0,0 @@ -use std::collections::HashMap; - -use ast::Expr; -use context::source::ContentId; - -use crate::name::Name; - -/// An imported expression that is bound to a content identifier. -#[derive(Debug, Clone, PartialEq)] -pub struct Imported { - /// The content identifier from which the expression was imported. - pub content: ContentId, - - /// The imported expression. - pub expr: Expr, -} - -/// The outcome when trying to get an expression from a [`Name`]. -#[derive(Debug, PartialEq)] -pub enum ImportResult { - /// The import was successful and can be used. - Success(Imported), - - /// The source could not be found. Another name may be tried. - NotFound, - - /// The source has been found but could not be retrieved. - /// - /// This error is fatal and should not be ignored. - Failure, -} - -/// Import an abstract syntax tree from a given name. -pub trait ASTImporter { - /// Gets an expression from the given import name. - /// - /// This method should return [`ImportResult::NotFound`] if a source with - /// the given name could not be found. If the source could be found, but for - /// any reason could not be retrieved (because of IO or parsing errors), - /// this method should return [`ImportResult::Failure`]. Implementers of - /// this trait may expose the actual error types. - fn import(&mut self, name: &Name) -> ImportResult; -} - -/// An importer with predefined sources. -/// This importer implementation should only be used for tests. 
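The rework replaces this name-based, test-only importer with the path-based `Filesystem` trait added to `analyzer/src/lib.rs` later in this patch. As a minimal sketch of a disk-backed implementation, assuming the trait's `read` returns `io::Result<String>` (as the in-memory test helper suggests); the `DiskFilesystem` name and the root-joining behaviour are illustrative and not part of the patch:

    use std::io;
    use std::path::{Path, PathBuf};

    /// Illustrative only: serves module sources from a directory on disk.
    struct DiskFilesystem {
        root: PathBuf,
    }

    impl Filesystem for DiskFilesystem {
        fn read(&self, path: &Path) -> io::Result<String> {
            // Module paths are resolved relative to the project root;
            // extension handling is deliberately left out of this sketch.
            std::fs::read_to_string(self.root.join(path))
        }
    }
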
-pub struct StaticImporter<'a, F> -where - F: Fn(&'a str) -> Expr, -{ - ast_factory: F, - sources: HashMap, -} - -impl<'a, P> StaticImporter<'a, P> -where - P: Fn(&'a str) -> Expr, -{ - pub fn new(sources: [(Name, &'a str); N], ast_supplier: P) -> Self { - Self { - ast_factory: ast_supplier, - sources: HashMap::from(sources), - } - } -} - -impl<'a, P> ASTImporter for StaticImporter<'a, P> -where - P: Fn(&'a str) -> Expr, -{ - fn import(&mut self, name: &Name) -> ImportResult { - let ast = self.sources.get(name).map(|src| (self.ast_factory)(src)); - ast.map(|expr| Imported { - content: ContentId(0), - expr, - }) - .into() - } -} - -impl From> for ImportResult { - fn from(opt: Option) -> Self { - match opt { - Some(imported) => ImportResult::Success(imported), - None => ImportResult::NotFound, - } - } -} diff --git a/analyzer/src/imports.rs b/analyzer/src/imports.rs deleted file mode 100644 index 0e209833..00000000 --- a/analyzer/src/imports.rs +++ /dev/null @@ -1,148 +0,0 @@ -use std::collections::HashMap; -use std::fmt::{Debug, Formatter}; - -use indexmap::IndexMap; - -use crate::environment::symbols::{SymbolLocation, SymbolRegistry}; -use crate::reef::ReefId; -use context::source::SourceSegment; - -use crate::relations::{ResolvedSymbol, SourceId}; - -#[derive(Debug, Default)] -pub struct Imports { - /// Associates a source object with its imports. - /// - /// Imports may only be declared at the top level of a source. This lets us track the unresolved imports - /// per [`crate::environment::Environment`]. If a source is not tracked here, it means that it has no - /// imports. - imports: HashMap, -} - -impl Imports { - /// References a new import directive in the given source. - /// - /// This directive may be used later to resolve the import. - pub fn add_unresolved_import( - &mut self, - source: SourceId, - import: UnresolvedImport, - import_expr: SourceSegment, - ) -> Option { - let imports = self.imports.entry(source).or_default(); - imports.add_unresolved_import(import, import_expr) - } - - pub fn get_imports(&self, source: SourceId) -> Option<&SourceImports> { - self.imports.get(&source) - } - - pub fn get_imports_mut(&mut self, source: SourceId) -> Option<&mut SourceImports> { - self.imports.get_mut(&source) - } - - /// Removes all the imports that were declared at or after the given source. - pub fn retain_before(&mut self, source: SourceId) { - self.imports.retain(|id, _| id.0 < source.0); - } -} - -/// The structure that hosts the unresolved and resolved imported symbols of an environment -#[derive(PartialEq, Default)] -pub struct SourceImports { - /// The imports that still needs to be resolved. - /// Binds an [UnresolvedImport] to the segment that introduced the import. - unresolved_imports: IndexMap, - - /// Binds a symbol name to its resolved import, with the source segment where the import is declared. - imported_symbols: HashMap, -} - -#[derive(Debug, Eq, PartialEq, Hash)] -pub enum UnresolvedImport { - /// A symbol import with an optional alias. - Symbol { - alias: Option, - loc: SymbolLocation, - }, - /// Variant to target all the exported symbols of a symbol - AllIn(SymbolLocation), -} - -/// A resolved symbol import -#[derive(PartialEq, Eq, Debug)] -pub enum ResolvedImport { - /// The import is a symbol name. 
- /// A (non empty) hashmap contains the binding, for the bound name, of the symbol according to its registry - Symbols(HashMap), - /// The import is an environment - Env { - /// The reef where the environment is defined - reef: ReefId, - /// The source id of the environment - source: SourceId, - }, - /// The import is unresolvable - Dead, -} - -impl Debug for SourceImports { - fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result { - let mut imported_symbols: Vec<_> = self.imported_symbols.iter().collect(); - imported_symbols.sort_by_key(|(k, _)| *k); - f.debug_struct("Imports") - .field("imported_symbols", &imported_symbols) - .field("unresolved_imports", &self.unresolved_imports) - .finish() - } -} - -impl SourceImports { - pub fn new(unresolved_imports: IndexMap) -> Self { - Self { - unresolved_imports, - imported_symbols: HashMap::new(), - } - } - - #[cfg(test)] - pub fn with( - unresolved_imports: IndexMap, - imported_symbols: HashMap, - ) -> Self { - Self { - unresolved_imports, - imported_symbols, - } - } - - ///Adds an unresolved import, placing the given `import_expr` as the dependent . - pub fn add_unresolved_import( - &mut self, - import: UnresolvedImport, - segment: SourceSegment, - ) -> Option { - self.unresolved_imports.insert(import, segment) - } - - pub fn take_unresolved_imports(&mut self) -> IndexMap { - std::mem::take(&mut self.unresolved_imports) - } - - pub fn set_resolved_import( - &mut self, - name: String, - resolved: ResolvedImport, - segment: SourceSegment, - ) { - self.imported_symbols.insert(name, (resolved, segment)); - } - - pub fn get_import(&self, name: &str) -> Option<&ResolvedImport> { - self.imported_symbols.get(name).map(|(i, _)| i) - } - - pub fn get_import_segment(&self, name: &str) -> Option { - self.imported_symbols.get(name).map(|(_, s)| s.clone()) - } -} diff --git a/analyzer/src/lib.rs b/analyzer/src/lib.rs index f65d1c27..7731bed2 100644 --- a/analyzer/src/lib.rs +++ b/analyzer/src/lib.rs @@ -1,257 +1,169 @@ -//! The Moshell analyzer takes a dynamic set of sources, resolves them and diagnoses -//! errors. It produces at the end a typed intermediate representation of the -//! program that can be directly compiled. +//! Explores a whole Moshell source tree and verify it. //! -//! The analyzer is composed of a set of steps that are executed in order. Each -//! step is responsible for a specific task, and the analyzer is responsible for -//! orchestrating the steps. Those tasks fail-fast, meaning that if a step fails, -//! the analyzer will stop and will not try to execute the next steps. +//! Moshell is a modular and statically typed language. Sources files are organized in libraries +//! called "reefs" and reefs are composed of a hierarchy of modules that contains code. [`Reef`]s +//! are analyzed one at a time and may have non-cyclic dependencies between them. The analysis +//! fills a [`Database`] with a type-checked representation of the code. //! -//! If you want to know more about the steps, you can check the [`steps`] module. +//! The analysis is done in a pipeline: +//! 1. *Importing*: the whole project is parsed and indexed in a list of exports and imports. +//! 2. *Hoisting*: the types and symbols are discovered and placed in the global scope of each +//! module. +//! 3. *Type checking*: the types are checked for consistency and errors are reported. //! -//! The main usage of the analyzer is to verify that the sources are valid before -//! compiling them. -//! 
If you also need to keep the state of the analysis, you can use the [`Analyzer`] -//! struct directly, that offers a more fine-grained control over the analysis. - -#![allow(dead_code)] - -use crate::diagnostic::Diagnostic; -use crate::engine::Engine; -use crate::importer::{ASTImporter, Imported}; -use crate::imports::Imports; -use crate::name::Name; -use crate::reef::Externals; -use crate::relations::{Relations, SourceId}; -use crate::steps::collect::SymbolCollector; -use crate::steps::resolve_sources; -use crate::steps::typing::apply_types; -use crate::types::ctx::TypeContext; -use crate::types::engine::TypedEngine; -use crate::types::Typing; -use std::collections::HashSet; - -pub mod diagnostic; -pub mod engine; -pub mod environment; -pub mod importer; -pub mod name; -pub mod relations; - -mod dependency; -pub mod imports; -pub mod reef; -pub mod steps; -pub mod types; - -/// Discovers the sources that are imported by the given source in the importer. -/// -/// The returned analyzer contains the discovered sources and the diagnostics -/// that were generated, be sure to check them for errors. -pub fn analyze<'a>( - entry_point: Name, - importer: &mut impl ASTImporter, - externals: &Externals, -) -> Analyzer<'a> { - let mut analyzer = Analyzer::new(); - analyzer.process(entry_point, importer, externals); - analyzer +//! Each phase uses the results of the previous ones, but each phase take can work with partial +//! results. When the input has traversed the whole pipeline, it may be considered as valid to +//! pass to a compiler. + +pub mod hir; +mod hoist; +mod module; +pub mod symbol; +pub mod typing; + +use crate::hoist::hoist_files; +use crate::module::{import_multi, ModuleTree}; +use crate::symbol::SymbolTable; +use crate::typing::{type_check, TypeChecker, TypeError}; +use context::source::Span; +use parser::err::ParseError; +use std::collections::HashMap; +use std::ffi::OsString; +use std::io; +use std::path::{Path, PathBuf}; + +/// A byte range in a file. +#[derive(Debug, Clone, PartialEq, Eq, Hash)] +pub struct SourceLocation { + /// The path to the file, relative to the project root. + pub path: PathBuf, + + /// The byte indices of the start and end of the span. + pub span: Span, } -/// Processes sources to resolve symbols and apply types. -#[derive(Default)] -pub struct Analyzer<'a> { - /// The current state of the resolution. - pub resolution: ResolutionResult<'a>, - - /// The current type knowledge. - pub typing: Typing, - - pub type_context: TypeContext, +impl SourceLocation { + /// Creates a new [`SourceLocation`]. + pub fn new(path: PathBuf, span: Span) -> Self { + Self { path, span } + } +} - /// The applied types over the [`Engine`]. - pub engine: TypedEngine, +#[derive(Debug)] +pub enum PipelineError { + Import { + path: PathBuf, + error: io::Error, + cause: Option, + }, + Parse { + path: PathBuf, + error: ParseError, + }, + Type(TypeError), +} - /// The diagnostics that were generated during the analysis. - diagnostics: Vec, +/// Fetches a content string given a path. +pub trait Filesystem { + /// Reads the content of a file. + fn read(&self, path: &Path) -> io::Result; } -impl<'a> Analyzer<'a> { - /// Creates a new empty analyzer. - pub fn new() -> Self { - Self::default() - } +/// A [`Filesystem`] that stores files in memory. +pub(crate) struct MemoryFilesystem<'a> { + files: HashMap, +} - /// Analyse starting from the given entry point. 
- pub fn process( - &mut self, - entry_point: Name, - importer: &mut impl ASTImporter, - externals: &Externals, - ) -> Analysis<'a, '_> { - let last_next_source_id = SourceId(self.resolution.engine.len()); - resolve_sources( - vec![entry_point], - &mut self.resolution, - importer, - externals, - &mut self.diagnostics, - ); - if self.diagnostics.is_empty() { - let (engine, context, typing) = apply_types( - &self.resolution.engine, - &self.resolution.relations, - externals, - &mut self.diagnostics, - ); - self.engine = engine; - self.type_context = context; - self.typing = typing; - } - Analysis { - analyzer: self, - last_next_source_id, - } +impl Filesystem for MemoryFilesystem<'_> { + fn read(&self, path: &Path) -> io::Result { + self.files + .get(path) + .map(|&content| content.to_owned()) + .ok_or_else(|| io::Error::new(io::ErrorKind::NotFound, "file not found")) } +} - /// Performs an injection of a source as it would be a new entry point. - /// - /// # Panics - /// If the injection refers to itself, this method will panic. - pub fn inject( - &mut self, - inject: Inject, - importer: &mut impl ASTImporter, - externals: &Externals, - ) -> Analysis<'a, '_> { - let last_next_source_id = SourceId(self.resolution.engine.len()); - let name = inject.name.clone(); - let mut visit = vec![name.clone()]; - - self.diagnostics.extend(SymbolCollector::inject( - inject, - &mut self.resolution.engine, - &mut self.resolution.relations, - &mut self.resolution.imports, - externals, - &mut visit, - )); - self.resolution.visited.insert(name); - - resolve_sources( - visit, - &mut self.resolution, - importer, - externals, - &mut self.diagnostics, - ); - if self.diagnostics.is_empty() { - let (engine, context, typing) = apply_types( - &self.resolution.engine, - &self.resolution.relations, - externals, - &mut self.diagnostics, - ); - self.engine = engine; - self.type_context = context; - self.typing = typing; - } - Analysis { - analyzer: self, - last_next_source_id, - } +impl<'a> MemoryFilesystem<'a> { + pub(crate) fn new(files: HashMap) -> Self { + Self { files } } +} - /// Takes the diagnostics that were generated during the analysis - pub fn take_diagnostics(&mut self) -> Vec { - std::mem::take(&mut self.diagnostics) +impl<'a, P: AsRef> FromIterator<(P, &'a str)> for MemoryFilesystem<'a> { + fn from_iter>(iter: T) -> Self { + Self::new( + iter.into_iter() + .map(|(path, content)| (path.as_ref().to_path_buf(), content)) + .collect(), + ) } } -/// An analysis result that can be observed and reverted. -pub struct Analysis<'a, 'revert> { - /// Takes the unique ownership of the analyzer to prevent any further modification - /// that would invalidate any revert. - analyzer: &'revert mut Analyzer<'a>, - - /// Reverting the operation means internally removing all the sources that were added - /// after the last stable state. - last_next_source_id: SourceId, +/// A global storage of pre-analyzed modules and their types. +#[derive(Default)] +pub struct Database { + exports: HashMap, + pub checker: TypeChecker, } -impl Analysis<'_, '_> { - /// Gets a immutable reference to the analyzer, in order to preview the changes. - /// - /// To get back a mutable reference, simply drop the [`Analysis`] or call - /// [`Analysis::revert`]. - pub fn analyzer(&self) -> &Analyzer<'_> { - self.analyzer - } +/// A yet-to-be-analyzed set of files. +pub struct Reef { + /// The parsed abstract syntax trees of the files. + files: HashMap, - /// Returns the source id of the injected source. 
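To see how the new pieces compose, here is a hedged sketch of driving the pipeline end to end with the crate-internal in-memory filesystem used by the tests; it relies on the `analyze_multi` entry point defined further down in this file, and the two-file source snippet is illustrative:

    use std::collections::HashMap;
    use std::ffi::OsString;
    use std::path::PathBuf;

    fn run_pipeline_sketch() {
        // The entrypoint imports a struct exported by a sibling module.
        let fs = MemoryFilesystem::new(HashMap::from([
            (PathBuf::from("main"), "use reef::foo::Foo; fun a(x: Foo);"),
            (PathBuf::from("foo"), "struct Foo {}"),
        ]));
        let mut database = Database::new();
        let mut reef = Reef::new(OsString::from("demo"));
        // Importing, hoisting and type checking run in order; the first
        // failing phase short-circuits and returns its errors.
        let errors = analyze_multi(&mut database, &mut reef, &fs, "main");
        assert!(errors.is_empty());
    }
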
- pub fn attributed_id(&self) -> SourceId { - SourceId(self.last_next_source_id.0) - } + /// The export tree representing each module. + exports: ModuleTree, - #[must_use = "This method does not revert the analysis, `Analysis::revert` must be called"] - pub fn take_diagnostics(&mut self) -> Vec { - std::mem::take(&mut self.analyzer.diagnostics) - } + /// The symbols that have been found in each file. + symbols: HashMap, - /// Performs a one-way operation to revert all the changes made by the injection. - /// - /// This drops the [`Analyzer`] unique ownership. - pub fn revert(self) { - let id = self.last_next_source_id; - let resolution = &mut self.analyzer.resolution; - for (_, _, env) in resolution.engine.origins.drain(id.0..) { - if let Some(env) = env { - resolution.visited.remove(&env.fqn); - } - } - resolution.relations.retain_before(id); - resolution.imports.retain_before(id); - } + /// The high-level typed intermediate representation of the code. + hir: HashMap, } -/// Performs a full resolution of the environments directly or indirectly implied by the entry point. -/// -/// The completion of a collection followed by its resolution phase is called a cycle. -/// Multiple cycles can occur if the resolution phase finds new modules to collect. -pub fn resolve_all<'a>( - entry_point: Name, - externals: &'a Externals, - importer: &mut impl ASTImporter, - diagnostics: &mut Vec, -) -> ResolutionResult<'a> { - let mut result = ResolutionResult::default(); - resolve_sources( - vec![entry_point], - &mut result, - importer, - externals, - diagnostics, - ); - result +impl Reef { + /// Creates a new empty library with a given name. + pub fn new(name: OsString) -> Self { + Self { + files: HashMap::new(), + exports: ModuleTree::new(name), + symbols: HashMap::new(), + hir: HashMap::new(), + } + } } -/// A specially crafted input to inject at a specific point in the analyzer. -pub struct Inject { - /// The name of the source to inject. - pub name: Name, - - /// The imported content. - pub imported: Imported, - - /// The environment to inject the source into. - pub attached: Option, +impl Database { + pub fn new() -> Self { + Self::default() + } } -/// The results of an analysis -#[derive(Debug, Default)] -pub struct ResolutionResult<'e> { - pub engine: Engine<'e>, - pub relations: Relations, - imports: Imports, - visited: HashSet, +/// Populates the database with a fail-fast strategy. +pub fn analyze_multi( + database: &mut Database, + reef: &mut Reef, + fs: &dyn Filesystem, + entrypoint: &str, +) -> Vec { + let mut errors = Vec::::new(); + errors.extend( + import_multi(reef, fs, entrypoint) + .into_iter() + .map(PipelineError::from), + ); + if !errors.is_empty() { + return errors; + } + let hoist_result = hoist_files(&database.exports, reef, &mut database.checker); + errors.extend(hoist_result.errors.into_iter().map(PipelineError::from)); + if !errors.is_empty() { + return errors; + } + errors.extend( + type_check(reef, database, hoist_result.sorted) + .into_iter() + .map(PipelineError::from), + ); + errors } diff --git a/analyzer/src/module.rs b/analyzer/src/module.rs new file mode 100644 index 00000000..b85bb0bd --- /dev/null +++ b/analyzer/src/module.rs @@ -0,0 +1,552 @@ +//! A source explorer to create an in-memory representation of the units of a reef. +//! +//! # Overview +//! In order to perform a full static analysis of a program, the analyzer needs to know about all +//! the files and their symbols. The importer starts from an entrypoint and recursively imports all +//! the referred names. 
While there is content to discover, it will ask the [`Filesystem`] to read +//! what's behind the path. Each reef may contain multiple files, grouped in different modules, that +//! themselves may contain other modules. +//! +//! A path may refer to a module, i.e. a complete file, or a precise symbol within a module. This +//! first analysis step seeks to concretize these paths, because the analyzer needs the full source +//! code to construct a virtual representations of the symbols and their modules. When encountering +//! a path, it will try to import it as file. It will find nothing if the path ends with the symbol +//! that should be in that file. So it will try again, but with the last component removed. If it +//! is still not found, it will continue to pop the path components until it finds a file to parse. + +use crate::symbol::SymbolRegistry; +use crate::typing::user::{TypeId, UNKNOWN_TYPE}; +use crate::typing::{TypeError, TypeErrorKind}; +use crate::{Filesystem, PipelineError, Reef, SourceLocation}; +use ast::call::ProgrammaticCall; +use ast::function::FunctionDeclaration; +use ast::r#use::{Import as ImportExpr, ImportedSymbol, InclusionPathItem, Use}; +use ast::variable::VarDeclaration; +use ast::Expr; +use context::source::{SourceSegment, SourceSegmentHolder, Span}; +use parser::err::ParseError; +use parser::Root; +use std::collections::{HashMap, HashSet}; +use std::ffi::{OsStr, OsString}; +use std::io; +use std::path::{Path, PathBuf}; + +/// A symbol that can be accessed from other modules. +pub(super) struct Export { + /// The name from which this symbol can be accessed. + pub(super) name: String, + + /// The span where this symbol is defined. + pub(super) span: Span, + + /// The kind of symbol that it is. + pub(super) registry: SymbolRegistry, + + /// The type of the symbol. + /// + /// It may be an [`UNKNOWN_TYPE`] if the type is not known yet. + pub(super) ty: TypeId, +} + +#[derive(Debug, PartialEq, Eq, Clone)] +pub(crate) struct ResolvedImport { + pub(crate) path: PathBuf, + pub(crate) export_idx: usize, +} + +/// An instruction to import a module. +#[derive(Debug)] +struct Import { + path: PathBuf, + origin: Option, +} + +#[derive(Debug)] +pub(super) enum ModuleError { + /// A requested module cannot be accessed or read. + /// + /// The module may be directly requested or indirectly imported by another module. + Import { + /// The inner error that caused the import to fail. + error: io::Error, + + /// The location of the import statement that caused the error. + /// + /// It may be `None` if the error comes from the entrypoint, as there is no span to point to. + cause: Option, + }, + + /// A module encountered a syntax error while being parsed. + Parse { + /// The path to the module that caused the error. + path: PathBuf, + + /// The syntax error that occurred while parsing the module. + error: ParseError, + }, + Duplicate { + /// The name of the symbol that is exported multiple times. 
+ name: String, + path: PathBuf, + first: Span, + second: Span, + }, +} + +impl PartialEq for ModuleError { + fn eq(&self, other: &Self) -> bool { + match (self, other) { + (Self::Import { error: a, cause: b }, Self::Import { error: x, cause: y }) => { + a.kind() == x.kind() && b == y + } + (Self::Parse { path: a, error: b }, Self::Parse { path: x, error: y }) => { + a == x && b == y + } + ( + Self::Duplicate { + name: a, + path: b, + first: c, + second: d, + }, + Self::Duplicate { + name: x, + path: y, + first: z, + second: w, + }, + ) => a == x && b == y && c == z && d == w, + _ => false, + } + } +} + +impl From for PipelineError { + fn from(error: ModuleError) -> Self { + match error { + ModuleError::Import { error, cause } => PipelineError::Import { + path: PathBuf::new(), + error, + cause, + }, + ModuleError::Parse { path, error } => PipelineError::Parse { path, error }, + ModuleError::Duplicate { + name, + path, + first, + second, + } => PipelineError::Type(TypeError::new( + TypeErrorKind::DuplicateSymbol { + name, + previous: first, + }, + SourceLocation::new(path, second), + )), + } + } +} + +/// A module entry in a tree of modules. +/// +/// Modules abstract over the filesystem where each entry exposes a list of exports, i.e. symbols +/// that can be accessed from other modules. Those symbols may refer to functions, types, constants +/// or other modules. +pub struct ModuleTree { + /// The local name of the module. + pub name: OsString, + + /// The public symbols that this module exports. + /// + /// Each visitor should access this vector to find the symbols that can be used. + pub exports: Vec, + + /// The submodules that this module contains. + /// + /// Part of them may be present in the exports list. + pub children: Vec, +} + +impl ModuleTree { + pub fn new(name: OsString) -> Self { + Self { + name, + exports: Vec::new(), + children: Vec::new(), + } + } + + pub fn insert(&mut self, path: &Path, exports: Vec) { + let mut current = self; + for component in path.iter() { + let found = current + .children + .iter() + .position(|module| module.name == component); + match found { + Some(index) => { + current = &mut current.children[index]; + } + None => { + let module = ModuleTree { + name: component.to_os_string(), + children: Vec::new(), + exports: Vec::new(), + }; + current.children.push(module); + current = current.children.last_mut().expect("module just added"); + } + } + } + current.exports.extend(exports); + } + + pub fn get(&self, part: &OsStr) -> Option<&ModuleTree> { + self.children.iter().find(|module| module.name == part) + } + + pub fn get_full(&self, path: &Path) -> Option<&ModuleTree> { + let mut current = self; + for component in path.iter() { + current = current + .children + .iter() + .find(|module| module.name == component)?; + } + Some(current) + } + + pub fn get_full_mut(&mut self, path: &Path) -> Option<&mut ModuleTree> { + let mut current = self; + for component in path.iter() { + current = current + .children + .iter_mut() + .find(|module| module.name == component)?; + } + Some(current) + } + + pub fn take_exports(&mut self, path: &Path) -> Vec { + let mut current = self; + for component in path.iter() { + if let Some(module) = current + .children + .iter_mut() + .find(|module| module.name == component) + { + current = module; + } else { + return Vec::new(); + } + } + std::mem::take(&mut current.exports) + } +} + +#[derive(Clone, Copy)] +pub(crate) struct ModuleView<'a> { + pub(crate) current: &'a ModuleTree, + pub(crate) foreign: &'a HashMap, +} + +impl<'a> 
ModuleView<'a> { + pub(crate) fn new(current: &'a ModuleTree, foreign: &'a HashMap) -> Self { + Self { current, foreign } + } + + pub(crate) fn get(&self, item: &InclusionPathItem) -> Option<&ModuleTree> { + match item { + InclusionPathItem::Symbol(ident) => self.foreign.get(OsStr::new(ident.value.as_str())), + InclusionPathItem::Reef(_) => Some(self.current), + } + } + + pub(crate) fn get_direct(&self, path: &[InclusionPathItem]) -> Option<&ModuleTree> { + let (first, rest) = path.split_first().expect("path should not be empty"); + let mut tree = self.get(first)?; + for item in rest { + tree = match item { + InclusionPathItem::Symbol(ident) => tree.get(OsStr::new(ident.value.as_str()))?, + InclusionPathItem::Reef(_) => return None, + }; + } + Some(tree) + } +} + +/// Access all related files starting from the entrypoint. +pub(super) fn import_multi( + reef: &mut Reef, + fs: &dyn Filesystem, + entrypoint: &str, +) -> Vec { + let mut imports = vec![Import { + path: PathBuf::from(entrypoint), + origin: None, + }]; + let mut errors = Vec::::new(); + let mut visited = HashSet::::new(); + while let Some(Import { mut path, origin }) = imports.pop() { + if !visited.insert(path.clone()) { + continue; + } + let source = match fs.read(path.as_path()) { + Ok(source) => source, + Err(error) => { + if error.kind() == io::ErrorKind::NotFound && path.pop() { + imports.push(Import { path, origin }); + continue; + } + errors.push(ModuleError::Import { + error, + cause: origin, + }); + continue; + } + }; + let report = parser::parse(&source); + let root = Root { + expressions: report.expr, + }; + errors.extend(report.errors.into_iter().map(|error| ModuleError::Parse { + path: path.clone(), + error, + })); + let mut exports = Vec::::new(); + for duplicated in hoist_exports(&root, &mut exports) { + errors.push(ModuleError::Duplicate { + name: duplicated.name, + path: path.clone(), + first: duplicated.first, + second: duplicated.second, + }); + } + list_imports(&root, &path, &mut imports); + reef.exports.insert(&path, exports); + reef.files.insert(path, root); + } + errors +} + +#[derive(Debug, PartialEq, Eq)] +struct Duplicated { + name: String, + first: Span, + second: Span, +} + +fn hoist_exports(root: &Root, exports: &mut Vec) -> Vec { + let mut duplicates = Vec::::new(); + for expr in &root.expressions { + if let Expr::FunctionDeclaration(FunctionDeclaration { name, segment, .. }) = expr { + if let Some(exported) = exports.iter().find(|export| export.name == name.value) { + duplicates.push(Duplicated { + name: name.to_string(), + first: exported.span.clone(), + second: segment.clone(), + }); + } else { + exports.push(Export { + name: name.to_string(), + span: segment.clone(), + registry: SymbolRegistry::Function, + ty: UNKNOWN_TYPE, + }); + } + } else if let Expr::VarDeclaration(VarDeclaration { var, segment, .. 
}) = expr { + if let Some(exported) = exports + .iter() + .find(|export| export.name == var.name.value.as_str()) + { + duplicates.push(Duplicated { + name: var.name.to_string(), + first: exported.span.clone(), + second: segment.clone(), + }); + } else { + exports.push(Export { + name: var.name.to_string(), + span: segment.clone(), + registry: SymbolRegistry::Variable, + ty: UNKNOWN_TYPE, + }); + } + } else if let Expr::StructDeclaration(decl) = expr { + if let Some(exported) = exports + .iter() + .find(|export| export.name == decl.name.value.as_str()) + { + duplicates.push(Duplicated { + name: decl.name.to_string(), + first: exported.span.clone(), + second: decl.name.segment().clone(), + }); + } else { + exports.push(Export { + name: decl.name.to_string(), + span: decl.name.segment().clone(), + registry: SymbolRegistry::Type, + ty: UNKNOWN_TYPE, + }); + } + } + } + duplicates +} + +fn list_imports(root: &Root, path: &Path, imports: &mut Vec) { + for expr in &root.expressions { + list_imports_expr(expr, path, imports); + } +} + +fn list_imports_expr(expr: &Expr, path: &Path, imports: &mut Vec) { + match expr { + Expr::Use(Use { import, segment }) => { + add_import(import, path, segment.clone(), imports); + } + Expr::VarDeclaration(VarDeclaration { + initializer: Some(initializer), + .. + }) => { + list_imports_expr(initializer, path, imports); + } + Expr::ProgrammaticCall(ProgrammaticCall { + path: include_path, + arguments, + .. + }) => { + if let [InclusionPathItem::Reef(_), include_path @ ..] = include_path.as_slice() { + let span = include_path + .last() + .expect("at least one item") + .segment() + .start + ..include_path + .last() + .expect("at least one item") + .segment() + .end; + add_import_tree(include_path, path, span, imports); + } + for arg in arguments { + list_imports_expr(arg, path, imports); + } + } + Expr::FunctionDeclaration(FunctionDeclaration { + body: Some(body), .. + }) => { + list_imports_expr(body, path, imports); + } + _ => {} + } +} + +fn add_import(import: &ImportExpr, origin: &Path, span: SourceSegment, imports: &mut Vec) { + match import { + ImportExpr::Symbol(ImportedSymbol { path, .. }) | ImportExpr::AllIn(path, _) => { + let [InclusionPathItem::Reef(_), rest @ ..] 
= path.as_slice() else { + return; + }; + add_import_tree(rest, origin, span, imports); + } + ImportExpr::Environment(_) => {} + ImportExpr::List(items) => { + for item in &items.imports { + add_import(item, origin, span.clone(), imports); + } + } + } +} + +fn add_import_tree( + items: &[InclusionPathItem], + origin: &Path, + span: SourceSegment, + imports: &mut Vec, +) { + let mut path = PathBuf::new(); + for item in items { + match item { + InclusionPathItem::Symbol(ident) => { + path.push(ident.value.as_str()); + } + InclusionPathItem::Reef(_) => { + return; // The path is not usable, the error will be reported later + } + } + } + imports.push(Import { + path, + origin: Some(SourceLocation { + path: origin.to_path_buf(), + span, + }), + }); +} + +#[cfg(test)] +mod tests { + use super::*; + use crate::MemoryFilesystem; + use std::str::FromStr; + + fn import_multi(sources: [(PathBuf, &str); N]) -> Vec { + let entrypoint = sources + .first() + .expect("at least one source") + .0 + .display() + .to_string(); + let mut reef = Reef::new(OsString::from("test")); + let fs = MemoryFilesystem::from_iter(sources); + super::import_multi(&mut reef, &fs, &entrypoint) + } + + #[test] + fn find_duplicate_functions() { + let root = Root::from_str("fun a() = {}\nfun a() = {}").unwrap(); + let duplicates = hoist_exports(&root, &mut Vec::::new()); + assert_eq!( + duplicates, + [Duplicated { + name: "a".to_owned(), + first: 0..12, + second: 13..25 + }] + ); + } + + #[test] + fn unknown_file_import() { + let errors = import_multi([(PathBuf::from("main"), "use reef::foo")]); + assert_eq!( + errors.as_slice(), + [ModuleError::Import { + error: io::Error::new(io::ErrorKind::NotFound, "file not found"), + cause: Some(SourceLocation { + path: PathBuf::from("main"), + span: 0..13 + }) + }] + ); + } + + #[test] + fn valid_file_import() { + let errors = import_multi([ + (PathBuf::from("main"), "use test"), + (PathBuf::from("test"), ""), + ]); + assert_eq!(errors, []); + } + + #[test] + fn valid_symbol_import_import() { + let errors = import_multi([ + (PathBuf::from("main"), "use test::bar"), + (PathBuf::from("test"), ""), + ]); + assert_eq!(errors, []); + } +} diff --git a/analyzer/src/name.rs b/analyzer/src/name.rs deleted file mode 100644 index 78d709bb..00000000 --- a/analyzer/src/name.rs +++ /dev/null @@ -1,130 +0,0 @@ -use std::fmt::{Display, Formatter}; - -///The name of a symbol, a module or a context. -#[derive(Debug, PartialOrd, Ord, Clone, PartialEq, Eq, Hash)] -pub struct Name { - parts: Vec, -} - -impl Name { - ///Parses a new name from the given string. - pub fn new(name: &str) -> Self { - let parts: Vec = name.split("::").map(|s| s.to_string()).collect(); - - Self { parts } - } - - /// Tests if this name contains a path and a simple name. - pub fn is_qualified(&self) -> bool { - self.parts.len() != 1 - } - - /// Creates a new Name from a path and a simple name. - pub fn qualified(mut path: Vec, name: String) -> Self { - path.push(name); - Self::from(path) - } - - ///Creates a new Name with the simple name changed with given input - pub fn with_name(mut self, simple_name: &str) -> Self { - let last_idx = self.parts.len() - 1; - self.parts[last_idx] = simple_name.to_string(); - self - } - - ///The parts of this Name - pub fn parts(&self) -> &[String] { - &self.parts - } - - ///Convert this Name in a Vec - pub fn into_vec(self) -> Vec { - self.parts - } - - ///Creates a new name relative to given input. 
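For context on the `ModuleTree` export tree from `analyzer/src/module.rs` above: inserting a nested path materialises the intermediate module nodes, which `get_full` can then walk component by component. A small sketch with hypothetical module names (not taken from this patch):

    use std::ffi::OsString;
    use std::path::Path;

    fn module_tree_sketch() {
        let mut tree = ModuleTree::new(OsString::from("demo"));
        // Creates the "std" node, then "collections" beneath it, with no exports yet.
        tree.insert(Path::new("std/collections"), Vec::new());
        assert!(tree.get_full(Path::new("std")).is_some());
        assert!(tree.get_full(Path::new("std/collections")).is_some());
        assert!(tree.get_full(Path::new("std/io")).is_none());
    }
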
- pub fn relative_to(&self, other: &Name) -> Option { - let common_parts_len = other - .parts - .clone() - .into_iter() - .zip(&self.parts) - .take_while(|(a, b)| a == *b) - .count(); - - if common_parts_len == self.parts.len() { - return None; - } - let parts: Vec<_> = self - .parts - .clone() - .into_iter() - .skip(common_parts_len) - .collect(); - Some(Name::from(parts)) - } - - ///returns an iterator over the prefixed path of the name - pub fn path(&self) -> &[String] { - self.parts.split_last().unwrap().1 //Names cannot be empty - } - - ///returns the name's root (its very first part) - pub fn root(&self) -> &str { - self.parts.first().unwrap() //Names cannot be empty - } - - ///returns the simple name of name - pub fn simple_name(&self) -> &str { - self.parts.last().unwrap() //Names cannot be empty - } - - ///Creates a new name with this name as a prefixed path - pub fn child(&self, name: &str) -> Self { - let mut parts = self.parts.clone(); - parts.push(name.to_string()); - Self { parts } - } - - ///Returns the tail of the name (the name without it's root part, or None if this name have only one part) - pub fn tail(&self) -> Option { - if self.path().is_empty() { - return None; - } - self.parts - .split_last() - .map(|(_, tail)| Name::from(tail.to_vec())) - } - - ///Returns a name with given name merged - pub fn appended(&self, mut name: Self) -> Self { - let mut parts = self.parts.clone(); - parts.append(&mut name.parts); - Self { parts } - } -} - -impl From> for Name { - fn from(value: Vec) -> Self { - assert!(!value.is_empty(), "cannot create a name from an empty vec"); - Self { parts: value } - } -} - -impl From<&[String]> for Name { - fn from(value: &[String]) -> Self { - value.to_vec().into() - } -} - -impl Display for Name { - fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result { - if let Some((name, tail)) = self.parts.split_last() { - for module in tail { - write!(f, "{module}::")?; - } - write!(f, "{name}")?; - } - Ok(()) - } -} diff --git a/analyzer/src/reef.rs b/analyzer/src/reef.rs deleted file mode 100644 index 7af6d9de..00000000 --- a/analyzer/src/reef.rs +++ /dev/null @@ -1,99 +0,0 @@ -use crate::Analyzer; -use std::collections::HashMap; - -use crate::engine::Engine; -use crate::relations::{ObjectId, Relations}; -use crate::types::builtin::lang_reef; -use crate::types::ctx::TypeContext; -use crate::types::engine::TypedEngine; -use crate::types::Typing; - -#[derive(Debug)] -pub struct Reef<'e> { - pub name: String, - - pub engine: Engine<'e>, - pub relations: Relations, - - pub typed_engine: TypedEngine, - pub typing: Typing, - pub type_context: TypeContext, -} - -impl<'e> Reef<'e> { - pub fn new(name: String, analyzer: Analyzer<'e>) -> Self { - Self { - name, - engine: analyzer.resolution.engine, - relations: analyzer.resolution.relations, - typed_engine: analyzer.engine, - typing: analyzer.typing, - type_context: analyzer.type_context, - } - } - - pub fn new_partial(name: String, engine: Engine<'e>, relations: Relations) -> Self { - Self { - name, - engine, - relations, - typed_engine: TypedEngine::default(), - typing: Typing::default(), - type_context: TypeContext::default(), - } - } -} - -#[derive(Clone, Copy, Hash, Eq, PartialEq, Debug)] -pub struct ReefId(pub ObjectId); - -#[derive(Debug)] -pub struct Externals<'a> { - pub current: ReefId, - names: HashMap, - reefs: Vec>, -} - -pub const LANG_REEF: ReefId = ReefId(0); - -impl Default for Externals<'_> { - /// Creates a Reefs set with the required `lang` reef with id 0 - fn default() -> Self { - Self { - 
current: ReefId(1), - names: HashMap::from([("lang".to_string(), LANG_REEF)]), - reefs: vec![lang_reef()], - } - } -} - -impl<'e> Externals<'e> { - /// Return the lang's reef - pub fn lang(&self) -> &Reef<'e> { - &self.reefs[LANG_REEF.0] - } - - pub fn get_reef(&self, id: ReefId) -> Option<&Reef<'e>> { - self.reefs.get(id.0) - } - - pub fn get_reef_by_name(&self, name: &str) -> Option<(&Reef<'e>, ReefId)> { - self.names - .get(name) - .and_then(|id| self.get_reef(*id).map(|reef| (reef, *id))) - } - - fn get_reef_mut(&mut self, id: ReefId) -> Option<&mut Reef<'e>> { - self.reefs.get_mut(id.0) - } - - pub fn register(&mut self, reef: Reef<'e>) -> ReefId { - let id = ReefId(self.reefs.len()); - if self.names.insert(reef.name.clone(), id).is_some() { - panic!("Reef with name {} already registered", reef.name); - } - self.reefs.push(reef); - self.current.0 += 1; - id - } -} diff --git a/analyzer/src/relations.rs b/analyzer/src/relations.rs deleted file mode 100644 index 91c13c28..00000000 --- a/analyzer/src/relations.rs +++ /dev/null @@ -1,243 +0,0 @@ -use std::fmt::Debug; -use std::ops::{Index, IndexMut}; - -use context::source::SourceSegment; - -use crate::dependency::Dependencies; -use crate::engine::Engine; -use crate::environment::symbols::SymbolRegistry; -use crate::reef::{ReefId, LANG_REEF}; - -/// The object identifier base. -/// -/// An object is anything that can be referenced by its [`ObjectId`], -/// here's an exhaustive list of the main objects and structures in the analyzer. -/// -/// - [`RelationId`] points to a relation in [`Relation`]. -/// - [`SourceId`] points to a source in [`Engine`], sources are a root AST expression bound to an [`Environment`]. -/// - [`LocalId`] points to a local object in an environment's [`crate::environment::symbols::Symbols`]. -/// - [`NativeId`] refers to an intrinsic function or method. -/// - [`crate::types::TypeId`] points to a type registered in [`Typing`] -/// -/// Some main structures are based on object identifiers -/// - [`ResolvedSymbol`] contains a [`SourceId`] and a [`LocalId`] which globally targets a symbol inside a given source environment, -/// this structure is emitted by the resolution phase. -/// - [`SymbolRef`] refers to a symbol, which is either local (to an unbound environment) or external, where the relation is held by the [`Relations`] -pub type ObjectId = usize; - -/// A relation identifier, that points to a specific relation in the [`Relations`]. -#[derive(Debug, Copy, Clone, PartialEq, Eq, Hash)] -pub struct RelationId(pub ObjectId); - -/// A source identifier, that can be the target of a global resolution. -#[derive(Debug, Copy, Clone, PartialEq, Eq, Hash)] -pub struct SourceId(pub ObjectId); - -/// An identifier for a local variable stored in an environment -#[derive(Debug, Copy, Clone, PartialEq, Eq, Hash)] -pub struct LocalId(pub ObjectId); - -/// An indication where an object is located. -#[derive(Debug, Copy, Clone, PartialEq, Eq, Hash)] -pub enum SymbolRef { - /// A local object, referenced by its index in the [`Environment`] it is defined in. - Local(LocalId), - - /// An external symbol, where the relation is contained in the [`Resolver`]. - External(RelationId), -} - -impl From for SymbolRef { - fn from(id: RelationId) -> Self { - SymbolRef::External(id) - } -} - -impl From for SymbolRef { - fn from(id: LocalId) -> Self { - SymbolRef::Local(id) - } -} - -/// The resolved information about a symbol. 
-#[derive(Debug, Copy, Clone, Hash, Eq, PartialEq)] -pub struct ResolvedSymbol { - /// The symbol's reef - pub reef: ReefId, - /// The module where the symbol is defined. - /// - /// This is used to route the symbol to the correct environment. - pub source: SourceId, - - /// The object identifier of the symbol, local to the given environment. - pub object_id: LocalId, -} - -impl ResolvedSymbol { - pub const fn new(reef: ReefId, source: SourceId, object_id: LocalId) -> Self { - Self { - reef, - source, - object_id, - } - } - - pub const fn lang_symbol(object_id: LocalId) -> Self { - Self { - reef: LANG_REEF, - source: SourceId(0), - object_id, - } - } -} - -/// The state of a relation -/// -/// The [SymbolResolver] only attempts to resolve relations marked as [RelationState::Unresolved] -/// If the resolution fails, for any reason, the object is marked as dead ([RelationState::Dead]) -/// which in most case implies a diagnostic. -/// The dead state prevents the resolver to attempt to resolve again unresolvable symbols on next cycles. -/// If the relation was successfully resolved, the state is then [RelationState::Resolved], containing the -/// resolved symbol and targeted environment. -#[derive(Debug, Clone, Copy, Hash, PartialEq)] -pub enum RelationState { - Resolved(ResolvedSymbol), - Unresolved, - Dead, -} - -impl RelationState { - pub fn expect_resolved(self, msg: &str) -> ResolvedSymbol { - match self { - RelationState::Resolved(resolved) => resolved, - _ => panic!("{}", msg), - } - } -} - -#[derive(Debug, Clone, Hash, PartialEq)] -pub struct Relation { - /// The environment's id that requested this object resolution. - pub origin: SourceId, - - /// This relation's state. - /// See [RelationState] for more details - pub state: RelationState, - - /// The targeted registry of the symbol - pub registry: SymbolRegistry, -} - -impl Relation { - pub fn unresolved(origin: SourceId, registry: SymbolRegistry) -> Self { - Self { - origin, - state: RelationState::Unresolved, - registry, - } - } - - pub fn resolved(origin: SourceId, resolved: ResolvedSymbol, registry: SymbolRegistry) -> Self { - Self { - origin, - state: RelationState::Resolved(resolved), - registry, - } - } -} - -/// A collection of objects that are tracked globally and may link to each other. -#[derive(Debug, Default)] -pub struct Relations { - /// The tracked relations between environments. - /// - /// The actual [`String`] -> [`ObjectId`] mapping is left to the [`crate::environment::Environment`]. - /// The reason that the resolution information is lifted out of the environment is that identifiers - /// binding happens across modules, and an environment cannot guarantee that it will be able to generate - /// unique identifiers for all the symbols that do not conflicts with the ones from other modules. - relations: Vec, -} - -impl Relations { - /// Tracks a new object and returns its identifier. - pub fn track_new_object(&mut self, origin: SourceId, registry: SymbolRegistry) -> RelationId { - let id = self.relations.len(); - self.relations.push(Relation::unresolved(origin, registry)); - RelationId(id) - } - - /// Finds segments that reference the given object. - /// - /// Returns [`None`] if the object is neither found nor tracked. 
- pub fn find_references( - &self, - engine: &Engine, - tracked_object: RelationId, - ) -> Option> { - let object = self.relations.get(tracked_object.0)?; - let environment = engine - .get_environment(object.origin) - .expect("object relation targets to an unknown environment"); - Some(environment.find_references(SymbolRef::External(tracked_object))) - } - - /// Returns a mutable iterator over all the objects. - pub fn iter_mut(&mut self) -> impl Iterator { - self.relations - .iter_mut() - .enumerate() - .map(|(id, relation)| (RelationId(id), relation)) - } - - /// Returns an immutable iterator over all the objects. - pub fn iter(&self) -> impl Iterator { - self.relations - .iter() - .enumerate() - .map(|(id, relation)| (RelationId(id), relation)) - } - - /// Returns the state of the given object. - /// - /// If the relation is not referenced, returns [`None`]. - pub fn get_state(&self, id: RelationId) -> Option { - Some(self.relations.get(id.0)?.state) - } - - /// Removes all the objects that have been created at or after the given id. - pub fn retain_before(&mut self, id: SourceId) { - self.relations.retain(|relation| relation.origin.0 < id.0); - } - - /// Creates a dependency graph for the given engine. - /// only symbols that are inside of the engine's reef are placed in the graph. - pub fn as_dependencies(&self, engine_reef: ReefId, engine: &Engine) -> Dependencies { - let mut dependencies = Dependencies::default(); - for (id, _) in engine.environments() { - dependencies.add_node(id); - } - - for object in self.relations.iter() { - if let RelationState::Resolved(resolved) = object.state { - if resolved.reef == engine_reef { - dependencies.add_dependency(object.origin, resolved.source); - } - } - } - dependencies - } -} - -impl IndexMut for Relations { - fn index_mut(&mut self, index: RelationId) -> &mut Self::Output { - &mut self.relations[index.0] - } -} - -impl Index for Relations { - type Output = Relation; - - fn index(&self, index: RelationId) -> &Self::Output { - &self.relations[index.0] - } -} diff --git a/analyzer/src/steps.rs b/analyzer/src/steps.rs deleted file mode 100644 index f08ea86b..00000000 --- a/analyzer/src/steps.rs +++ /dev/null @@ -1,42 +0,0 @@ -use crate::diagnostic::Diagnostic; -use crate::importer::ASTImporter; -use crate::name::Name; -use crate::reef::Externals; -use crate::steps::collect::SymbolCollector; -use crate::steps::resolve::SymbolResolver; -use crate::ResolutionResult; - -pub mod collect; -pub mod resolve; -mod shared_diagnostics; -pub mod typing; - -pub(super) fn resolve_sources( - mut to_visit: Vec, - result: &mut ResolutionResult, - importer: &mut impl ASTImporter, - externals: &Externals, - diagnostics: &mut Vec, -) { - while !to_visit.is_empty() { - diagnostics.extend(SymbolCollector::collect_symbols( - &mut result.engine, - &mut result.relations, - &mut result.imports, - externals, - &mut to_visit, - &mut result.visited, - importer, - )); - diagnostics.extend(SymbolResolver::resolve_symbols( - &result.engine, - &mut result.relations, - &mut result.imports, - externals, - &mut to_visit, - &result.visited, - )); - // The cycle ended, if `to_visit` is still non empty, a new cycle will be started - // to resolve the modules to visit and so on - } -} diff --git a/analyzer/src/steps/collect.rs b/analyzer/src/steps/collect.rs deleted file mode 100644 index cbd8b97a..00000000 --- a/analyzer/src/steps/collect.rs +++ /dev/null @@ -1,1258 +0,0 @@ -use std::collections::HashSet; - -use ast::call::Call; -use ast::control_flow::ForKind; -use 
ast::function::FunctionParameter; -use ast::r#match::MatchPattern; -use ast::r#type::Type; -use ast::r#use::{Import as ImportExpr, InclusionPathItem}; -use ast::range; -use ast::value::LiteralValue; -use ast::variable::{Tilde, VarName}; -use ast::Expr; -use context::source::{ContentId, SourceSegment, SourceSegmentHolder}; -use range::Iterable; - -use crate::diagnostic::{Diagnostic, DiagnosticID, Observation}; -use crate::engine::Engine; -use crate::environment::symbols::{MagicSymbolKind, SymbolInfo, SymbolLocation, SymbolRegistry}; -use crate::environment::Environment; -use crate::importer::{ASTImporter, ImportResult, Imported}; -use crate::imports::{Imports, UnresolvedImport}; -use crate::name::Name; -use crate::reef::{Externals, ReefId}; -use crate::relations::{RelationState, Relations, SourceId, SymbolRef}; -use crate::steps::resolve::SymbolResolver; -use crate::steps::shared_diagnostics::diagnose_invalid_symbol; -use crate::steps::typing::magic::is_magic_variable_name; -use crate::Inject; - -/// Defines the current state of the tree exploration. -#[derive(Debug, Clone, Copy)] -struct ResolutionState { - /// The id of the AST tree that is currently being explored. - content: ContentId, - - /// The module id that is currently being explored. - module: SourceId, - - /// Whether the current module accepts imports. - accept_imports: bool, -} - -impl ResolutionState { - fn new(content: ContentId, module: SourceId) -> Self { - Self { - content, - module, - accept_imports: true, - } - } - - fn fork(self, module: SourceId) -> Self { - Self { - content: self.content, - module, - accept_imports: false, - } - } -} - -pub struct SymbolCollector<'a, 'b, 'e> { - engine: &'a mut Engine<'e>, - relations: &'a mut Relations, - imports: &'a mut Imports, - externals: &'b Externals<'b>, - diagnostics: Vec, - - /// The stack of environments currently being collected. - stack: Vec, -} - -impl<'a, 'b, 'e> SymbolCollector<'a, 'b, 'e> { - /// Explores the entry point and all its recursive dependencies. - /// - /// This collects all the symbols that are used, locally or not yet resolved if they are global. - /// Returns a vector of diagnostics raised by the collection process. 
- pub fn collect_symbols( - engine: &'a mut Engine<'e>, - relations: &'a mut Relations, - imports: &'a mut Imports, - externals: &'b Externals<'b>, - to_visit: &mut Vec, - visited: &mut HashSet, - importer: &mut impl ASTImporter, - ) -> Vec { - let mut collector = Self::new(engine, relations, imports, externals); - collector.collect(importer, to_visit, visited); - collector.check_symbols_identity(); - collector.diagnostics - } - - pub fn inject( - inject: Inject, - engine: &'a mut Engine<'e>, - relations: &'a mut Relations, - imports: &'a mut Imports, - externals: &'b Externals<'b>, - to_visit: &mut Vec, - ) -> Vec { - assert_ne!( - inject.attached, - Some(SourceId(engine.len())), - "Cannot inject a module to itself" - ); - let mut collector = Self::new(engine, relations, imports, externals); - let root_block = collector.engine.take(inject.imported.expr); - - let mut env = Environment::script(inject.name); - env.parent = inject.attached; - let mut state = ResolutionState::new( - inject.imported.content, - collector.engine.track(inject.imported.content, root_block), - ); - collector.engine.attach(state.module, env); - collector.stack.push(state.module); - - collector.tree_walk(&mut state, root_block, to_visit); - collector.stack.pop(); - collector.check_symbols_identity(); - collector.diagnostics - } - - fn new( - engine: &'a mut Engine<'e>, - relations: &'a mut Relations, - imports: &'a mut Imports, - externals: &'b Externals<'b>, - ) -> Self { - Self { - engine, - relations, - imports, - externals, - diagnostics: Vec::new(), - stack: Vec::new(), - } - } - - fn current_env(&mut self) -> &mut Environment { - self.engine - .get_environment_mut(*self.stack.last().unwrap()) - .unwrap() - } - - fn engine(&mut self) -> &mut Engine<'e> { - self.engine - } - - /// Performs a check over the collected symbols of root environments - /// to ensure that the environment does not declares a symbols with the same name of - /// another module. - /// - /// For example, if the module `a` defines a symbol `b`, and the module `a::b` also exists - /// there is no way to identify if either `a::b` is the symbol, or `a::b` is the module. 
- fn check_symbols_identity(&mut self) { - let roots = self - .engine - .environments() - .filter(|(_, e)| e.parent.is_none()); //keep root environments - for (env_id, env) in roots { - let env_name = &env.fqn; - let mut reported = HashSet::new(); - for (declaration_segment, symbol) in &env.definitions { - let id = match symbol { - SymbolRef::Local(id) => id, - SymbolRef::External(_) => continue, //we check declarations only, thus external symbols are ignored - }; - if !reported.insert(id) { - continue; - } - let symbol = env - .symbols - .get(*id) - .expect("local symbol references an unknown variable"); - let var_fqn = env_name.appended(Name::new(&symbol.name)); - - let clashed_module = self - .engine - .environments() - .find(|(_, e)| e.parent.is_none() && e.fqn == var_fqn) - .map(|(_, e)| e); - - if let Some(clashed_module) = clashed_module { - let inner_modules = { - //we know that the inner envs contains at least one environment (the env being clashed with) - let list = list_inner_modules(self.engine, &env.fqn) - .map(|e| e.fqn.simple_name()) - .collect::>(); - - let (head, tail) = list.split_first().unwrap(); - let str = tail - .iter() - .fold(format!("{env_name}::{{{head}"), |acc, it| { - format!("{acc}, {it}") - }); - format!("{str}}}") - }; - - let msg = format!( - "Declared symbol '{}' in module {env_name} clashes with module {}", - symbol.name, &clashed_module.fqn - ); - let diagnostic = { - Diagnostic::new(DiagnosticID::SymbolConflictsWithModule, msg) - .with_observation( - Observation::here( - env_id, - self.externals.current, - declaration_segment.clone(), - format!("This symbol has the same fully-qualified name as module {}", clashed_module.fqn) - ) - ) - .with_help(format!("You should refactor this symbol with a name that does not conflicts with following modules: {inner_modules}")) - }; - self.diagnostics.push(diagnostic) - } - } - } - } - - fn collect( - &mut self, - importer: &mut impl ASTImporter, - to_visit: &mut Vec, - visited: &mut HashSet, - ) { - while let Some(name) = to_visit.pop() { - //try to import the ast, if the importer isn't able to achieve this and returns None, - //Ignore this ast analysis. It'll be up to the given importer implementation to handle the - //errors caused by this import request failure - if let Some((imported, name)) = import_ast(name, importer, visited) { - self.collect_ast_symbols(imported, name, to_visit) - } - } - } - - fn collect_ast_symbols( - &mut self, - imported: Imported, - module_name: Name, - to_visit: &mut Vec, - ) { - // Immediately transfer the ownership of the AST to the engine. 
- let root_block = self.engine.take(imported.expr); - - let env = Environment::script(module_name); - let mut state = ResolutionState::new( - imported.content, - self.engine.track(imported.content, root_block), - ); - self.engine.attach(state.module, env); - self.stack.push(state.module); - - self.tree_walk(&mut state, root_block, to_visit); - self.stack.pop(); - } - - fn add_checked_import( - &mut self, - mod_id: SourceId, - import: UnresolvedImport, - import_expr: &'e ImportExpr, - import_fqn: Name, - ) { - if let Some(shadowed) = - self.imports - .add_unresolved_import(mod_id, import, import_expr.segment()) - { - let reef = self.externals.current; - let diagnostic = Diagnostic::new( - DiagnosticID::ShadowedImport, - format!("{import_fqn} is imported twice."), - ) - .with_observation(Observation::here( - mod_id, - reef, - shadowed, - "useless import here", - )) - .with_observation(Observation::context( - mod_id, - reef, - import_expr.segment(), - "This statement shadows previous import", - )); - self.diagnostics.push(diagnostic) - } - } - - /// Collects the symbol import and place it as an [UnresolvedImport] in the relations. - fn collect_symbol_import( - &mut self, - import: &'e ImportExpr, - mut relative_path: Vec, - mod_id: SourceId, - to_visit: &mut Vec, - ) { - let reef = self.externals.current; - match import { - ImportExpr::Symbol(s) => { - relative_path.extend(s.path.iter().cloned()); - match SymbolLocation::compute(&relative_path) { - Ok(loc) => { - let alias = s.alias.as_ref().map(|s| s.to_string()); - - let name = loc.name.clone(); - to_visit.push(name.clone()); - - let unresolved = UnresolvedImport::Symbol { alias, loc }; - self.add_checked_import(mod_id, unresolved, import, name) - } - Err(segments) => self - .diagnostics - .push(make_invalid_path_diagnostic(mod_id, reef, segments)), - } - } - ImportExpr::AllIn(items, _) => { - relative_path.extend(items.iter().cloned()); - match SymbolLocation::compute(&relative_path) { - Ok(loc) => { - let name = loc.name.clone(); - to_visit.push(name.clone()); - let unresolved = UnresolvedImport::AllIn(loc); - self.add_checked_import(mod_id, unresolved, import, name) - } - Err(segments) => self - .diagnostics - .push(make_invalid_path_diagnostic(mod_id, reef, segments)), - } - } - - ImportExpr::Environment(_) => { - let diagnostic = Diagnostic::new( - DiagnosticID::UnsupportedFeature, - "import of environment variables and commands are not yet supported.", - ) - .with_observation((mod_id, reef, import.segment()).into()); - - self.diagnostics.push(diagnostic); - } - ImportExpr::List(list) => { - relative_path.extend(list.root.iter().cloned()); - - match SymbolLocation::compute(&list.root) { - Ok(_) => { - for list_import in &list.imports { - self.collect_symbol_import( - list_import, - relative_path.clone(), - mod_id, - to_visit, - ) - } - } - Err(segments) => self - .diagnostics - .push(make_invalid_path_diagnostic(mod_id, reef, segments)), - } - } - } - } - - fn tree_walk(&mut self, state: &mut ResolutionState, expr: &'e Expr, to_visit: &mut Vec) { - match expr { - Expr::Use(import) => { - if !state.accept_imports { - let diagnostic = Diagnostic::new( - DiagnosticID::UseBetweenExprs, - "Unexpected use statement between expressions. 
Use statements must be at the top of the environment.", - ).with_observation((state.module, self.externals.current, import.segment()).into()); - self.diagnostics.push(diagnostic); - return; - } - self.collect_symbol_import(&import.import, Vec::new(), state.module, to_visit); - return; - } - Expr::Assign(assign) => { - self.tree_walk(state, &assign.left, to_visit); - self.tree_walk(state, &assign.value, to_visit); - } - Expr::Binary(binary) => { - self.tree_walk(state, &binary.left, to_visit); - self.tree_walk(state, &binary.right, to_visit); - } - Expr::Match(match_expr) => { - self.tree_walk(state, &match_expr.operand, to_visit); - for arm in &match_expr.arms { - for pattern in &arm.patterns { - match pattern { - MatchPattern::VarRef(reference) => { - if let VarName::User(name) = &reference.name { - let symbol = self.identify_symbol( - *self.stack.last().unwrap(), - state.module, - SymbolLocation::unspecified(Name::new(name)), - reference.segment(), - SymbolRegistry::Objects, - ); - self.current_env().annotate(reference, symbol); - } - } - MatchPattern::Template(template) => { - for part in &template.parts { - self.tree_walk(state, part, to_visit); - } - } - MatchPattern::Literal(_) | MatchPattern::Wildcard(_) => {} - } - } - if let Some(guard) = &arm.guard { - self.current_env().begin_scope(); - self.tree_walk(state, guard, to_visit); - self.current_env().end_scope(); - } - self.current_env().begin_scope(); - if let Some(name) = &arm.val_name { - self.current_env() - .symbols - .declare_local(name.to_string(), SymbolInfo::Variable); - } - self.tree_walk(state, &arm.body, to_visit); - self.current_env().end_scope(); - } - } - Expr::Call(call) => { - self.resolve_special_call(*self.stack.last().unwrap(), call); - for arg in &call.arguments { - self.tree_walk(state, arg, to_visit); - } - } - Expr::ProgrammaticCall(call) => { - match SymbolLocation::compute(&call.path) { - Ok(loc) => { - let symbol = self.identify_symbol( - *self.stack.last().unwrap(), - state.module, - loc, - call.segment(), - SymbolRegistry::Objects, - ); - - self.current_env().annotate(call, symbol); - } - Err(segments) => self.diagnostics.push(make_invalid_path_diagnostic( - state.module, - self.externals.current, - segments, - )), - } - - for ty in &call.type_parameters { - self.collect_type(state.module, ty); - } - - for arg in &call.arguments { - self.tree_walk(state, arg, to_visit); - } - } - Expr::MethodCall(call) => { - self.tree_walk(state, &call.source, to_visit); - for targ in &call.type_parameters { - self.collect_type(state.module, targ) - } - for arg in &call.arguments { - self.tree_walk(state, arg, to_visit); - } - } - Expr::Pipeline(pipeline) => { - for expr in &pipeline.commands { - self.tree_walk(state, expr, to_visit); - } - } - Expr::Redirected(redirected) => { - self.tree_walk(state, &redirected.expr, to_visit); - for redir in &redirected.redirections { - self.tree_walk(state, &redir.operand, to_visit); - } - } - Expr::Detached(detached) => { - self.tree_walk(state, &detached.underlying, to_visit); - } - Expr::Path(ident) => match SymbolLocation::compute(&ident.path) { - Ok(loc) => { - let symbol = self.identify_symbol( - *self.stack.last().unwrap(), - state.module, - loc, - ident.segment(), - SymbolRegistry::Objects, - ); - self.current_env().annotate(ident, symbol); - } - Err(segments) => self.diagnostics.push(make_invalid_path_diagnostic( - state.module, - self.externals.current, - segments, - )), - }, - Expr::VarDeclaration(var) => { - if let Some(initializer) = &var.initializer { - 
self.tree_walk(state, initializer, to_visit); - } - if let Some(ty) = &var.var.ty { - self.collect_type(*self.stack.last().unwrap(), ty) - } - let env = self.current_env(); - let symbol = env - .symbols - .declare_local(var.var.name.to_string(), SymbolInfo::Variable); - env.annotate(var, SymbolRef::Local(symbol)); - } - Expr::VarReference(var) => { - if let VarName::User(name) = &var.name { - if is_magic_variable_name(name) { - let script_env = self - .engine - .get_environment_mut(*self.stack.first().unwrap()) - .unwrap(); - if script_env - .symbols - .find_reachable( - "", - SymbolRegistry::Magic(MagicSymbolKind::ProgramArguments), - ) - .is_none() - { - script_env - .symbols - .declare_magic(MagicSymbolKind::ProgramArguments); - } - } - - let symbol = self.identify_symbol( - *self.stack.last().unwrap(), - state.module, - SymbolLocation::unspecified(Name::new(name)), - var.segment(), - SymbolRegistry::Objects, - ); - self.current_env().annotate(var, symbol); - } - } - Expr::FieldAccess(access) => { - self.tree_walk(state, &access.expr, to_visit); - } - Expr::Range(range) => match range { - Iterable::Range(range) => { - self.tree_walk(state, &range.start, to_visit); - self.tree_walk(state, &range.end, to_visit); - if let Some(step) = &range.step { - self.tree_walk(state, step, to_visit); - } - let struct_name = if range.upper_inclusive { - "InclusiveRange" - } else { - "Range" - }; - let name = Name::from(vec!["std".to_owned(), struct_name.to_owned()]); - let symbol = self.identify_symbol( - *self.stack.last().unwrap(), - state.module, - SymbolLocation { - name, - is_current_reef_explicit: false, - }, - range.segment(), - SymbolRegistry::Objects, - ); - self.current_env().annotate(range, symbol); - } - Iterable::Files(pattern) => { - self.tree_walk(state, &pattern.pattern, to_visit); - } - }, - Expr::Subscript(sub) => { - self.tree_walk(state, &sub.target, to_visit); - self.tree_walk(state, &sub.index, to_visit); - } - Expr::Tilde(tilde) => { - let function_name = match &tilde.structure { - Tilde::HomeDir(Some(expr)) => { - self.tree_walk(state, expr, to_visit); - "home_dir" - } - Tilde::HomeDir(None) => "current_home_dir", - Tilde::WorkingDir => "working_dir", - }; - let name = Name::from(vec!["std".to_owned(), function_name.to_owned()]); - let symbol = self.identify_symbol( - *self.stack.last().unwrap(), - state.module, - SymbolLocation { - name, - is_current_reef_explicit: false, - }, - tilde.segment(), - SymbolRegistry::Objects, - ); - self.current_env().annotate(tilde, symbol); - } - Expr::Substitution(sub) => { - self.current_env().begin_scope(); - for expr in &sub.underlying.expressions { - self.tree_walk(state, expr, to_visit); - } - self.current_env().end_scope(); - } - Expr::TemplateString(template) => { - for expr in &template.parts { - self.tree_walk(state, expr, to_visit); - } - } - Expr::Casted(casted) => { - self.collect_type(*self.stack.last().unwrap(), &casted.casted_type); - self.tree_walk(state, &casted.expr, to_visit); - } - Expr::Test(test) => { - self.tree_walk(state, &test.expression, to_visit); - } - Expr::Unary(unary) => { - self.tree_walk(state, &unary.expr, to_visit); - } - Expr::Parenthesis(paren) => { - self.tree_walk(state, &paren.expression, to_visit); - } - Expr::Subshell(subshell) => { - self.current_env().begin_scope(); - for expr in &subshell.expressions { - self.tree_walk(state, expr, to_visit); - } - self.current_env().end_scope(); - } - Expr::Block(block) => { - self.current_env().begin_scope(); - for expr in &block.expressions { - 
self.tree_walk(state, expr, to_visit); - } - self.current_env().end_scope(); - } - Expr::If(if_expr) => { - self.current_env().begin_scope(); - self.tree_walk(state, &if_expr.condition, to_visit); - self.current_env().end_scope(); - self.current_env().begin_scope(); - self.tree_walk(state, &if_expr.success_branch, to_visit); - self.current_env().end_scope(); - if let Some(else_branch) = &if_expr.fail_branch { - self.current_env().begin_scope(); - self.tree_walk(state, else_branch, to_visit); - self.current_env().end_scope(); - } - } - Expr::While(wh) => { - self.current_env().begin_scope(); - self.tree_walk(state, &wh.condition, to_visit); - self.current_env().end_scope(); - self.current_env().begin_scope(); - self.tree_walk(state, &wh.body, to_visit); - self.current_env().end_scope(); - } - Expr::Loop(lp) => { - self.current_env().begin_scope(); - self.tree_walk(state, &lp.body, to_visit); - self.current_env().end_scope(); - } - Expr::For(fr) => { - self.current_env().begin_scope(); - match fr.kind.as_ref() { - ForKind::Range(range) => { - let env = self.current_env(); - let symbol = env - .symbols - .declare_local(range.receiver.to_string(), SymbolInfo::Variable); - env.annotate(range, SymbolRef::Local(symbol)); - self.tree_walk(state, &range.iterable, to_visit); - } - ForKind::Conditional(cond) => { - self.tree_walk(state, &cond.initializer, to_visit); - self.tree_walk(state, &cond.condition, to_visit); - self.tree_walk(state, &cond.increment, to_visit); - } - } - self.tree_walk(state, &fr.body, to_visit); - self.current_env().end_scope(); - } - Expr::Return(ret) => { - if let Some(expr) = &ret.expr { - self.tree_walk(state, expr, to_visit); - } - } - Expr::FunctionDeclaration(func) => { - let symbol = self - .current_env() - .symbols - .declare_local(func.name.value.to_string(), SymbolInfo::Function); - self.current_env().annotate(func, SymbolRef::Local(symbol)); - - let func_id = self.engine().track(state.content, expr); - self.current_env().bind_source(func, func_id); - let func_env = self - .current_env() - .fork(state.module, func.name.value.as_ref()); - - self.stack.push(func_id); - - let func_env = self.engine().attach(func_id, func_env); - - for type_param in &func.type_parameters { - func_env - .symbols - .declare_local(type_param.name.to_string(), SymbolInfo::Type); - if !type_param.params.is_empty() { - unimplemented!("Parametrized type parameters are not yet supported"); - } - } - - for param in &func.parameters { - let param_name = match param { - FunctionParameter::Named(named) => { - if let Some(ty) = &named.ty { - self.collect_type(func_id, ty); - } - named.name.to_string() - } - FunctionParameter::Variadic(_, _) => "@".to_owned(), - FunctionParameter::Slf(_) => continue, - }; - let func_env = self.engine().get_environment_mut(func_id).unwrap(); - - let symbol = func_env - .symbols - .declare_local(param_name, SymbolInfo::Variable); - - // Only named parameters can be annotated for now - if let FunctionParameter::Named(named) = param { - func_env.annotate(named, SymbolRef::Local(symbol)); - } - } - if let Some(ty) = &func.return_type { - self.collect_type(func_id, ty) - } - - if let Some(body) = &func.body { - self.tree_walk(&mut state.fork(func_id), body, to_visit); - } - - Self::resolve_captures( - &self.stack, - self.engine, - self.relations, - self.externals.current, - &mut self.diagnostics, - ); - self.stack.pop(); - } - Expr::LambdaDef(lambda) => { - let func_id = self.engine().track(state.content, expr); - - let func_env = self - .current_env() - 
.fork(state.module, &format!("lambda@{}", func_id.0)); - - self.stack.push(func_id); - self.engine().attach(func_id, func_env); - - for param in &lambda.args { - let func_env = self.engine().get_environment_mut(func_id).unwrap(); - let symbol = func_env - .symbols - .declare_local(param.name.value.as_str().to_owned(), SymbolInfo::Variable); - func_env.annotate(param, SymbolRef::Local(symbol)); - - if let Some(ty) = ¶m.ty { - self.collect_type(func_id, ty) - } - } - self.tree_walk(&mut state.fork(func_id), &lambda.body, to_visit); - Self::resolve_captures( - &self.stack, - self.engine, - self.relations, - self.externals.current, - &mut self.diagnostics, - ); - self.stack.pop(); - } - Expr::StructDeclaration(decl) => { - let struct_env_id = self.engine().track(state.content, expr); - - let struct_env = self - .current_env() - .fork(state.module, decl.name.value.as_str()); - - let local_id = self - .current_env() - .symbols - .declare_local(decl.name.to_string(), SymbolInfo::Type); - - let struct_env = self.engine().attach(struct_env_id, struct_env); - - for tparam in &decl.parameters { - struct_env - .symbols - .declare_local(tparam.name.to_string(), SymbolInfo::Type); - } - - for attribute in &decl.fields { - let struct_env = self.engine().get_environment_mut(struct_env_id).unwrap(); - struct_env - .symbols - .declare_local(attribute.name.to_string(), SymbolInfo::Variable); - self.collect_type(struct_env_id, &attribute.tpe); - } - - self.current_env().bind_source(decl, struct_env_id); - self.current_env() - .annotate(decl, SymbolRef::Local(local_id)); - } - Expr::Literal(_) | Expr::Continue(_) | Expr::Break(_) => {} - Expr::Impl(_) => todo!(), - } - state.accept_imports = false; - } - - fn resolve_captures( - stack: &[SourceId], - engine: &Engine, - relations: &mut Relations, - reef: ReefId, - diagnostics: &mut Vec, - ) { - let stack: Vec<_> = stack - .iter() - .map(|id| (*id, engine.get_environment(*id).unwrap())) - .collect(); - SymbolResolver::resolve_captures(&stack, relations, reef, diagnostics); - } - - fn collect_type(&mut self, origin: SourceId, ty: &Type) { - match ty { - Type::Parametrized(p) => match SymbolLocation::compute(&p.path) { - Err(segments) => self.diagnostics.push(make_invalid_path_diagnostic( - origin, - self.externals.current, - segments, - )), - Ok(loc) => { - for param in &p.params { - self.collect_type(origin, param) - } - let symref = self.identify_symbol( - origin, - origin, - loc, - p.segment(), - SymbolRegistry::Types, - ); - let origin_env = self.engine().get_environment_mut(origin).unwrap(); - origin_env.annotate(p, symref) - } - }, - Type::Callable(_) | Type::ByName(_) => { - panic!("Callable and By Name types are not yet supported.") - } - } - } - - fn extract_literal_argument(&self, call: &'a Call, nth: usize) -> Option<&'a str> { - match call.arguments.get(nth)? { - Expr::Literal(lit) => match &lit.parsed { - LiteralValue::String(str) => Some(str), - _ => None, - }, - _ => None, - } - } - - /// perform special operations if the bound call is a special call that may introduce new variables. 
- fn resolve_special_call(&mut self, env_id: SourceId, call: &Call) -> bool { - let Some(command) = self.extract_literal_argument(call, 0) else { - return false; - }; - match command { - "read" => { - if let Some(var) = self.extract_literal_argument(call, 1) { - let env = self.engine().get_environment_mut(env_id).unwrap(); - let symbol = env - .symbols - .declare_local(var.to_owned(), SymbolInfo::Variable); - env.annotate(&call.arguments[1], SymbolRef::Local(symbol)); - } - true - } - "cd" => { - let name = Name::from(vec!["std".to_owned(), command.to_owned()]); - let symbol = self.identify_symbol( - *self.stack.last().unwrap(), - env_id, - SymbolLocation { - name, - is_current_reef_explicit: false, - }, - call.arguments[0].segment().clone(), - SymbolRegistry::Objects, - ); - self.current_env().annotate(call, symbol); - true - } - _ => false, - } - } - - /// Identifies a [SymbolRef] from given source. - /// Will return [SymbolRef::Local] if the given name isn't qualified and was found in the current environment - /// Else, if the symbol does not exists, [SymbolRef::External] is returned and a new relation is requested for resolution. - fn identify_symbol( - &mut self, - source: SourceId, - origin: SourceId, - location: SymbolLocation, - segment: SourceSegment, - registry: SymbolRegistry, - ) -> SymbolRef { - let symbols = &mut self.engine.get_environment_mut(source).unwrap().symbols; - - macro_rules! track_global { - () => { - *symbols - .external(location) - .or_insert_with(|| self.relations.track_new_object(origin, registry)) - }; - } - - //if a reef is explicitly specified, then the reef and symbol's name must be resolved first - if location.is_current_reef_explicit { - return SymbolRef::External(track_global!()); - } - - let local_symbol = symbols.find_reachable(location.name.root(), registry); - - // If the requested registry is Object, and the symbol wasn't found, then try to search it in the types registry. - let local_symbol = if registry == SymbolRegistry::Objects { - local_symbol - .or_else(|| symbols.find_reachable(location.name.root(), SymbolRegistry::Types)) - } else { - local_symbol - }; - - match local_symbol { - None => SymbolRef::External(track_global!()), - Some(id) if location.name.is_qualified() => { - let var = symbols.get(id).unwrap(); - self.diagnostics.push(diagnose_invalid_symbol( - var.ty, - origin, - self.externals.current, - &location.name, - &[segment], - )); - // instantly declare a dead resolution object - // We could have returned None here to ignore the symbol but it's more appropriate to - // bind the variable occurrence with a dead object to signify that its bound symbol is invalid. - let id = track_global!(); - self.relations[id].state = RelationState::Dead; - SymbolRef::External(id) - } - Some(id) => SymbolRef::Local(id), - } - } -} - -fn import_ast( - name: Name, - importer: &mut impl ASTImporter, - visited: &mut HashSet, -) -> Option<(Imported, Name)> { - let mut parts = name.into_vec(); - while !parts.is_empty() { - let name = Name::from(parts.clone()); - if !visited.insert(name.clone()) { - return None; - } - match importer.import(&name) { - ImportResult::Success(imported) => return Some((imported, name)), - ImportResult::NotFound => { - // Nothing has been found, but we might have a chance by - // importing the parent module. - parts.pop(); - } - ImportResult::Failure => { - // Something has been found, but cannot be fully imported, - // so don't try to import anything else. 
- return None; - } - } - } - - None -} - -/// Lists all modules directly contained in the given module name. -fn list_inner_modules<'a>( - engine: &'a Engine, - module_fqn: &'a Name, -) -> impl Iterator { - engine - .environments() - .filter(move |(_, e)| { - e.parent.is_none() && e.fqn.tail().filter(|tail| tail == module_fqn).is_some() - }) - .map(|(_, e)| e) -} - -fn make_invalid_path_diagnostic( - source: SourceId, - reef: ReefId, - bad_segments: Vec, -) -> Diagnostic { - Diagnostic::new( - DiagnosticID::InvalidSymbolPath, - "Symbol path contains invalid items", - ) - .with_observations( - bad_segments - .into_iter() - .map(|s| Observation::context(source, reef, s, "Invalid path item")), - ) -} - -#[cfg(test)] -mod tests { - use pretty_assertions::assert_eq; - - use context::str_find::{find_in, find_in_nth}; - use parser::parse_trusted; - - use crate::importer::StaticImporter; - use crate::relations::{LocalId, RelationId}; - - use super::*; - - fn tree_walk<'e>( - expr: &'e Expr, - engine: &mut Engine<'e>, - relations: &mut Relations, - ) -> (Vec, Environment) { - let env = Environment::script(Name::new("test")); - let mut imports = Imports::default(); - let externals = Externals::default(); - let mut state = ResolutionState::new(ContentId(0), engine.track(ContentId(0), expr)); - let mut collector = SymbolCollector::new(engine, relations, &mut imports, &externals); - collector.engine.attach(SourceId(0), env); - collector.stack.push(SourceId(0)); - collector.tree_walk(&mut state, expr, &mut vec![]); - let env = collector.engine.get_environment(SourceId(0)).unwrap(); - collector.stack.pop(); - (collector.diagnostics, env.clone()) - } - - #[test] - fn use_between_expressions() { - let content = "use a; $a; use c; $c"; - let mut engine = Engine::default(); - let mut relations = Relations::default(); - let mut imports = Imports::default(); - let mut importer = StaticImporter::new([(Name::new("test"), content)], parse_trusted); - let res = SymbolCollector::collect_symbols( - &mut engine, - &mut relations, - &mut imports, - &Externals::default(), - &mut vec![Name::new("test")], - &mut HashSet::new(), - &mut importer, - ); - assert_eq!( - res, - vec![ - Diagnostic::new(DiagnosticID::UseBetweenExprs, "Unexpected use statement between expressions. 
Use statements must be at the top of the environment.") - .with_observation(( - SourceId(0), - ReefId(1), - find_in(content, "use c"), - ).into()), - ] - ) - } - - #[test] - fn bind_local_variables() { - let expr = parse_trusted("var bar = 4; $bar"); - let mut engine = Engine::default(); - let mut relations = Relations::default(); - let diagnostics = tree_walk(&expr, &mut engine, &mut relations).0; - assert_eq!(diagnostics, vec![]); - assert_eq!(relations.iter().collect::>(), vec![]); - } - - #[test] - fn test_symbol_clashes_with_module() { - let math_source = "use math::{add, multiply, divide}; fun multiply(a: Int, b: Int) = a * b"; - let math_src = math_source; - let empty_source = ""; - - let mut engine = Engine::default(); - let mut relations = Relations::default(); - let mut imports = Imports::default(); - let externals = Externals::default(); - let mut importer = StaticImporter::new( - [ - (Name::new("math"), math_src), - (Name::new("math::multiply"), empty_source), - (Name::new("math::add"), empty_source), - (Name::new("math::divide"), empty_source), - ], - parse_trusted, - ); - - let diagnostics = SymbolCollector::collect_symbols( - &mut engine, - &mut relations, - &mut imports, - &externals, - &mut vec![Name::new("math")], - &mut HashSet::new(), - &mut importer, - ); - assert_eq!(diagnostics, vec![ - Diagnostic::new(DiagnosticID::SymbolConflictsWithModule, "Declared symbol 'multiply' in module math clashes with module math::multiply") - .with_observation(Observation::here(SourceId(0), ReefId(1), find_in(math_source, "fun multiply(a: Int, b: Int) = a * b"), "This symbol has the same fully-qualified name as module math::multiply")) - .with_help("You should refactor this symbol with a name that does not conflicts with following modules: math::{divide, multiply, add}") - ]); - } - - #[test] - fn shadowed_imports() { - let source = "use A; use B; use A; use B"; - let mut engine = Engine::default(); - let mut relations = Relations::default(); - let mut imports = Imports::default(); - let mut importer = StaticImporter::new([(Name::new("test"), source)], parse_trusted); - - let diagnostics = SymbolCollector::collect_symbols( - &mut engine, - &mut relations, - &mut imports, - &Externals::default(), - &mut vec![Name::new("test")], - &mut HashSet::new(), - &mut importer, - ); - - assert_eq!( - diagnostics, - vec![ - Diagnostic::new(DiagnosticID::ShadowedImport, "A is imported twice.") - .with_observation(Observation::here( - SourceId(0), - ReefId(1), - find_in(source, "A"), - "useless import here" - )) - .with_observation(Observation::context( - SourceId(0), - ReefId(1), - find_in_nth(source, "A", 1), - "This statement shadows previous import" - )), - Diagnostic::new(DiagnosticID::ShadowedImport, "B is imported twice.") - .with_observation(Observation::here( - SourceId(0), - ReefId(1), - find_in(source, "B"), - "useless import here" - )) - .with_observation(Observation::context( - SourceId(0), - ReefId(1), - find_in_nth(source, "B", 1), - "This statement shadows previous import" - )), - ] - ) - } - - #[test] - fn bind_function_param() { - let source = "fun id(a) = return $a"; - let expr = parse_trusted(source); - let mut engine = Engine::default(); - let mut relations = Relations::default(); - let (diagnostics, env) = tree_walk(&expr, &mut engine, &mut relations); - assert_eq!(diagnostics, vec![]); - assert_eq!(relations.iter().collect::>(), vec![]); - assert_eq!( - env.get_raw_symbol(source.segment()), - Some(SymbolRef::Local(LocalId(0))) - ); - assert_eq!(env.get_raw_symbol(find_in(source, 
"a")), None); - assert_eq!(env.get_raw_symbol(find_in(source, "$a")), None); - let func_env = engine.get_environment(SourceId(1)).unwrap(); - assert_eq!( - func_env.get_raw_symbol(find_in(source, "a")), - Some(SymbolRef::Local(LocalId(0))) - ); - assert_eq!( - func_env.get_raw_symbol(find_in(source, "$a")), - Some(SymbolRef::Local(LocalId(0))) - ); - } - - #[test] - fn bind_primitive() { - let source = "read foo"; - let expr = parse_trusted(source); - let mut engine = Engine::default(); - let mut relations = Relations::default(); - let (diagnostics, env) = tree_walk(&expr, &mut engine, &mut relations); - assert_eq!(diagnostics, vec![]); - assert_eq!(relations.iter().collect::>(), vec![]); - assert_eq!(env.get_raw_symbol(find_in(source, "read")), None); - assert_eq!( - env.get_raw_symbol(find_in(source, "foo")), - Some(SymbolRef::Local(LocalId(0))) - ); - } - - #[test] - fn find_references() { - let source = "$bar; baz($foo, $bar)"; - let expr = parse_trusted(source); - - let mut engine = Engine::default(); - let mut relations = Relations::default(); - let (diagnostics, _) = tree_walk(&expr, &mut engine, &mut relations); - assert_eq!(diagnostics, vec![]); - assert_eq!( - relations - .find_references(&engine, RelationId(0)) - .map(|mut references| { - references.sort_by_key(|range| range.start); - references - }), - Some(vec![ - find_in(source, "$bar"), - find_in_nth(source, "$bar", 1) - ]) - ); - assert_eq!( - relations.find_references(&engine, RelationId(1)), - Some(vec![find_in(source, "baz($foo, $bar)")]) - ); - assert_eq!( - relations.find_references(&engine, RelationId(2)), - Some(vec![find_in(source, "$foo")]) - ); - } -} diff --git a/analyzer/src/steps/resolve.rs b/analyzer/src/steps/resolve.rs deleted file mode 100644 index 3ef62881..00000000 --- a/analyzer/src/steps/resolve.rs +++ /dev/null @@ -1,1370 +0,0 @@ -use ast::Expr; -use std::collections::HashSet; -use std::iter::once; - -use crate::diagnostic::{Diagnostic, DiagnosticID, Observation}; -use crate::engine::Engine; -use crate::environment::symbols::{resolve_loc, MagicSymbolKind, SymbolRegistry}; -use crate::environment::Environment; -use crate::imports::Imports; -use crate::name::Name; -use crate::reef::{Externals, ReefId}; -use crate::relations::{ - LocalId, RelationId, RelationState, Relations, ResolvedSymbol, SourceId, SymbolRef, -}; -use crate::steps::resolve::diagnostics::*; -use crate::steps::resolve::symbol::{ - resolve_absolute_symbol, resolve_symbol_from_imports, resolve_symbol_from_locals, - SymbolResolutionResult, -}; -use crate::steps::shared_diagnostics::diagnose_invalid_symbol; -use crate::steps::typing::magic::is_magic_variable_name; - -mod diagnostics; -mod import; -mod symbol; - -/// Main structure of the Symbols Resolver -/// The symbol resolver resolves the given relations between the collected symbols in the Engine. -/// -/// - lifetime 'a is the lifetime of references -/// - lifetime 'e is the expressions' lifetime, the Engine and Relations needed a special lifetime -/// as both of them contains references to AST expressions. -pub struct SymbolResolver<'a, 'e> { - engine: &'a Engine<'e>, - relations: &'a mut Relations, - imports: &'a mut Imports, - externals: &'a Externals<'a>, - - diagnostics: Vec, -} - -impl<'a, 'e> SymbolResolver<'a, 'e> { - ///Attempts to resolve the unresolved Engine's symbols contained in the given Relations. - /// Returns a vector of diagnostics raised by the resolution process. 
-    pub fn resolve_symbols(
-        engine: &'a Engine<'e>,
-        relations: &'a mut Relations,
-        imports: &'a mut Imports,
-        externals: &'a Externals<'e>,
-        to_visit: &mut Vec<Name>,
-        visited: &HashSet<Name>,
-    ) -> Vec<Diagnostic> {
-        let mut resolver = Self::new(engine, relations, imports, externals);
-        resolver.resolve(to_visit, visited);
-        resolver.diagnostics
-    }
-
-    /// Resolves symbols in an immediate environment,
-    /// where the capture environment is the last env of the given `env_stack`
-    ///
-    /// Nested environments, where [`Environment::has_strict_declaration_order`] is `true`, resolve
-    /// differently from non-nested environments. In a nested environment, the symbols captures
-    /// the order of the declarations. To know what is in scope, resolution must be done immediately
-    /// after the declaration of the environment that captures, during the collection phase.
-    ///
-    /// Imports are on the other hand always resolved after the collection phase is complete, during
-    /// a call to [`SymbolResolver::resolve_trees`], when using [`SymbolResolver::resolve_symbols`].
-    pub fn resolve_captures(
-        env_stack: &[(SourceId, &Environment)],
-        relations: &mut Relations,
-        reef: ReefId,
-        diagnostics: &mut Vec<Diagnostic>,
-    ) {
-        fn diagnose_invalid_symbol_in_capture(
-            env_stack: Vec<&Environment>,
-            capture_env_id: SourceId,
-            reef: ReefId,
-            name: &Name,
-            local: LocalId,
-            external: RelationId,
-        ) -> Diagnostic {
-            let mut segments: Vec<_> = env_stack
-                .iter()
-                .flat_map(|env| env.find_references(SymbolRef::External(external)))
-                .collect();
-
-            segments.sort_by_key(|s| s.start);
-
-            //TODO support observations in foreign environments to include concerned symbol declaration in diagnostics
-            let declaration_env = *env_stack.last().unwrap();
-
-            let var = declaration_env.symbols.get(local).unwrap();
-            diagnose_invalid_symbol(var.ty, capture_env_id, reef, name, &segments)
-        }
-
-        let ((capture_env_id, capture_env), parents) =
-            env_stack.split_last().expect("env_stack is empty");
-
-        'capture: for (loc, relation_id) in capture_env.symbols.external_symbols() {
-            let name = &loc.name;
-            for (pos, (env_id, env)) in parents.iter().rev().enumerate() {
-                let relation = &mut relations[relation_id];
-
-                if let Some(local) = env.symbols.find_reachable(name.root(), relation.registry) {
-                    if name.is_qualified() {
-                        let erroneous_capture = parents.iter().rev().take(pos + 1).map(|(_, s)| *s);
-
-                        let erroneous_capture =
-                            once(*capture_env).chain(erroneous_capture).collect();
-
-                        let diagnostic = diagnose_invalid_symbol_in_capture(
-                            erroneous_capture,
-                            *capture_env_id,
-                            reef,
-                            name,
-                            local,
-                            relation_id,
-                        );
-                        diagnostics.push(diagnostic);
-                        relation.state = RelationState::Dead;
-                    } else {
-                        let symbol = ResolvedSymbol {
-                            reef,
-                            source: *env_id,
-                            object_id: local,
-                        };
-                        relation.state = RelationState::Resolved(symbol);
-                    }
-                    continue 'capture;
-                }
-            }
-        }
-    }
-
-    fn new(
-        engine: &'a Engine<'e>,
-        relations: &'a mut Relations,
-        imports: &'a mut Imports,
-        externals: &'a Externals<'e>,
-    ) -> Self {
-        Self {
-            engine,
-            relations,
-            imports,
-            externals,
-
-            diagnostics: Vec::new(),
-        }
-    }
-
-    /// The starting point of the resolution phase.
-    /// enables the resolution and pushes diagnostics if any symbol could not be resolved.
- fn resolve(&mut self, to_visit: &mut Vec, visited: &HashSet) { - for (env_id, _) in self.engine.environments() { - if let Some(imports) = self.imports.get_imports_mut(env_id) { - Self::resolve_imports( - self.externals, - self.engine, - env_id, - imports, - &mut self.diagnostics, - ); - } - } - self.resolve_trees(to_visit, visited); - } - - /// Iterates over remaining unresolved symbols, and tries to resolve them by traversing the parent chain. - /// - /// This resolution should happen after all imports have been resolved in their respective environments, - /// to allow child environments to use imports from their parents. - fn resolve_trees(&mut self, to_visit: &mut Vec, visited: &HashSet) { - for (relation_id, relation) in self.relations.iter_mut() { - if relation.state != RelationState::Unresolved { - continue; - } - let (origin, registry) = { (relation.origin, relation.registry) }; - - // Get the local naming of the object - let origin_env = self - .engine - .get_environment(origin) - .expect("Environment declared an unknown parent"); - - let symbol_loc = origin_env - .symbols - .find_external_symbol_name(relation_id) - .expect("Unknown object name"); - let symbol_name = &symbol_loc.name; - - let mut result = SymbolResolutionResult::NotFound; - let current_reef = self.externals.current; - - // if it explicitly targets the current reef, then do a simple search of the corresponding name - if symbol_loc.is_current_reef_explicit { - result = resolve_absolute_symbol(self.engine, symbol_name, current_reef, registry); - } else { - // Follow the parent chain until we get a decisive result - let mut current = Some((origin, origin_env)); - while let Some((env_id, env)) = current { - if env_id != origin && !env.has_strict_declaration_order() { - // Locals symbols are always treated first, before imports. - // The current environment might already owns the resolution result as a global symbol. - // This happens only if it used it, so we ignore that fact here to always solve external - // symbols via imports. - // - // We omit this resolution for origin environments as the resolution of their own locals - // is done by the collection phase - result = resolve_symbol_from_locals( - env_id, - env, - symbol_name, - current_reef, - registry, - ); - } - - if result == SymbolResolutionResult::NotFound { - if let Some(imports) = &self.imports.get_imports(env_id) { - // If the symbol wasn't found from the environment locals, try to resolve using its imports - result = resolve_symbol_from_imports( - self.engine, - imports, - symbol_name, - self.externals, - registry, - ) - } - } - - if result != SymbolResolutionResult::NotFound { - break; //we found something - } - - current = env - .parent - .and_then(|id| self.engine.get_environment(id).map(|env| (id, env))); - } - - //ultimate step is to try to resolve this symbol as an absolute symbol. - if result == SymbolResolutionResult::NotFound { - result = resolve_loc(symbol_loc, self.engine, self.externals) - //cannot reference to self content if location does not explicitly references current reef - .filter(|(_, id)| *id != current_reef) - .map(|(engine, id)| { - resolve_absolute_symbol(engine, symbol_name, id, registry) - }) - .unwrap_or(SymbolResolutionResult::NotFound) - } - - //if the symbol is a type, and if its name is unqualified, try to resolve it from the special `lang` reef. 
- if registry == SymbolRegistry::Types - && (!symbol_name.is_qualified() - || (symbol_name.parts().len() == 2 && symbol_name.root() == "lang")) - { - if let Some(primitive_type) = self - .externals - .lang() - .type_context - .get_type_id(symbol_name.simple_name()) - { - result = SymbolResolutionResult::Resolved(ResolvedSymbol::lang_symbol( - LocalId(primitive_type.0), - )); - } - } - - // if the symbol is an object, and not qualified, try to treat it as a magic variable. - if registry == SymbolRegistry::Objects - && !symbol_name.is_qualified() - && is_magic_variable_name(symbol_name.root()) - { - let mut script_id = origin; - let declared_pargs = loop { - let script_env = self.engine.get_environment(script_id).unwrap(); - if let Some(declared_pargs) = script_env - .symbols - .find_magic(MagicSymbolKind::ProgramArguments) - { - break declared_pargs; - } - - script_id = script_env - .parent - .expect("No implicit program arguments has been declared"); - }; - - let resolved = ResolvedSymbol::new(current_reef, script_id, declared_pargs); - result = SymbolResolutionResult::Resolved(resolved) - } - } - - match result { - SymbolResolutionResult::Resolved(symbol) => { - if symbol.reef == current_reef - && matches!( - self.engine.get_expression(origin), - Some(Expr::StructDeclaration(_)) - ) - { - let origin_parent = origin_env.parent.unwrap(); - let mut observations: Vec<_> = - origin_env.find_references(SymbolRef::External(relation_id)) - .into_iter() - .map(|seg| Observation::context( - origin_parent, - current_reef, - seg, - "This structure contains fields whose types are defined in the structure's reef", - )) - .collect(); - observations.sort_by_key(|s| s.location.segment.start); - self.diagnostics.push(Diagnostic::new( - DiagnosticID::UnsupportedFeature, - "Referencing user-defined types from within a structure is not supported yet", - ).with_observations(observations)) - } - relation.state = RelationState::Resolved(symbol); - } - SymbolResolutionResult::DeadImport => { - self.diagnostics - .push(diagnose_invalid_symbol_from_dead_import( - self.engine, - origin, - current_reef, - self.imports.get_imports(origin).unwrap(), - relation_id, - symbol_name, - )); - relation.state = RelationState::Dead; - } - SymbolResolutionResult::Invalid(symbol) => { - let env_var = self - .engine - .get_environment(symbol.source) - .expect("resolved symbol points to an unknown environment"); - let var = env_var - .symbols - .get(symbol.object_id) - .expect("resolved symbol points to an unknown variable in environment"); - let mut occurrences: Vec<_> = - origin_env.find_references(SymbolRef::External(relation_id)); - occurrences.sort_by_key(|s| s.start); - - self.diagnostics.push(diagnose_invalid_symbol( - var.ty, - origin, - current_reef, - symbol_name, - &occurrences, - )); - relation.state = RelationState::Dead; - } - // All attempts failed to resolve the symbol - SymbolResolutionResult::NotFound => { - let symbol_env_name = symbol_name.tail().unwrap_or(symbol_name.clone()); - - if symbol_name.is_qualified() && !visited.contains(&symbol_env_name) { - // We put the unknown name in the visitable - to_visit.push(symbol_env_name); - // the name wasn't known from the analyzer, let's give it a chance to be resolved - continue; - } //if the name were already requested, it's definitely unresolvable - - // If we reach this point, the symbol could not be resolved, during any of the previous phases / cycles. 
- self.diagnostics.push(diagnose_unresolved_external_symbols( - relation_id, - origin, - current_reef, - origin_env, - symbol_name, - )); - relation.state = RelationState::Dead - } - } - } - } -} - -#[cfg(test)] -mod tests { - use std::collections::{HashMap, HashSet}; - - use indexmap::IndexMap; - - use context::str_find::{find_in, find_in_nth}; - use parser::parse_trusted; - - use crate::diagnostic::{Diagnostic, DiagnosticID, Observation}; - use crate::engine::Engine; - use crate::environment::symbols::{SymbolLocation, SymbolRegistry}; - use crate::importer::StaticImporter; - use crate::imports::{Imports, ResolvedImport, SourceImports, UnresolvedImport}; - use crate::name::Name; - use crate::reef::{Externals, Reef, ReefId, LANG_REEF}; - use crate::relations::{ - LocalId, Relation, RelationId, RelationState, Relations, ResolvedSymbol, SourceId, - }; - use crate::steps::collect::SymbolCollector; - use crate::steps::resolve::SymbolResolver; - use crate::steps::resolve_sources; - use crate::types::INT; - use crate::{resolve_all, ResolutionResult}; - - use pretty_assertions::assert_eq; - - #[test] - fn test_reefs_external_symbols_resolution() { - let mut reefs = Externals::default(); - - fn define_reef<'e, const N: usize>( - externals: &Externals, - sources: [(Name, &'e str); N], - ) -> (Vec, ResolutionResult<'e>) { - let mut diagnostics = Vec::new(); - let mut result = ResolutionResult::default(); - - resolve_sources( - sources.iter().map(|(n, _)| n.clone()).collect(), - &mut result, - &mut StaticImporter::new(sources, parse_trusted), - externals, - &mut diagnostics, - ); - - (diagnostics, result) - } - - let (diagnostics, result) = define_reef( - &reefs, - [(Name::new("std"), "fun foo() -> Exitcode = echo stdlib")], - ); - assert_eq!(diagnostics, vec![]); - reefs.register(Reef::new_partial( - "std".to_owned(), - result.engine, - result.relations, - )); - - let main_source = "use std::foo; use reef::std::foo as my_foo; reef::std::foo(); std::foo(); foo(); my_foo()"; - let (diagnostics, result) = define_reef( - &reefs, - [ - (Name::new("main"), main_source), - (Name::new("std"), "fun foo() -> Exitcode = echo fake stdlib"), - ], - ); - assert_eq!(diagnostics, vec![]); - reefs.register(Reef::new_partial( - "test".to_owned(), - result.engine, - result.relations, - )); - - let reef = reefs.get_reef(ReefId(2)).unwrap(); - assert_eq!(reef.name, "test"); - assert_eq!(reefs.get_reef(ReefId(1)).unwrap().name, "std"); - assert_eq!(reefs.get_reef(LANG_REEF).unwrap().name, "lang"); - - assert_eq!( - result.imports.get_imports(SourceId(2)).unwrap(), - &SourceImports::with( - IndexMap::new(), - HashMap::from([ - ( - "foo".to_string(), - ( - ResolvedImport::Symbols(HashMap::from([( - SymbolRegistry::Objects, - ResolvedSymbol::new(ReefId(1), SourceId(0), LocalId(0)) - )])), - find_in(main_source, "std::foo") - ) - ), - ( - "my_foo".to_string(), - ( - ResolvedImport::Symbols(HashMap::from([( - SymbolRegistry::Objects, - ResolvedSymbol::new(ReefId(2), SourceId(0), LocalId(0)) - )])), - find_in(main_source, "reef::std::foo as my_foo") - ) - ), - ]) - ) - ); - - assert_eq!( - reef.relations.iter().collect::>(), - vec![ - ( - // this one is foo's return type relation with Exitcode lang type - RelationId(0), - &Relation::resolved( - SourceId(1), - ResolvedSymbol::lang_symbol(LocalId(4)), - SymbolRegistry::Types, - ) - ), - // main's imports relations - ( - RelationId(1), - &Relation::resolved( - SourceId(2), - ResolvedSymbol::new(ReefId(2), SourceId(0), LocalId(0)), - SymbolRegistry::Objects, - ) - ), - ( - 
RelationId(2), - &Relation::resolved( - SourceId(2), - ResolvedSymbol::new(ReefId(1), SourceId(0), LocalId(0)), - SymbolRegistry::Objects, - ) - ), - ( - RelationId(3), - &Relation::resolved( - SourceId(2), - ResolvedSymbol::new(ReefId(1), SourceId(0), LocalId(0)), - SymbolRegistry::Objects, - ) - ), - ( - RelationId(4), - &Relation::resolved( - SourceId(2), - ResolvedSymbol::new(ReefId(2), SourceId(0), LocalId(0)), - SymbolRegistry::Objects, - ) - ), - ] - ) - } - - #[test] - fn report_unknown_imported_symbol() { - let mut importer = StaticImporter::new( - [ - (Name::new("main"), "reef::math::id(9)"), - ( - Name::new("math"), - "fun id(n: Int) -> Int = $n\nfun dummy() = {}", - ), - ], - parse_trusted, - ); - let externals = Externals::default(); - let mut diagnostics = Vec::new(); - let res = resolve_all( - Name::new("main"), - &externals, - &mut importer, - &mut diagnostics, - ); - assert_eq!(diagnostics, vec![]); - assert_eq!( - res.relations - .iter() - .map(|(_, r)| r.clone()) - .collect::>(), - vec![ - Relation::resolved( - SourceId(0), - ResolvedSymbol::new(ReefId(1), SourceId(1), LocalId(0)), - SymbolRegistry::Objects, - ), - Relation::resolved( - SourceId(2), - ResolvedSymbol::lang_symbol(LocalId(INT.type_id.0)), - SymbolRegistry::Types, - ), - ] - ) - } - - #[test] - fn test_capture() { - let src = "\ - fun foo() = { - var x = 1 - fun foo1() = { - echo $x - fun foo2() = { - echo $x - } - } - } - "; - let mut importer = StaticImporter::new([(Name::new("main"), src)], parse_trusted); - let externals = Externals::default(); - let mut diagnostics = Vec::new(); - let res = resolve_all( - Name::new("main"), - &externals, - &mut importer, - &mut diagnostics, - ); - assert_eq!(diagnostics, vec![]); - assert_eq!( - res.relations.iter().collect::>(), - vec![ - ( - RelationId(0), - &Relation::resolved( - SourceId(2), - ResolvedSymbol::new(ReefId(1), SourceId(1), LocalId(0)), - SymbolRegistry::Objects, - ) - ), - ( - RelationId(1), - &Relation::resolved( - SourceId(3), - ResolvedSymbol::new(ReefId(1), SourceId(1), LocalId(0)), - SymbolRegistry::Objects, - ) - ), - ] - ) - } - - #[test] - fn test_reef_imports_resolution() { - let math_src = "val PI = 3.14"; - let std_src = "val Foo = 'moshell_std'; val Bar = $Foo"; - let io_src = "fun output() = (); fun input() = ()"; - let test_src = " - use reef::math::PI - use reef::std::{io, foo} - use reef::std::* - use reef::std::io::{input, output} - "; - - let mut engine = Engine::default(); - let mut relations = Relations::default(); - let mut imports = Imports::default(); - - let mut to_visit = vec![Name::new("test")]; - let mut visited = HashSet::new(); - let mut importer = StaticImporter::new( - [ - (Name::new("math"), math_src), - (Name::new("std"), std_src), - (Name::new("std::io"), io_src), - (Name::new("test"), test_src), - ], - parse_trusted, - ); - let mut diagnostics = SymbolCollector::collect_symbols( - &mut engine, - &mut relations, - &mut imports, - &Externals::default(), - &mut to_visit, - &mut visited, - &mut importer, - ); - assert_eq!(diagnostics, vec![]); - let test_env_imports = imports.get_imports_mut(SourceId(0)).unwrap(); - assert_eq!( - test_env_imports, - &SourceImports::with( - IndexMap::from([ - ( - UnresolvedImport::Symbol { - alias: None, - loc: SymbolLocation::in_current_reef(Name::new("math::PI")), - }, - find_in(test_src, "reef::math::PI") - ), - ( - UnresolvedImport::Symbol { - alias: None, - loc: SymbolLocation::in_current_reef(Name::new("std::io")), - }, - find_in(test_src, "io") - ), - ( - UnresolvedImport::Symbol { 
- alias: None, - loc: SymbolLocation::in_current_reef(Name::new("std::foo")), - }, - find_in(test_src, "foo") - ), - ( - UnresolvedImport::AllIn(SymbolLocation::in_current_reef(Name::new("std"))), - find_in(test_src, "reef::std::*") - ), - ( - UnresolvedImport::Symbol { - alias: None, - loc: SymbolLocation::in_current_reef(Name::new("std::io::input")), - }, - find_in(test_src, "input") - ), - ( - UnresolvedImport::Symbol { - alias: None, - loc: SymbolLocation::in_current_reef(Name::new("std::io::output")), - }, - find_in(test_src, "output") - ), - ]), - HashMap::new() - ), - ); - - SymbolResolver::resolve_imports( - &Externals::default(), - &engine, - SourceId(0), - test_env_imports, - &mut diagnostics, - ); - assert_eq!(to_visit, vec![]); - assert_eq!( - diagnostics, - vec![Diagnostic::new( - DiagnosticID::ImportResolution, - "unable to find imported symbol `foo` in module `std`.", - ) - .with_observation((SourceId(0), ReefId(1), find_in(test_src, "foo")).into())] - ); - - assert_eq!( - test_env_imports, - &SourceImports::with( - IndexMap::default(), - HashMap::from([ - ( - "io".to_string(), - ( - ResolvedImport::Env { - reef: ReefId(1), - source: SourceId(1), - }, - find_in(test_src, "io") - ) - ), - ( - "foo".to_string(), - (ResolvedImport::Dead, find_in(test_src, "foo")) - ), - ( - "PI".to_string(), - ( - ResolvedImport::Symbols(HashMap::from([( - SymbolRegistry::Objects, - ResolvedSymbol::new(ReefId(1), SourceId(5), LocalId(0)) - )])), - find_in(test_src, "reef::math::PI") - ) - ), - ( - "Bar".to_string(), - ( - ResolvedImport::Symbols(HashMap::from([( - SymbolRegistry::Objects, - ResolvedSymbol::new(ReefId(1), SourceId(4), LocalId(1)) - )])), - find_in(test_src, "reef::std::*") - ) - ), - ( - "Foo".to_string(), - ( - ResolvedImport::Symbols(HashMap::from([( - SymbolRegistry::Objects, - ResolvedSymbol::new(ReefId(1), SourceId(4), LocalId(0)) - )])), - find_in(test_src, "reef::std::*") - ) - ), - ( - "output".to_string(), - ( - ResolvedImport::Symbols(HashMap::from([( - SymbolRegistry::Objects, - ResolvedSymbol::new(ReefId(1), SourceId(1), LocalId(0)) - )])), - find_in(test_src, "output") - ) - ), - ( - "input".to_string(), - ( - ResolvedImport::Symbols(HashMap::from([( - SymbolRegistry::Objects, - ResolvedSymbol::new(ReefId(1), SourceId(1), LocalId(1)) - )])), - find_in(test_src, "input") - ) - ), - ]) - ) - ); - } - - #[test] - fn test_symbols_resolution() { - let math_src = "val PI = 3.14"; - let std_src = "val Foo = 'moshell_std'; val Bar = $Foo"; - let io_src = "val output = 'OutputStream()'; val input = 'InputStream()'"; - let test_src = "\ - use reef::math::PI - use reef::std::{Bar, io::*} - - fun foo() = $x - - val output = $output - val x = $Bar - val y = $PI - "; - - let mut engine = Engine::default(); - let mut relations = Relations::default(); - let mut imports = Imports::default(); - let mut importer = StaticImporter::new( - [ - (Name::new("math"), math_src), - (Name::new("std"), std_src), - (Name::new("std::io"), io_src), - (Name::new("test"), test_src), - ], - parse_trusted, - ); - - let mut to_visit = vec![Name::new("test")]; - let mut visited = HashSet::new(); - - let diagnostics = SymbolCollector::collect_symbols( - &mut engine, - &mut relations, - &mut imports, - &Externals::default(), - &mut to_visit, - &mut visited, - &mut importer, - ); - assert_eq!(diagnostics, vec![]); - let diagnostics = SymbolResolver::resolve_symbols( - &engine, - &mut relations, - &mut imports, - &Externals::default(), - &mut to_visit, - &visited, - ); - assert_eq!(diagnostics, vec![]); - - 
assert_eq!( - relations.iter().map(|(_, r)| r.clone()).collect::>(), - vec![ - Relation::resolved( - SourceId(1), - ResolvedSymbol::new(ReefId(1), SourceId(0), LocalId(2)), - SymbolRegistry::Objects, - ), - Relation::resolved( - SourceId(0), - ResolvedSymbol::new(ReefId(1), SourceId(2), LocalId(0)), - SymbolRegistry::Objects, - ), - Relation::resolved( - SourceId(0), - ResolvedSymbol::new(ReefId(1), SourceId(3), LocalId(1)), - SymbolRegistry::Objects, - ), - Relation::resolved( - SourceId(0), - ResolvedSymbol::new(ReefId(1), SourceId(4), LocalId(0)), - SymbolRegistry::Objects, - ), - ] - ) - } - - #[test] - fn test_qualified_symbols_resolution() { - let math_src = "fun add(a: Int, b: Int) = a + b"; - let math_advanced_src = "fun multiply(a: Int, b: Int) = a * b"; - let std_src = "fun foo() = 45; fun bar() = 78"; - let test_src = - "\ - use reef::math::advanced - - val x = reef::std::foo() - val y = reef::std::bar() - val sum = reef::math::add($x + $y, advanced::multiply(reef::std::foo(), reef::std::bar())) - "; - - let mut engine = Engine::default(); - let mut relations = Relations::default(); - let mut imports = Imports::default(); - let mut importer = StaticImporter::new( - [ - (Name::new("math"), math_src), - (Name::new("std"), std_src), - (Name::new("math::advanced"), math_advanced_src), - (Name::new("test"), test_src), - ], - parse_trusted, - ); - - let mut to_visit = vec![Name::new("test")]; - let mut visited = HashSet::new(); - - //first cycle - let diagnostics = SymbolCollector::collect_symbols( - &mut engine, - &mut relations, - &mut imports, - &Externals::default(), - &mut to_visit, - &mut visited, - &mut importer, - ); - assert_eq!(diagnostics, vec![]); - let diagnostics = SymbolResolver::resolve_symbols( - &engine, - &mut relations, - &mut imports, - &Externals::default(), - &mut to_visit, - &visited, - ); - assert_eq!(diagnostics, vec![]); - to_visit.sort(); - to_visit.dedup(); - assert_eq!(to_visit, vec![Name::new("math"), Name::new("std")]); - let mut visited_vec = visited.iter().cloned().collect::>(); - visited_vec.sort(); - assert_eq!( - visited_vec, - vec![Name::new("math::advanced"), Name::new("test")] - ); - - //second cycle - let diagnostics = SymbolCollector::collect_symbols( - &mut engine, - &mut relations, - &mut imports, - &Externals::default(), - &mut to_visit, - &mut visited, - &mut importer, - ); - assert_eq!(diagnostics, vec![]); - let diagnostics = SymbolResolver::resolve_symbols( - &engine, - &mut relations, - &mut imports, - &Externals::default(), - &mut to_visit, - &visited, - ); - assert_eq!(diagnostics, vec![]); - assert_eq!(to_visit, vec![]); - - let mut visited_vec = visited.iter().cloned().collect::>(); - visited_vec.sort(); - assert_eq!( - visited_vec, - vec![ - Name::new("math"), - Name::new("math::advanced"), - Name::new("std"), - Name::new("test"), - ] - ); - - assert_eq!( - relations.iter().map(|(_, r)| r.clone()).collect::>(), - vec![ - Relation::resolved( - SourceId(0), - ResolvedSymbol::new(ReefId(1), SourceId(3), LocalId(0)), - SymbolRegistry::Objects, - ), - Relation::resolved( - SourceId(0), - ResolvedSymbol::new(ReefId(1), SourceId(3), LocalId(1)), - SymbolRegistry::Objects, - ), - Relation::resolved( - SourceId(0), - ResolvedSymbol::new(ReefId(1), SourceId(6), LocalId(0)), - SymbolRegistry::Objects, - ), - Relation::resolved( - SourceId(0), - ResolvedSymbol::new(ReefId(1), SourceId(1), LocalId(0)), - SymbolRegistry::Objects, - ), - Relation::resolved( - SourceId(2), - ResolvedSymbol::lang_symbol(LocalId(INT.type_id.0)), - 
SymbolRegistry::Types, - ), - Relation::resolved( - SourceId(7), - ResolvedSymbol::lang_symbol(LocalId(INT.type_id.0)), - SymbolRegistry::Types, - ), - ] - ) - } - - #[test] - fn test_symbol_invalid_inner_symbol() { - let test_src = "\ - fun foo() = 75 - foo::x(); foo::y::z(); foo::y::z() - - fun bar() = { - foo::y::z() - foo::y::z() - foo::y::z() - foz::x() - - fun foz() = { - a::foo::in_local() - } - - var a = 78 - - foz::x() - } - "; - - let mut importer = StaticImporter::new([(Name::new("test"), test_src)], parse_trusted); - let externals = Externals::default(); - let mut diagnostics = Vec::new(); - let result = resolve_all( - Name::new("test"), - &externals, - &mut importer, - &mut diagnostics, - ); - let relations = result.relations; - - assert_eq!( - diagnostics, - vec![ - Diagnostic::new( - DiagnosticID::InvalidSymbol, - "`foo` is a function which cannot export any inner symbols" - ) - .with_observation((SourceId(0), ReefId(1), find_in(test_src, "foo::x()")).into()) - .with_help("`x` is an invalid symbol in function `foo`"), - Diagnostic::new( - DiagnosticID::InvalidSymbol, - "`foo` is a function which cannot export any inner symbols" - ) - .with_observation((SourceId(0), ReefId(1), find_in(test_src, "foo::y::z()")).into()) - .with_help("`y::z` is an invalid symbol in function `foo`"), - Diagnostic::new( - DiagnosticID::InvalidSymbol, - "`foo` is a function which cannot export any inner symbols" - ) - .with_observation( - ( - SourceId(0), - ReefId(1), - find_in_nth(test_src, "foo::y::z()", 1) - ) - .into() - ) - .with_help("`y::z` is an invalid symbol in function `foo`"), - Diagnostic::new( - DiagnosticID::InvalidSymbol, - "`foz` is a function which cannot export any inner symbols" - ) - .with_observation( - (SourceId(2), ReefId(1), find_in_nth(test_src, "foz::x()", 1)).into() - ) - .with_help("`x` is an invalid symbol in function `foz`"), - Diagnostic::new( - DiagnosticID::InvalidSymbol, - "`foo` is a function which cannot export any inner symbols", - ) - .with_observation( - ( - SourceId(2), - ReefId(1), - find_in_nth(test_src, "foo::y::z()", 2) - ) - .into() - ) - .with_observation( - ( - SourceId(2), - ReefId(1), - find_in_nth(test_src, "foo::y::z()", 3) - ) - .into() - ) - .with_observation( - ( - SourceId(2), - ReefId(1), - find_in_nth(test_src, "foo::y::z()", 4) - ) - .into() - ) - .with_help("`y::z` is an invalid symbol in function `foo`"), - Diagnostic::new( - DiagnosticID::UnknownSymbol, - "Could not resolve symbol `a::foo::in_local`." 
- ) - .with_observation( - ( - SourceId(3), - ReefId(1), - find_in(test_src, "a::foo::in_local()") - ) - .into() - ), - ] - ); - - for (_, relation) in relations.iter() { - assert_eq!(relation.state, RelationState::Dead) - } - } - - #[test] - fn test_unknown_symbols() { - let a_src = "val C = 'A'"; - - let source = "\ - use reef::A::B - use reef::B::C - use C::* - - $a; $a; $a - $C; $B; - "; - let mut engine = Engine::default(); - let mut relations = Relations::default(); - let mut imports = Imports::default(); - let mut importer = StaticImporter::new( - [(Name::new("test"), source), (Name::new("A"), a_src)], - parse_trusted, - ); - - let mut to_visit = vec![Name::new("test")]; - let mut visited = HashSet::new(); - - let diagnostics = SymbolCollector::collect_symbols( - &mut engine, - &mut relations, - &mut imports, - &Externals::default(), - &mut to_visit, - &mut visited, - &mut importer, - ); - assert_eq!(diagnostics, vec![]); - - let diagnostics = SymbolResolver::resolve_symbols( - &engine, - &mut relations, - &mut imports, - &Externals::default(), - &mut to_visit, - &visited, - ); - to_visit.dedup(); - assert_eq!(to_visit, vec![]); - assert_eq!( - diagnostics, - vec![ - Diagnostic::new( - DiagnosticID::ImportResolution, - "unable to find imported symbol `B` in module `A`.", - ) - .with_observation((SourceId(0), ReefId(1), find_in(source, "reef::A::B")).into()), - Diagnostic::new( - DiagnosticID::ImportResolution, - "unable to find imported symbol `B::C`." - ) - .with_observation((SourceId(0), ReefId(1), find_in(source, "reef::B::C")).into()), - Diagnostic::new( - DiagnosticID::ImportResolution, - "unable to find reef `C`." - ) - - .with_observation((SourceId(0), ReefId(1), find_in(source, "C::*")).into()), - Diagnostic::new( - DiagnosticID::UnknownSymbol, - "Could not resolve symbol `a`." - ) - .with_observation((SourceId(0), ReefId(1), find_in_nth(source, "$a", 0)).into()) - .with_observation((SourceId(0), ReefId(1), find_in_nth(source, "$a", 1)).into()) - .with_observation((SourceId(0), ReefId(1), find_in_nth(source, "$a", 2)).into()), - Diagnostic::new( - DiagnosticID::InvalidSymbol, - "unresolvable symbol `C` has no choice but to be ignored due to invalid import of `C`." - ) - .with_observation(Observation::context(SourceId(0), ReefId(1), find_in_nth(source, "reef::B::C", 0), "invalid import introduced here")) - .with_observation((SourceId(0), ReefId(1), find_in(source, "$C")).into()), - Diagnostic::new( - DiagnosticID::InvalidSymbol, - "unresolvable symbol `B` has no choice but to be ignored due to invalid import of `B`." 
- ) - .with_observation(Observation::context(SourceId(0), ReefId(1), find_in_nth(source, "reef::A::B", 0), "invalid import introduced here")) - .with_observation((SourceId(0), ReefId(1), find_in(source, "$B")).into()), - ] - ) - } - - #[test] - fn test_global_unknown_symbols() { - let source = "\ - $C; $C - var C = 45 - $a; $a; $a - $C; $C; - "; - let mut engine = Engine::default(); - let mut relations = Relations::default(); - let mut imports = Imports::default(); - let mut importer = StaticImporter::new([(Name::new("test"), source)], parse_trusted); - - let mut to_visit = vec![Name::new("test")]; - let mut visited = HashSet::new(); - - let diagnostics = SymbolCollector::collect_symbols( - &mut engine, - &mut relations, - &mut imports, - &Externals::default(), - &mut to_visit, - &mut visited, - &mut importer, - ); - assert_eq!(diagnostics, vec![]); - - let diagnostic = SymbolResolver::resolve_symbols( - &engine, - &mut relations, - &mut imports, - &Externals::default(), - &mut to_visit, - &visited, - ); - - assert_eq!( - diagnostic, - vec![ - Diagnostic::new(DiagnosticID::UnknownSymbol, "Could not resolve symbol `C`.") - .with_observation((SourceId(0), ReefId(1), find_in_nth(source, "$C", 0)).into()) - .with_observation( - (SourceId(0), ReefId(1), find_in_nth(source, "$C", 1)).into() - ), - Diagnostic::new(DiagnosticID::UnknownSymbol, "Could not resolve symbol `a`.") - .with_observation((SourceId(0), ReefId(1), find_in_nth(source, "$a", 0)).into()) - .with_observation((SourceId(0), ReefId(1), find_in_nth(source, "$a", 1)).into()) - .with_observation( - (SourceId(0), ReefId(1), find_in_nth(source, "$a", 2)).into() - ), - ] - ) - } - - #[test] - fn test_local_unknown_symbols() { - let source = "\ - fun foo() = { - $C; $C - var C = 45 - $a; $a; $a - $C; $C; - } - "; - let mut engine = Engine::default(); - let mut relations = Relations::default(); - let mut imports = Imports::default(); - let mut importer = StaticImporter::new([(Name::new("test"), source)], parse_trusted); - - let mut to_visit = vec![Name::new("test")]; - let mut visited = HashSet::new(); - - let diagnostics = SymbolCollector::collect_symbols( - &mut engine, - &mut relations, - &mut imports, - &Externals::default(), - &mut to_visit, - &mut visited, - &mut importer, - ); - assert_eq!(diagnostics, vec![]); - - let diagnostic = SymbolResolver::resolve_symbols( - &engine, - &mut relations, - &mut imports, - &Externals::default(), - &mut to_visit, - &visited, - ); - - assert_eq!( - diagnostic, - vec![ - Diagnostic::new(DiagnosticID::UnknownSymbol, "Could not resolve symbol `C`.",) - .with_observation((SourceId(1), ReefId(1), find_in(source, "$C")).into()) - .with_observation( - (SourceId(1), ReefId(1), find_in_nth(source, "$C", 1)).into() - ), - Diagnostic::new(DiagnosticID::UnknownSymbol, "Could not resolve symbol `a`.",) - .with_observation((SourceId(1), ReefId(1), find_in_nth(source, "$a", 0)).into()) - .with_observation((SourceId(1), ReefId(1), find_in_nth(source, "$a", 1)).into()) - .with_observation( - (SourceId(1), ReefId(1), find_in_nth(source, "$a", 2)).into() - ), - ] - ) - } - - #[test] - fn find_in_parent_environment() { - let source = "val found = 'false'; fun find() = $found"; - - let mut engine = Engine::default(); - let mut relations = Relations::default(); - let mut imports = Imports::default(); - let mut importer = StaticImporter::new([(Name::new("test"), source)], parse_trusted); - - let mut to_visit = vec![Name::new("test")]; - let mut visited = HashSet::new(); - - let diagnostics = 
SymbolCollector::collect_symbols(
-            &mut engine,
-            &mut relations,
-            &mut imports,
-            &Externals::default(),
-            &mut to_visit,
-            &mut visited,
-            &mut importer,
-        );
-
-        assert_eq!(diagnostics, vec![]);
-
-        let diagnostics = SymbolResolver::resolve_symbols(
-            &engine,
-            &mut relations,
-            &mut imports,
-            &Externals::default(),
-            &mut to_visit,
-            &visited,
-        );
-
-        assert_eq!(diagnostics, vec![]);
-
-        assert_eq!(
-            relations.iter().map(|(_, r)| r.clone()).collect::<Vec<_>>(),
-            vec![Relation::resolved(
-                SourceId(1),
-                ResolvedSymbol::new(ReefId(1), SourceId(0), LocalId(0)),
-                SymbolRegistry::Objects,
-            )]
-        )
-    }
-}
diff --git a/analyzer/src/steps/resolve/diagnostics.rs b/analyzer/src/steps/resolve/diagnostics.rs
deleted file mode 100644
index 77eb1f65..00000000
--- a/analyzer/src/steps/resolve/diagnostics.rs
+++ /dev/null
@@ -1,99 +0,0 @@
-//! contains diagnostics only emitted by the resolution state
-
-use context::source::SourceSegment;
-
-use crate::diagnostic::{Diagnostic, DiagnosticID, Observation};
-use crate::engine::Engine;
-use crate::environment::Environment;
-use crate::imports::SourceImports;
-use crate::name::Name;
-use crate::reef::ReefId;
-use crate::relations::{RelationId, SourceId, SymbolRef};
-
-/// Creates a diagnostic for a symbol being invalidated due to its invalid import bound.
-/// The caller must ensure that `env_id` is valid and that the given name's root is contained in the given environment's variables.
-pub fn diagnose_invalid_symbol_from_dead_import(
-    engine: &Engine,
-    env_id: SourceId,
-    reef: ReefId,
-    env_imports: &SourceImports,
-    relation: RelationId,
-    name: &Name,
-) -> Diagnostic {
-    let name_root = name.root();
-
-    let env = engine.get_environment(env_id).expect("invalid env id");
-    let segments = env.find_references(SymbolRef::External(relation));
-
-    let msg = format!("unresolvable symbol `{name}` has no choice but to be ignored due to invalid import of `{name_root}`.");
-    let invalid_import_seg = env_imports
-        .get_import_segment(name_root)
-        .expect("unknown import");
-
-    let mut segments: Vec<Observation> = segments
-        .iter()
-        .map(|seg| (env_id, reef, seg.clone()).into())
-        .collect();
-
-    segments.sort_by_key(|s| s.location.segment.start);
-
-    Diagnostic::new(DiagnosticID::InvalidSymbol, msg)
-        .with_observation(Observation::here(
-            env_id,
-            reef,
-            invalid_import_seg,
-            "invalid import introduced here",
-        ))
-        .with_observations(segments)
-}
-
-/// Appends a diagnostic for an external symbol that could not be resolved.
-///
-/// Each expression that uses this symbol (such as variable references) will then get an observation.
-pub fn diagnose_unresolved_external_symbols(
-    relation: RelationId,
-    env_id: SourceId,
-    reef: ReefId,
-    env: &Environment,
-    name: &Name,
-) -> Diagnostic {
-    let diagnostic_message = format!("Could not resolve symbol `{name}`.");
-
-    let diagnostic = Diagnostic::new(DiagnosticID::UnknownSymbol, diagnostic_message);
-
-    let mut observations: Vec<Observation> = env
-        .list_definitions()
-        .filter(|(_, sym)| match sym {
-            SymbolRef::Local(_) => false,
-            SymbolRef::External(g) => *g == relation,
-        })
-        .map(|(seg, _)| (env_id, reef, seg.clone()).into())
-        .collect();
-
-    observations.sort_by_key(|s| s.location.segment.start);
-    diagnostic.with_observations(observations)
-}
-
-/// Appends a diagnostic for an import that could not be resolved.
-/// Each `use` expression that was referring to the unknown import will get a diagnostic
-pub fn diagnose_unresolved_import(
-    env_id: SourceId,
-    reef: ReefId,
-    imported_symbol_name: &Name,
-    known_parent: Option<Name>,
-    dependent_segment: SourceSegment,
-) -> Diagnostic {
-    let msg = format!(
-        "unable to find imported symbol `{}`{}.",
-        known_parent
-            .as_ref()
-            .and_then(|p| imported_symbol_name.relative_to(p))
-            .unwrap_or(imported_symbol_name.clone()),
-        known_parent
-            .map(|p| format!(" in module `{p}`"))
-            .unwrap_or_default()
-    );
-
-    Diagnostic::new(DiagnosticID::ImportResolution, msg)
-        .with_observation((env_id, reef, dependent_segment).into())
-}
diff --git a/analyzer/src/steps/resolve/import.rs b/analyzer/src/steps/resolve/import.rs
deleted file mode 100644
index d00c66be..00000000
--- a/analyzer/src/steps/resolve/import.rs
+++ /dev/null
@@ -1,203 +0,0 @@
-use crate::diagnostic::{Diagnostic, DiagnosticID, Observation, SourceLocation};
-use crate::engine::Engine;
-use crate::environment::symbols::{resolve_loc, SymbolRegistry};
-use crate::environment::Environment;
-use crate::imports::{ResolvedImport, SourceImports, UnresolvedImport};
-use crate::name::Name;
-use crate::reef::Externals;
-use crate::relations::{ResolvedSymbol, SourceId};
-use crate::steps::resolve::{diagnose_unresolved_import, SymbolResolver};
-use std::collections::HashMap;
-
-impl<'a, 'e> SymbolResolver<'a, 'e> {
-    /// Attempts to resolve all given unresolved imports, returning a [ResolvedImports] structure containing the
-    /// imports that could get resolved.
-    /// This method will append a new diagnostic for each import that could not be resolved.
-    pub fn resolve_imports(
-        externals: &Externals<'a>,
-        engine: &Engine<'a>,
-        env_id: SourceId,
-        imports: &mut SourceImports,
-        diagnostics: &mut Vec<Diagnostic>,
-    ) {
-        let reef_id = externals.current;
-        //iterate over our unresolved imports
-        for (unresolved, segment) in imports.take_unresolved_imports() {
-            match unresolved {
-                // If the unresolved import is a symbol
-                UnresolvedImport::Symbol { alias, loc } => {
-                    // resolve path and targeted reef
-
-                    match resolve_loc(&loc, engine, externals) {
-                        None => diagnostics.push(
-                            Diagnostic::new(DiagnosticID::ImportResolution, "Invalid import")
-                                .with_observation(Observation::context(
-                                    env_id,
-                                    reef_id,
-                                    segment,
-                                    format!("unable to find reef `{}`", loc.name.root()),
-                                )),
-                        ),
-                        Some((engine, reef_id)) => {
-                            let name = &loc.name;
-                            // try to get referenced module of the reef
-                            let result = get_mod(name, engine);
-                            match result {
-                                None => {
-                                    // if the environment wasn't found, and its name was already known, push a diagnostic as it does not exist
-                                    let diagnostic = diagnose_unresolved_import(
-                                        env_id,
-                                        externals.current,
-                                        name,
-                                        None,
-                                        segment.clone(),
-                                    );
-                                    diagnostics.push(diagnostic);
-                                    imports.set_resolved_import(
-                                        alias.unwrap_or(name.simple_name().to_string()),
-                                        ResolvedImport::Dead,
-                                        segment,
-                                    );
-                                }
-                                //else, try to resolve it
-                                Some((found_env_id, found_env)) => {
-                                    let symbol_name = name.simple_name().to_string();
-                                    if found_env.fqn == *name {
-                                        //it's the environment that is being imported
-                                        imports.set_resolved_import(
-                                            alias.unwrap_or(symbol_name),
-                                            ResolvedImport::Env {
-                                                reef: reef_id,
-                                                source: found_env_id,
-                                            },
-                                            segment,
-                                        );
-                                        continue;
-                                    }
-
-                                    let mut symbols = HashMap::new();
-
-                                    for r in [SymbolRegistry::Objects, SymbolRegistry::Types] {
-                                        if let Some(symbol_id) =
-                                            found_env.symbols.find_exported(&symbol_name, r)
-                                        {
-                                            symbols.insert(
-                                                r,
-                                                ResolvedSymbol::new(
-                                                    reef_id,
-                                                    found_env_id,
-                                                    symbol_id,
-                                                ),
-                                            );
-                                        }
-                                    }
-
-                                    if !symbols.is_empty() {
-                                        imports.set_resolved_import(
-                                            alias.unwrap_or(symbol_name),
-                                            ResolvedImport::Symbols(symbols),
-                                            segment,
-                                        );
-                                        continue;
-                                    }
-                                    // the symbol inside the resolved environment could not be found
-                                    let diagnostic = diagnose_unresolved_import(
-                                        env_id,
-                                        externals.current,
-                                        name,
-                                        Some(found_env.fqn.clone()),
-                                        segment.clone(),
-                                    );
-                                    imports.set_resolved_import(
-                                        alias.unwrap_or(symbol_name),
-                                        ResolvedImport::Dead,
-                                        segment,
-                                    );
-                                    diagnostics.push(diagnostic);
-                                }
-                            }
-                        }
-                    }
-                }
-
-                //if the unresolved import is an 'AllIn' import, meaning that it imports all symbols from the given module
-                UnresolvedImport::AllIn(loc) => {
-                    match resolve_loc(&loc, engine, externals) {
-                        None => diagnostics.push(
-                            Diagnostic::new(
-                                DiagnosticID::ImportResolution,
-                                format!("unable to find reef `{}`.", loc.name.root()),
-                            )
-                            .with_observation(Observation::new(
-                                SourceLocation::new(env_id, reef_id, segment),
-                            )),
-                        ),
-                        Some((engine, reef_id)) => {
-                            let name = loc.name;
-                            // try to get referenced environment of the import
-                            match get_mod(&name, engine) {
-                                None => {
-                                    // if the environment wasn't found, and its name was already known, push a diagnostic as it does not exist
-                                    let diagnostic = diagnose_unresolved_import(
-                                        env_id,
-                                        externals.current,
-                                        &name,
-                                        None,
-                                        segment.clone(),
-                                    );
-                                    diagnostics.push(diagnostic);
-                                }
-                                Some((env_id, env)) => {
-                                    let mut symbols_map: HashMap<
-                                        String,
-                                        HashMap<SymbolRegistry, ResolvedSymbol>,
-                                    > = HashMap::new();
-
-                                    for (var_id, var) in env.symbols.exported_symbols() {
-                                        let symbols =
-                                            symbols_map.entry(var.name.clone()).or_default();
-
-                                        for registry in
-                                            [SymbolRegistry::Objects, SymbolRegistry::Types]
-                                        {
-                                            if registry.accepts(var.ty) {
-                                                symbols.insert(
-                                                    registry,
-                                                    ResolvedSymbol::new(reef_id, env_id, var_id),
-                                                );
-                                                break;
-                                            }
-                                        }
-                                    }
-
-                                    for (var_name, symbols) in symbols_map {
-                                        imports.set_resolved_import(
-                                            var_name.clone(),
-                                            ResolvedImport::Symbols(symbols),
-                                            segment.clone(),
-                                        );
-                                    }
-                                }
-                            }
-                        }
-                    }
-                }
-            }
-        }
-    }
-}
-
-/// Gets a module environment from the given fully qualified name, then pops the name until it finds a valid environment.
-/// Returns None if the name's root could not be resolved
-fn get_mod<'a>(name: &Name, engine: &'a Engine) -> Option<(SourceId, &'a Environment)> {
-    let mut env_name = Some(name.clone());
-    while let Some(name) = env_name {
-        if let Some((id, env)) = engine.find_environment_by_name(&name) {
-            if env.parent.is_none() {
-                return Some((id, env));
-            }
-        }
-        env_name = name.tail();
-    }
-    None
-}
diff --git a/analyzer/src/steps/resolve/symbol.rs b/analyzer/src/steps/resolve/symbol.rs
deleted file mode 100644
index a33cb878..00000000
--- a/analyzer/src/steps/resolve/symbol.rs
+++ /dev/null
@@ -1,134 +0,0 @@
-use crate::engine::Engine;
-use crate::environment::symbols::SymbolRegistry;
-use crate::environment::Environment;
-use crate::imports::{ResolvedImport, SourceImports};
-use crate::name::Name;
-use crate::reef::{Externals, ReefId};
-use crate::relations::{ResolvedSymbol, SourceId};
-
-/// The result of a symbol resolution attempt
-#[derive(PartialEq)]
-pub enum SymbolResolutionResult {
-    /// The symbol is resolved, where `ResolvedSymbol` is the resolved symbol
-    Resolved(ResolvedSymbol),
-    /// The symbol is resolved, but it's invalid. This result usually implies a diagnostic emission.
-    /// Where the `ResolvedSymbol` is the resolved symbol
-    Invalid(ResolvedSymbol),
-    /// The symbol is imported but its import was invalidated
-    DeadImport,
-    /// The symbol could not be found.
-    NotFound,
-}
-
-pub fn resolve_symbol_from_locals(
-    env_id: SourceId,
-    env: &Environment,
-    symbol_name: &Name,
-    current_reef: ReefId,
-    registry: SymbolRegistry,
-) -> SymbolResolutionResult {
-    if let Some(var_id) = env.symbols.find_exported(symbol_name.root(), registry) {
-        let symbol = ResolvedSymbol {
-            reef: current_reef,
-            source: env_id,
-            object_id: var_id,
-        };
-        if symbol_name.is_qualified() {
-            return SymbolResolutionResult::Invalid(symbol);
-        }
-        return SymbolResolutionResult::Resolved(symbol);
-    }
-    if registry != SymbolRegistry::Objects {
-        return SymbolResolutionResult::NotFound;
-    }
-
-    // If the symbol is an object but a type symbol with this name exists, return it:
-    // if the symbol should not be a type, the analyzer will report it.
-    // There is a case where a type can be used as a function call: when calling a type constructor.
-    if let Some(var_id) = env
-        .symbols
-        .find_exported(symbol_name.root(), SymbolRegistry::Types)
-    {
-        let symbol = ResolvedSymbol {
-            reef: current_reef,
-            source: env_id,
-            object_id: var_id,
-        };
-        return SymbolResolutionResult::Resolved(symbol);
-    }
-
-    SymbolResolutionResult::NotFound
-}
-
-pub fn resolve_absolute_symbol(
-    engine: &Engine,
-    name: &Name,
-    reef: ReefId,
-    registry: SymbolRegistry,
-) -> SymbolResolutionResult {
-    // As we could not resolve the symbol using imports, try to find the symbol from
-    // an absolute qualified name
-    let env_name = name.tail().unwrap_or(name.clone());
-    let env_result = engine.find_environment_by_name(&env_name);
-
-    if let Some((env_id, env)) = env_result {
-        return resolve_symbol_from_locals(
-            env_id,
-            env,
-            &Name::new(name.simple_name()),
-            reef,
-            registry,
-        );
-    }
-    SymbolResolutionResult::NotFound
-}
-
-/// Resolves the symbols of the given environment, using the given resolved imports for external symbol references.
-pub fn resolve_symbol_from_imports(
-    engine: &Engine,
-    imports: &SourceImports,
-    name: &Name,
-    externals: &Externals,
-    registry: SymbolRegistry,
-) -> SymbolResolutionResult {
-    let name_root = name.root();
-
-    // try to resolve the relation by looking up the name's root inside the imports
-    match imports.get_import(name_root) {
-        Some(ResolvedImport::Symbols(resolved_symbol)) => {
-            if let Some(resolved_symbol) = resolved_symbol.get(&registry) {
-                return if !name.is_qualified() {
-                    SymbolResolutionResult::Resolved(*resolved_symbol)
-                } else {
-                    SymbolResolutionResult::Invalid(*resolved_symbol)
-                };
-            }
-        }
-        Some(ResolvedImport::Env {
-            reef: resolved_reef,
-            source: resolved_module,
-        }) => {
-            let env = if *resolved_reef == externals.current {
-                engine
-            } else {
-                &externals
-                    .get_reef(*resolved_reef)
-                    .expect("resolved import points to an unknown reef")
-                    .engine
-            }
-            .get_environment(*resolved_module)
-            .expect("resolved import points to an unknown environment");
-
-            return resolve_symbol_from_locals(
-                *resolved_module,
-                env,
-                &Name::new(name.simple_name()),
-                *resolved_reef,
-                registry,
-            );
-        }
-        Some(ResolvedImport::Dead) => return SymbolResolutionResult::DeadImport,
-        None => {} //simply fallback to NotFound
-    };
-    SymbolResolutionResult::NotFound
-}
diff --git a/analyzer/src/steps/shared_diagnostics.rs b/analyzer/src/steps/shared_diagnostics.rs
deleted file mode 100644
index 65604767..00000000
--- a/analyzer/src/steps/shared_diagnostics.rs
+++ /dev/null
@@ -1,35 +0,0 @@
-//!
contains diagnostics that can be emitted by any step - -use context::source::SourceSegment; - -use crate::diagnostic::{Diagnostic, DiagnosticID, Observation, SourceLocation}; -use crate::environment::symbols::SymbolInfo; -use crate::name::Name; -use crate::reef::ReefId; -use crate::relations::SourceId; - -pub fn diagnose_invalid_symbol( - base_type: SymbolInfo, - env_id: SourceId, - reef: ReefId, - name: &Name, - segments: &[SourceSegment], -) -> Diagnostic { - let name_root = name.root(); - let (_, tail) = name.parts().split_first().unwrap(); - let base_type_name = base_type.to_string(); - let msg = format!("`{name_root}` is a {base_type_name} which cannot export any inner symbols"); - - let mut observations: Vec<_> = segments - .iter() - .map(|seg| Observation::new(SourceLocation::new(env_id, reef, seg.clone()))) - .collect(); - observations.sort_by_key(|s| s.location.segment.start); - - Diagnostic::new(DiagnosticID::InvalidSymbol, msg) - .with_observations(observations) - .with_help(format!( - "`{}` is an invalid symbol in {base_type_name} `{name_root}`", - Name::from(tail) - )) -} diff --git a/analyzer/src/steps/typing.rs b/analyzer/src/steps/typing.rs deleted file mode 100644 index 7c126af8..00000000 --- a/analyzer/src/steps/typing.rs +++ /dev/null @@ -1,3086 +0,0 @@ -use std::str::FromStr; - -use ast::call::{Call, Detached, Pipeline, ProgrammaticCall, RedirOp, Redirected}; -use ast::control_flow::If; -use ast::function::FunctionDeclaration; -use ast::group::{Block, Subshell}; -use ast::operation::{BinaryOperation, BinaryOperator, UnaryOperation, UnaryOperator}; -use ast::r#type::CastedExpr; -use ast::r#use::InclusionPathItem; -use ast::range::{Iterable, Subscript}; -use ast::substitution::Substitution; -use ast::value::{Literal, LiteralValue, TemplateString}; -use ast::variable::{ - Assign, Identifier, Path, Tilde, VarDeclaration, VarKind, VarName, VarReference, -}; -use ast::Expr; -use context::source::{SourceSegment, SourceSegmentHolder}; - -use crate::dependency::topological_sort; -use crate::diagnostic::{Diagnostic, DiagnosticID, Observation}; -use crate::engine::Engine; -use crate::reef::{Externals, ReefId}; -use crate::relations::{Relations, SourceId, SymbolRef}; -use crate::steps::typing::assign::{ - ascribe_assign_rhs, ascribe_assign_subscript, create_subscript, -}; -use crate::steps::typing::bounds::TypesBounds; -use crate::steps::typing::coercion::{ - check_type_annotation, coerce_condition, convert_description, convert_expression, convert_many, - is_compatible, resolve_type_annotation, -}; -use crate::steps::typing::exploration::{Exploration, Links}; -use crate::steps::typing::function::{ - declare_function, find_operand_implementation, infer_return, type_call, type_method, Return, -}; -use crate::steps::typing::iterable::ascribe_for; -use crate::steps::typing::lower::{convert_into_string, generate_unwrap}; -use crate::steps::typing::magic::{is_magic_variable_name, prepend_implicits}; -use crate::steps::typing::structure::{ - ascribe_field_access, ascribe_field_assign, ascribe_struct_declaration, -}; -use crate::types::builtin::{BOOL_STRUCT, STRING_STRUCT}; -use crate::types::ctx::{TypeContext, TypedVariable}; -use crate::types::engine::{Chunk, ChunkKind, TypedEngine}; -use crate::types::hir::{ - Conditional, Convert, Declaration, ExprKind, FunctionCall, LocalAssignment, Loop, MethodCall, - Redir, Redirect, Subprocess, Substitute, TypedExpr, Var, -}; -use crate::types::operator::name_operator_method; -use crate::types::ty::{FunctionDesc, Type, TypeRef}; -use 
crate::types::{ - builtin, Typing, BOOL, ERROR, EXITCODE, FLOAT, GLOB, INT, NOTHING, PID, STRING, UNIT, -}; - -mod assign; -mod bounds; -mod coercion; -mod exploration; -mod function; -mod lower; -mod structure; -mod view; - -mod iterable; -pub mod magic; - -pub fn apply_types( - engine: &Engine, - relations: &Relations, - externals: &Externals, - diagnostics: &mut Vec, -) -> (TypedEngine, TypeContext, Typing) { - let dependencies = relations.as_dependencies(externals.current, engine); - let environments = topological_sort(&dependencies); - - let mut exploration = Exploration { - type_engine: TypedEngine::new(engine.len()), - typing: Typing::default(), - ctx: TypeContext::default(), - returns: Vec::new(), - externals, - }; - - for env_id in environments { - if let Some(entry) = - apply_types_to_source(&mut exploration, diagnostics, engine, relations, env_id) - { - exploration.type_engine.insert(env_id, entry); - } - } - (exploration.type_engine, exploration.ctx, exploration.typing) -} - -#[derive(Debug, Copy, Clone, PartialEq, Eq, Hash, Default)] -enum ExpressionValue { - /// The value of the expression is not used - #[default] - Unused, - /// The value of the expression is used but its type is not specified - Unspecified, - /// The value of the expression is used and is of expected type - Expected(TypeRef), -} - -/// A state holder, used to informs the type checker about what should be -/// checked. -#[derive(Debug, Copy, Clone, PartialEq, Eq, Hash, Default)] -struct TypingState { - // if not in loop, `continue` and `break` will raise a diagnostic - in_loop: bool, - - local_value: ExpressionValue, -} - -impl TypingState { - /// Creates a new initial state, for a script. - fn new() -> Self { - Self::default() - } - - /// Returns a new state with given expression value. - fn with_local_value(self, v: ExpressionValue) -> Self { - Self { - local_value: v, - ..self - } - } - - /// Returns a new state with `in_loop` set to true - fn with_in_loop(self) -> Self { - Self { - in_loop: true, - ..self - } - } -} - -fn verify_free_function( - func: &FunctionDeclaration, - externals: &Externals, - func_id: SourceId, - diagnostics: &mut Vec, -) { - if func.body.is_some() { - return; - } - - // only first reef (after lang) can define native functions - if externals.current == ReefId(1) { - return; - } - - diagnostics.push( - Diagnostic::new( - DiagnosticID::NoFunctionDefinition, - "function without a body", - ) - .with_observation(Observation::context( - func_id, - externals.current, - func.segment(), - "provide a definition for this function", - )), - ); -} - -fn apply_types_to_source( - exploration: &mut Exploration, - diagnostics: &mut Vec, - engine: &Engine, - relations: &Relations, - source_id: SourceId, -) -> Option { - let links = Links { - source: source_id, - engine, - relations, - }; - let expr = engine.get_expression(source_id).unwrap(); - exploration.prepare(); - match expr { - Expr::FunctionDeclaration(func) => { - // Take any previous forward declaration if present. 
- let forward_declaration = exploration.type_engine.take_user(source_id); - - let base_chunk = forward_declaration - .unwrap_or_else(|| declare_function(func, exploration, links, diagnostics)); - - let function_id = base_chunk.function_id; - let function_type = base_chunk.function_type; - - let chunk_function = exploration.type_engine.get_function(function_id).unwrap(); - let expected_return_type = chunk_function.return_type; - - let typed_body = func.body.as_ref().map(|body| { - ascribe_types( - exploration, - links, - diagnostics, - body, - TypingState::default() - .with_local_value(ExpressionValue::Expected(expected_return_type)), - ) - }); - - let return_type = infer_return( - func, - expected_return_type, - links, - typed_body.as_ref(), - diagnostics, - exploration, - ); - - // update function's return type. - let function_mut = exploration - .type_engine - .get_function_mut(function_id) - .unwrap(); - function_mut.return_type = return_type; - - match typed_body { - Some(body) => Some(Chunk { - function_id, - function_type, - kind: ChunkKind::DefinedFunction(Some(body)), - }), - None => { - verify_free_function(func, exploration.externals, source_id, diagnostics); - Some(Chunk { - function_id, - function_type, - kind: ChunkKind::DeclaredFunction, - }) - } - } - } - Expr::StructDeclaration(_) => None, - expr => { - exploration - .ctx - .init_locals(links.source, links.env().symbols.len()); - - let expression = - ascribe_types(exploration, links, diagnostics, expr, TypingState::new()); - - let script_fn_id = exploration.type_engine.add_function(FunctionDesc::script()); - let script_fn_name = links.env().fqn.to_string(); - let function_type = exploration.typing.add_type( - Type::Function(Some(links.source), script_fn_id), - Some(script_fn_name), - ); - - let expression = prepend_implicits(expression, exploration, links); - - Some(Chunk { - function_id: script_fn_id, - function_type, - kind: ChunkKind::DefinedFunction(Some(expression)), - }) - } - } -} - -fn ascribe_literal(lit: &Literal) -> TypedExpr { - let ty = match lit.parsed { - LiteralValue::Int(_) => INT, - LiteralValue::Float(_) => FLOAT, - LiteralValue::String(_) => STRING, - LiteralValue::Bool(_) => BOOL, - }; - TypedExpr { - kind: ExprKind::Literal(lit.parsed.clone()), - ty, - segment: lit.segment.clone(), - } -} - -fn ascribe_template_string( - tpl: &TemplateString, - exploration: &mut Exploration, - links: Links, - diagnostics: &mut Vec, - state: TypingState, -) -> TypedExpr { - if tpl.parts.is_empty() { - return TypedExpr { - kind: ExprKind::Literal(LiteralValue::String(String::new())), - ty: STRING, - segment: tpl.segment(), - }; - } - - let lang = exploration.externals.lang(); - let (_, plus_method_id) = lang - .typed_engine - .get_method_exact( - STRING_STRUCT, - name_operator_method(BinaryOperator::Plus), - &[STRING], - STRING, - ) - .expect("string type should have a concatenation method"); - - let mut it = tpl.parts.iter().map(|part| { - let typed_part = ascribe_types( - exploration, - links, - diagnostics, - part, - state.with_local_value(ExpressionValue::Unused), - ); - convert_into_string(typed_part, exploration, diagnostics, links.source) - }); - let acc = it.next().unwrap(); - it.fold(acc, |acc, current| { - let segment = current.segment.clone(); - TypedExpr { - kind: ExprKind::MethodCall(MethodCall { - callee: Box::new(acc), - arguments: vec![current], - function_id: plus_method_id, - }), - ty: STRING, - segment, - } - }) -} - -fn ascribe_assign( - assign: &Assign, - exploration: &mut Exploration, - links: 
Links, - diagnostics: &mut Vec, - state: TypingState, -) -> TypedExpr { - let symbol = links.env().get_raw_symbol(assign.left.segment()); - - let actual_type_ref = symbol.map(|symbol| { - exploration - .ctx - .get(links.relations, links.source, symbol) - .unwrap() - .type_ref - }); - - if let Expr::Subscript(sub) = assign.left.as_ref() { - return ascribe_assign_subscript(assign, sub, exploration, links, diagnostics, state); - } - if let Expr::FieldAccess(field) = assign.left.as_ref() { - return ascribe_field_assign(assign, field, exploration, links, diagnostics, state); - } - - let rhs = ascribe_assign_rhs( - assign, - exploration, - links, - diagnostics, - state.with_local_value( - actual_type_ref.map_or(ExpressionValue::Unspecified, ExpressionValue::Expected), - ), - ); - - // actual_type_ref is some if symbol is some - let Some((symbol, actual_type_ref)) = symbol.map(|s| (s, actual_type_ref.unwrap())) else { - diagnostics.push( - Diagnostic::new( - DiagnosticID::InvalidAssignment, - "Invalid left-hand side of assignment", - ) - .with_observation(Observation::here( - links.source, - exploration.externals.current, - assign.left.segment(), - "Cannot assign to this expression", - )), - ); - return rhs; - }; - - let actual_type = exploration.get_type(actual_type_ref).unwrap(); - if actual_type.is_named() { - diagnostics.push( - Diagnostic::new( - DiagnosticID::TypeMismatch, - if let Some(name) = assign.name() { - format!("Named object `{}` cannot be assigned like a variable", name) - } else { - "Expression cannot be assigned like a variable".to_owned() - }, - ) - .with_observation(Observation::here( - links.source, - exploration.externals.current, - assign.segment(), - "Assignment happens here", - )), - ); - return rhs; - } - let var_obj = exploration - .ctx - .get(links.relations, links.source, symbol) - .unwrap(); - let var_ty = var_obj.type_ref; - let rhs_type = rhs.ty; - - let rhs = match convert_expression( - rhs, - var_ty, - &mut TypesBounds::inactive(), - exploration, - links.source, - diagnostics, - ) { - Ok(rhs) => rhs, - Err(_) => { - diagnostics.push( - Diagnostic::new( - DiagnosticID::TypeMismatch, - format!( - "Cannot assign a value of type `{}` to something of type `{}`", - exploration.new_type_view(rhs_type, &TypesBounds::inactive()), - exploration.new_type_view(var_ty, &TypesBounds::inactive()), - ), - ) - .with_observation(Observation::here( - links.source, - exploration.externals.current, - assign.segment(), - "Assignment happens here", - )), - ); - TypedExpr { - kind: ExprKind::Literal(LiteralValue::String("".to_owned())), - ty: STRING, - segment: assign.segment(), - } - } - }; - - if !var_obj.can_reassign { - diagnostics.push( - Diagnostic::new( - DiagnosticID::CannotReassign, - if let Some(name) = assign.name() { - format!("Cannot assign twice to immutable variable `{}`", name) - } else { - "Cannot reassign immutable expression".to_owned() - }, - ) - .with_observation(Observation::here( - links.source, - exploration.externals.current, - assign.segment(), - "Assignment happens here", - )), - ); - } - - let identifier = match symbol { - SymbolRef::Local(id) => Var::Local(id), - SymbolRef::External(id) => Var::External( - links.relations[id] - .state - .expect_resolved("non resolved relation"), - ), - }; - - TypedExpr { - kind: ExprKind::LocalAssign(LocalAssignment { - identifier, - rhs: Box::new(rhs), - }), - ty: UNIT, - segment: assign.segment(), - } -} - -fn ascribe_var_declaration( - decl: &VarDeclaration, - exploration: &mut Exploration, - links: Links, - diagnostics: 
&mut Vec, - state: TypingState, -) -> TypedExpr { - let ast_type_hint = decl.var.ty.as_ref(); - let type_hint = - ast_type_hint.map(|ty| resolve_type_annotation(exploration, links, ty, diagnostics)); - - let mut initializer = match decl.initializer.as_ref() { - Some(expr) => ascribe_types( - exploration, - links, - diagnostics, - expr, - state.with_local_value( - type_hint.map_or(ExpressionValue::Unspecified, ExpressionValue::Expected), - ), - ), - None => { - diagnostics.push( - Diagnostic::new( - DiagnosticID::UnsupportedFeature, - "Variables without initializers are not supported yet", - ) - .with_observation(Observation::here( - links.source, - exploration.externals.current, - decl.segment(), - "Variable declaration happens here", - )), - ); - TypedExpr::error(decl.segment()) - } - }; - - if let Some(type_ref) = type_hint { - initializer = check_type_annotation( - exploration, - type_ref, - ast_type_hint.unwrap().segment(), - &mut TypesBounds::inactive(), - initializer, - links, - diagnostics, - ); - } - - let id = links.env().get_raw_symbol(decl.segment()).unwrap(); - - let SymbolRef::Local(id) = id else { - unreachable!() - }; - - exploration.ctx.set_local( - links.source, - id, - if decl.kind == VarKind::Val { - TypedVariable::immutable(initializer.ty) - } else { - TypedVariable::assignable(initializer.ty) - }, - ); - TypedExpr { - kind: ExprKind::Declare(Declaration { - identifier: id, - value: Some(Box::new(initializer)), - }), - ty: UNIT, - segment: decl.segment.clone(), - } -} - -fn ascribe_magic_var_reference( - var_ref: &VarReference, - exploration: &Exploration, - links: Links, -) -> Option { - let var_name = var_ref.name.name(); - if !is_magic_variable_name(var_name) { - return None; - } - - let program_arguments_variable = links.env().get_raw_symbol(var_ref.segment()).unwrap(); - - let pargs_var = match program_arguments_variable { - SymbolRef::Local(l) => Var::Local(l), - SymbolRef::External(e) => Var::External( - links.relations[e] - .state - .expect_resolved("unresolved magic variable"), - ), - }; - - let parg_reference_expression = TypedExpr { - kind: ExprKind::Reference(pargs_var), - ty: builtin::STRING_VEC, //program arguments is of type Vec[String] - segment: var_ref.segment(), - }; - - match var_name { - "#" => { - let (_, len_method_id) = exploration - .get_method_exact(parg_reference_expression.ty, "len", &[], INT) - .expect("Vec#len(): Int method not found"); - - Some(TypedExpr { - kind: ExprKind::MethodCall(MethodCall { - callee: Box::new(parg_reference_expression), - arguments: vec![], - function_id: len_method_id, - }), - ty: INT, - segment: var_ref.segment(), - }) - } - "*" | "@" => Some(parg_reference_expression), - _ => { - let Ok(offset) = u32::from_str(var_name) else { - return None; - }; - - let (_, index_method_id) = exploration - .get_method_exact( - parg_reference_expression.ty, - "[]", - &[INT], - builtin::GENERIC_PARAMETER_1, - ) - .expect("Vec#[Int]: T method not found"); - - Some(TypedExpr { - kind: ExprKind::MethodCall(MethodCall { - callee: Box::new(parg_reference_expression), - arguments: vec![TypedExpr { - kind: ExprKind::Literal(LiteralValue::Int(offset as i64)), - ty: INT, - segment: var_ref.segment(), - }], - function_id: index_method_id, - }), - ty: STRING, - segment: var_ref.segment(), - }) - } - } -} - -fn ascribe_var_reference( - var_ref: &VarReference, - links: Links, - exploration: &Exploration, -) -> TypedExpr { - if let Some(magic_ref) = ascribe_magic_var_reference(var_ref, exploration, links) { - return magic_ref; - } - - let 
symbol = links.env().get_raw_symbol(var_ref.segment()).unwrap(); - // let foo: Option = match symbol { - // SymbolRef::Local(local) => { - // links.env().symbols.get(local) - // } - // SymbolRef::External(id) => { - // let relation = &links.relations[id]; - // match relation.state { - // RelationState::Resolved(resolved) => { - // if resolved.reef == exploration.externals.current { - // links.engine.get_environment(resolved.source).unwrap() - // } else { - // exploration.externals.get_reef(resolved.reef).unwrap().engine.get_environment(resolved.source).unwrap() - // }.symbols.get(resolved.object_id) - // } - // _ => None - // } - // } - // }.map(|symbol| symbol.ty); - let type_ref = exploration - .get_var(links.source, symbol, links.relations) - .unwrap() - .type_ref; - - let var = match symbol { - SymbolRef::Local(id) => Var::Local(id), - SymbolRef::External(id) => Var::External( - links.relations[id] - .state - .expect_resolved("non resolved relation"), - ), - }; - - TypedExpr { - kind: ExprKind::Reference(var), - ty: type_ref, - segment: var_ref.segment.clone(), - } -} - -fn ascribe_identifier(ident: &Path, links: Links, exploration: &Exploration) -> TypedExpr { - ascribe_var_reference( - &VarReference { - name: VarName::User(ident.path.last().unwrap().name().into()), - segment: ident.segment(), - }, - links, - exploration, - ) -} - -fn ascribe_block( - block: &Block, - links: Links, - exploration: &mut Exploration, - diagnostics: &mut Vec, - state: TypingState, -) -> TypedExpr { - let mut expressions = Vec::with_capacity(block.expressions.len()); - let mut it = block - .expressions - .iter() - .filter(|expr| !matches!(expr, Expr::Use(_))) - .peekable(); - - while let Some(expr) = it.next() { - expressions.push(ascribe_types( - exploration, - links, - diagnostics, - expr, - if it.peek().is_some() { - state.with_local_value(ExpressionValue::Unused) - } else { - state - }, - )); - } - let ty = expressions.last().map_or(UNIT, |expr| expr.ty); - TypedExpr { - kind: ExprKind::Block(expressions), - ty, - segment: block.segment.clone(), - } -} - -fn ascribe_redirected( - redirected: &Redirected, - exploration: &mut Exploration, - links: Links, - diagnostics: &mut Vec, - state: TypingState, -) -> TypedExpr { - let expr = ascribe_types(exploration, links, diagnostics, &redirected.expr, state); - - let mut redirections = Vec::with_capacity(redirected.redirections.len()); - for redirection in &redirected.redirections { - let operand = ascribe_types(exploration, links, diagnostics, &redirection.operand, state); - let operand = if matches!(redirection.operator, RedirOp::FdIn | RedirOp::FdOut) { - if operand.ty != INT { - diagnostics.push( - Diagnostic::new( - DiagnosticID::TypeMismatch, - format!( - "File descriptor redirections must be given an integer, not `{}`", - exploration.new_type_view(operand.ty, &TypesBounds::inactive()), - ), - ) - .with_observation(Observation::here( - links.source, - exploration.externals.current, - redirection.segment(), - "Redirection happens here", - )), - ); - } - operand - } else { - convert_into_string(operand, exploration, diagnostics, links.source) - }; - redirections.push(Redir { - fd: redirection.fd, - operator: redirection.operator, - operand: Box::new(operand), - }); - } - let ty = expr.ty; - TypedExpr { - kind: ExprKind::Redirect(Redirect { - expression: Box::new(expr), - redirections, - }), - ty, - segment: redirected.segment(), - } -} - -fn ascribe_pipeline( - pipeline: &Pipeline, - exploration: &mut Exploration, - links: Links, - diagnostics: &mut Vec, - 
state: TypingState, -) -> TypedExpr { - let mut commands = Vec::with_capacity(pipeline.commands.len()); - for command in &pipeline.commands { - commands.push(ascribe_types( - exploration, - links, - diagnostics, - command, - state, - )); - } - TypedExpr { - kind: ExprKind::Pipeline(commands), - ty: EXITCODE, - segment: pipeline.segment(), - } -} - -fn ascribe_substitution( - substitution: &Substitution, - exploration: &mut Exploration, - links: Links, - diagnostics: &mut Vec, - state: TypingState, -) -> TypedExpr { - let state = state.with_local_value(ExpressionValue::Unused); - let commands = substitution - .underlying - .expressions - .iter() - .map(|command| ascribe_types(exploration, links, diagnostics, command, state)) - .collect::>(); - TypedExpr { - kind: match substitution.kind { - ast::substitution::SubstitutionKind::Capture => ExprKind::Capture(commands), - ast::substitution::SubstitutionKind::Process { direction } => { - ExprKind::Substitute(match direction { - ast::substitution::Direction::Input => Substitute::In(commands), - ast::substitution::Direction::Output => Substitute::Out(commands), - }) - } - }, - ty: STRING, - segment: substitution.segment(), - } -} - -fn ascribe_detached( - detached: &Detached, - exploration: &mut Exploration, - links: Links, - diagnostics: &mut Vec, - state: TypingState, -) -> TypedExpr { - let expr = ascribe_types( - exploration, - links, - diagnostics, - &detached.underlying, - state.with_local_value(ExpressionValue::Unused), - ); - TypedExpr { - kind: ExprKind::Subprocess(Subprocess { - inner: Box::new(expr), - awaited: false, - }), - ty: PID, - segment: detached.segment(), - } -} - -fn ascribe_subshell( - subshell: &Subshell, - exploration: &mut Exploration, - links: Links, - diagnostics: &mut Vec, - state: TypingState, -) -> TypedExpr { - let block = subshell - .expressions - .iter() - .map(|expr| { - ascribe_types( - exploration, - links, - diagnostics, - expr, - state.with_local_value(ExpressionValue::Unused), - ) - }) - .collect::>(); - TypedExpr { - kind: ExprKind::Subprocess(Subprocess { - inner: Box::new(TypedExpr { - kind: ExprKind::Block(block), - ty: UNIT, - segment: subshell.segment.clone(), - }), - awaited: true, - }), - ty: EXITCODE, - segment: subshell.segment(), - } -} - -fn ascribe_return( - ret: &ast::function::Return, - exploration: &mut Exploration, - links: Links, - diagnostics: &mut Vec, - state: TypingState, -) -> TypedExpr { - let expr = ret - .expr - .as_ref() - .map(|expr| Box::new(ascribe_types(exploration, links, diagnostics, expr, state))); - exploration.returns.push(Return { - ty: expr.as_ref().map_or(UNIT, |expr| expr.ty), - segment: ret.segment.clone(), - }); - TypedExpr { - kind: ExprKind::Return(expr), - ty: NOTHING, - segment: ret.segment.clone(), - } -} - -fn ascribe_function_declaration( - fun: &FunctionDeclaration, - exploration: &mut Exploration, - links: Links, - diagnostics: &mut Vec, -) -> TypedExpr { - let function_source = links.env().get_raw_env(fun.segment()).unwrap(); - - // check if the function declaration is already known - if exploration.type_engine.get_user(function_source).is_none() { - // if not, forward declare it by typing its declared signature - let declaration_link = links.with_source(function_source); - let forward_declared_chunk = - declare_function(fun, exploration, declaration_link, diagnostics); - exploration - .type_engine - .insert(function_source, forward_declared_chunk); - } - - let chunk = exploration.type_engine.get_user(function_source).unwrap(); - - let type_ref = 
TypeRef::new(exploration.externals.current, chunk.function_type); - - let id = links.env().get_raw_symbol(fun.segment()).unwrap(); - - let SymbolRef::Local(local_id) = id else { - unreachable!() - }; - - exploration - .ctx - .set_local_typed(links.source, local_id, type_ref); - - TypedExpr { - kind: ExprKind::Declare(Declaration { - identifier: local_id, - value: None, - }), - ty: UNIT, - segment: fun.segment.clone(), - } -} - -fn ascribe_binary( - bin: &BinaryOperation, - exploration: &mut Exploration, - links: Links, - diagnostics: &mut Vec, - state: TypingState, -) -> TypedExpr { - let left_expr = ascribe_types(exploration, links, diagnostics, &bin.left, state); - let right_expr = ascribe_types(exploration, links, diagnostics, &bin.right, state); - let left_type = left_expr.ty; - let right_type = right_expr.ty; - let name = name_operator_method(bin.op); - - let methods = exploration - .get_methods(left_expr.ty, name) - .map(|methods| methods.as_slice()) - .unwrap_or(&[]); - - let method = - find_operand_implementation(exploration, left_type.reef, methods, left_expr, right_expr); - match method { - Ok(method) => TypedExpr { - ty: method.return_type, - kind: ExprKind::MethodCall(method.into()), - segment: bin.segment(), - }, - Err(left) => { - diagnostics.push( - Diagnostic::new(DiagnosticID::UnknownMethod, "Undefined operator") - .with_observation(Observation::here( - links.source, - exploration.externals.current, - bin.segment(), - format!( - "No operator `{}` between type `{}` and `{}`", - name, - exploration.new_type_view(left_type, &TypesBounds::inactive()), - exploration.new_type_view(right_type, &TypesBounds::inactive()), - ), - )), - ); - left - } - } -} - -fn ascribe_subscript( - sub: &Subscript, - exploration: &mut Exploration, - links: Links, - diagnostics: &mut Vec, - state: TypingState, -) -> TypedExpr { - match create_subscript(sub, exploration, links, diagnostics, state) { - Ok(method) => TypedExpr { - kind: ExprKind::MethodCall(MethodCall { - callee: Box::new(method.left), - arguments: vec![method.right], - function_id: method.function_id, - }), - ty: method.return_type, - segment: sub.segment(), - }, - Err(target) => target, - } -} - -fn ascribe_range( - range: &Iterable, - exploration: &mut Exploration, - links: Links, - diagnostics: &mut Vec, - state: TypingState, -) -> TypedExpr { - match range { - Iterable::Files(files) => { - let mut pattern = ascribe_types( - exploration, - links, - diagnostics, - &files.pattern, - state.with_local_value(ExpressionValue::Expected(STRING)), - ); - if pattern.ty == STRING { - pattern.ty = GLOB; - } else if pattern.ty.is_ok() { - panic!("pattern should be of type String"); - } - pattern - } - Iterable::Range(range) => { - let state = state.with_local_value(ExpressionValue::Expected(INT)); - let start = ascribe_types(exploration, links, diagnostics, &range.start, state); - let end = ascribe_types(exploration, links, diagnostics, &range.end, state); - let step = range - .step - .as_ref() - .map(|step| ascribe_types(exploration, links, diagnostics, step, state)) - .unwrap_or_else(|| TypedExpr { - kind: ExprKind::Literal(LiteralValue::Int(1)), - ty: INT, - segment: range.segment(), - }); - - let args = [&start, &end, &step]; - let not_integers = args - .into_iter() - .filter(|expr| !is_compatible(exploration, INT, expr.ty)) - .collect::>(); - if !not_integers.is_empty() { - let mut diagnostic = - Diagnostic::new(DiagnosticID::TypeMismatch, "Invalid integer range"); - for &expr in ¬_integers { - diagnostic = 
diagnostic.with_observation(Observation::here( - links.source, - exploration.externals.current, - expr.segment(), - format!( - "Got `{}`", - exploration.new_type_view(expr.ty, &TypesBounds::inactive()), - ), - )); - } - diagnostics.push(diagnostic); - return start.poison(); - } - - let symbol = links.env().get_raw_symbol(range.segment()).unwrap(); - let function_type_ref = exploration - .get_var(links.source, symbol, links.relations) - .unwrap() - .type_ref; - let Type::Structure(structure_source, _) = - exploration.get_type(function_type_ref).unwrap() - else { - unreachable!() - }; - let constructor_id = exploration - .get_methods(function_type_ref, "") - .unwrap()[0]; - let function = exploration.get_function(ReefId(1), constructor_id).unwrap(); - TypedExpr { - kind: ExprKind::FunctionCall(FunctionCall { - function_id: constructor_id, - arguments: vec![start, end, step], - reef: ReefId(1), - source_id: *structure_source, - }), - ty: function.return_type, - segment: range.segment(), - } - } - } -} - -fn ascribe_tilde( - tilde: &ast::variable::TildeExpansion, - exploration: &mut Exploration, - links: Links, - diagnostics: &mut Vec, - state: TypingState, -) -> TypedExpr { - let pfc = ProgrammaticCall { - path: vec![InclusionPathItem::Symbol(Identifier::new( - "~".into(), - tilde.segment().start, - ))], - segment: tilde.segment(), - arguments: match &tilde.structure { - Tilde::HomeDir(Some(username)) => vec![username.as_ref().clone()], - Tilde::HomeDir(None) | Tilde::WorkingDir => Vec::new(), - }, - type_parameters: vec![], - }; - let typed = ascribe_pfc(&pfc, exploration, links, diagnostics, state); - generate_unwrap(typed, exploration) -} - -fn ascribe_casted( - casted: &CastedExpr, - exploration: &mut Exploration, - links: Links, - diagnostics: &mut Vec, - state: TypingState, -) -> TypedExpr { - let expr = ascribe_types(exploration, links, diagnostics, &casted.expr, state); - - let ty = resolve_type_annotation(exploration, links, &casted.casted_type, diagnostics); - if ty.is_err() { - return expr; - } - - if expr.ty.is_ok() - && convert_description(exploration, ty, expr.ty, &mut TypesBounds::inactive(), true) - .is_err() - { - diagnostics.push( - Diagnostic::new( - DiagnosticID::IncompatibleCast, - format!( - "Casting `{}` as `{}` is invalid", - exploration.new_type_view(expr.ty, &TypesBounds::inactive()), - exploration.new_type_view(ty, &TypesBounds::inactive()), - ), - ) - .with_observation(Observation::here( - links.source, - exploration.externals.current, - casted.segment(), - "Incompatible cast", - )), - ); - } - TypedExpr { - kind: ExprKind::Convert(Convert { - inner: Box::new(expr), - into: ty, - }), - ty, - segment: casted.segment(), - } -} - -fn ascribe_unary( - unary: &UnaryOperation, - exploration: &mut Exploration, - links: Links, - diagnostics: &mut Vec, - state: TypingState, -) -> TypedExpr { - let expr = ascribe_types( - exploration, - links, - diagnostics, - &unary.expr, - state.with_local_value(ExpressionValue::Unspecified), - ); - if expr.ty.is_err() { - return expr; - } - - match unary.op { - UnaryOperator::Not => ascribe_not(expr, unary.segment(), exploration, links, diagnostics), - UnaryOperator::Negate => { - let method = exploration.get_method_exact(expr.ty, "neg", &[], expr.ty); - - match method { - Some((method, method_id)) => TypedExpr { - kind: ExprKind::MethodCall(MethodCall { - callee: Box::new(expr), - arguments: vec![], - function_id: method_id, - }), - ty: method.return_type, - segment: unary.segment(), - }, - None => { - diagnostics.push( - 
Diagnostic::new(DiagnosticID::UnknownMethod, "Cannot negate type") - .with_observation(Observation::here( - links.source, - exploration.externals.current, - unary.segment(), - format!( - "`{}` does not implement the `neg` method", - exploration.new_type_view(expr.ty, &TypesBounds::inactive()), - ), - )), - ); - expr - } - } - } - } -} - -fn ascribe_not( - not: TypedExpr, - segment: SourceSegment, - exploration: &mut Exploration, - links: Links, - diagnostics: &mut Vec, -) -> TypedExpr { - let lang = exploration.externals.lang(); - let (not_method, not_method_id) = lang - .typed_engine - .get_method_exact(BOOL_STRUCT, "not", &[], BOOL) - .expect("A Bool should be invertible"); - match convert_expression( - not, - BOOL, - &mut TypesBounds::inactive(), - exploration, - links.source, - diagnostics, - ) { - Ok(expr) => TypedExpr { - kind: ExprKind::MethodCall(MethodCall { - callee: Box::new(expr), - arguments: vec![], - function_id: not_method_id, - }), - ty: not_method.return_type, - segment, - }, - Err(expr) => { - diagnostics.push( - Diagnostic::new(DiagnosticID::TypeMismatch, "Cannot invert type").with_observation( - Observation::here( - links.source, - exploration.externals.current, - segment, - format!( - "Cannot invert non-boolean type `{}`", - exploration.new_type_view(expr.ty, &TypesBounds::inactive()), - ), - ), - ), - ); - expr - } - } -} - -fn ascribe_if( - block: &If, - exploration: &mut Exploration, - links: Links, - diagnostics: &mut Vec, - state: TypingState, -) -> TypedExpr { - let condition = ascribe_types(exploration, links, diagnostics, &block.condition, state); - - let current_reef = exploration.externals.current; - - let condition = coerce_condition(condition, exploration, links.source, diagnostics); - let mut then = ascribe_types( - exploration, - links, - diagnostics, - &block.success_branch, - state, - ); - - let mut otherwise = block - .fail_branch - .as_ref() - .map(|expr| ascribe_types(exploration, links, diagnostics, expr, state)); - - let ty = if state.local_value != ExpressionValue::Unused { - match convert_many( - exploration, - &mut TypesBounds::inactive(), - [then.ty, otherwise.as_ref().map_or(UNIT, |expr| expr.ty)], - ) { - Ok(ty) => { - // Generate appropriate casts and implicits conversions - then = convert_expression( - then, - ty, - &mut TypesBounds::inactive(), - exploration, - links.source, - diagnostics, - ) - .expect("Type mismatch should already have been caught"); - otherwise = otherwise.map(|expr| { - convert_expression( - expr, - ty, - &mut TypesBounds::inactive(), - exploration, - links.source, - diagnostics, - ) - .expect("Type mismatch should already have been caught") - }); - ty - } - Err(_) => { - let mut diagnostic = Diagnostic::new( - DiagnosticID::TypeMismatch, - "`if` and `else` have incompatible types", - ) - .with_observation(Observation::here( - links.source, - current_reef, - block.success_branch.segment(), - format!( - "Found `{}`", - exploration.new_type_view(then.ty, &TypesBounds::inactive()), - ), - )); - if let Some(otherwise) = &otherwise { - diagnostic = diagnostic.with_observation(Observation::here( - links.source, - current_reef, - otherwise.segment(), - format!( - "Found `{}`", - exploration.new_type_view(otherwise.ty, &TypesBounds::inactive()), - ), - )); - } - diagnostics.push(diagnostic); - ERROR - } - } - } else { - UNIT - }; - TypedExpr { - kind: ExprKind::Conditional(Conditional { - condition: Box::new(condition), - then: Box::new(then), - otherwise: otherwise.map(Box::new), - }), - ty, - segment: block.segment.clone(), 
- } -} - -fn ascribe_call( - call: &Call, - exploration: &mut Exploration, - links: Links, - diagnostics: &mut Vec, - state: TypingState, -) -> TypedExpr { - if let [Expr::Literal(Literal { - parsed: LiteralValue::String(cmd), - segment, - }), ..] = call.arguments.as_slice() - { - if cmd.as_str() == "cd" { - let pfc = ProgrammaticCall { - path: vec![InclusionPathItem::Symbol(Identifier::new( - cmd.into(), - segment.start, - ))], - segment: call.segment(), - arguments: call.arguments[1..].to_vec(), - type_parameters: vec![], - }; - return ascribe_pfc(&pfc, exploration, links, diagnostics, state); - } - } - - let args = call - .arguments - .iter() - .map(|expr| { - let expr = ascribe_types(exploration, links, diagnostics, expr, state); - if expr.ty == GLOB { - let glob = exploration - .get_method_exact(expr.ty, "spread", &[], builtin::STRING_VEC) - .expect("glob method not found"); - let segment = expr.segment.clone(); - TypedExpr { - kind: ExprKind::MethodCall(MethodCall { - callee: Box::new(expr), - arguments: vec![], - function_id: glob.1, - }), - ty: glob.0.return_type, - segment, - } - } else { - convert_into_string(expr, exploration, diagnostics, links.source) - } - }) - .collect::>(); - - TypedExpr { - kind: ExprKind::ProcessCall(args), - ty: EXITCODE, - segment: call.segment(), - } -} - -fn ascribe_pfc( - call: &ProgrammaticCall, - exploration: &mut Exploration, - links: Links, - diagnostics: &mut Vec, - state: TypingState, -) -> TypedExpr { - let function_match = type_call(call, exploration, links, state, diagnostics); - TypedExpr { - kind: ExprKind::FunctionCall(FunctionCall { - arguments: function_match.arguments, - function_id: function_match.function_id, - source_id: function_match.function_source, - reef: function_match.reef, - }), - ty: function_match.return_type, - segment: call.segment.clone(), - } -} - -fn ascribe_method_call( - method: &ast::call::MethodCall, - exploration: &mut Exploration, - links: Links, - diagnostics: &mut Vec, - state: TypingState, -) -> TypedExpr { - let mut callee = ascribe_types(exploration, links, diagnostics, &method.source, state); - let arguments = method - .arguments - .iter() - .map(|expr| ascribe_types(exploration, links, diagnostics, expr, state)) - .collect::>(); - - let return_hint = if let ExpressionValue::Expected(ty) = state.local_value { - Some(ty) - } else { - None - }; - - match type_method( - method, - &callee, - links, - arguments, - diagnostics, - exploration, - links.source, - return_hint, - ) { - Some(fun) => TypedExpr { - kind: ExprKind::MethodCall(MethodCall { - callee: Box::new(callee), - arguments: fun.arguments, - function_id: fun.function_id, - }), - ty: fun.return_type, - segment: method.segment.clone(), - }, - None => { - callee.ty = ERROR; - callee - } - } -} - -fn ascribe_loop( - loo: &Expr, - exploration: &mut Exploration, - links: Links, - diagnostics: &mut Vec, - state: TypingState, -) -> TypedExpr { - let (condition, body) = match loo { - Expr::While(w) => { - let condition = ascribe_types( - exploration, - links, - diagnostics, - &w.condition, - state.with_local_value(ExpressionValue::Unspecified), - ); - ( - Some(coerce_condition( - condition, - exploration, - links.source, - diagnostics, - )), - &w.body, - ) - } - Expr::Loop(l) => (None, &l.body), - _ => unreachable!("Expression is not a loop"), - }; - let body = ascribe_types( - exploration, - links, - diagnostics, - body, - state - .with_in_loop() - .with_local_value(ExpressionValue::Unused), - ); - - TypedExpr { - kind: ExprKind::ConditionalLoop(Loop { - 
condition: condition.map(Box::new), - body: Box::new(body), - }), - segment: loo.segment(), - ty: UNIT, - } -} - -fn ascribe_continue_or_break( - expr: &Expr, - diagnostics: &mut Vec, - source: SourceId, - current_reef: ReefId, - in_loop: bool, -) -> TypedExpr { - let (kind, kind_name) = match expr { - Expr::Continue(_) => (ExprKind::Continue, "continue"), - Expr::Break(_) => (ExprKind::Break, "break"), - _ => panic!("e is not a loop"), - }; - if !in_loop { - diagnostics.push( - Diagnostic::new( - DiagnosticID::InvalidBreakOrContinue, - format!("`{kind_name}` must be declared inside a loop"), - ) - .with_observation((source, current_reef, expr.segment()).into()), - ); - } - TypedExpr { - kind, - ty: NOTHING, - segment: expr.segment(), - } -} - -/// Ascribes types to the given expression. -/// -/// In case of an error, the expression is still returned, but the type is set to [`ERROR`]. -fn ascribe_types( - exploration: &mut Exploration, - links: Links, - diagnostics: &mut Vec, - expr: &Expr, - state: TypingState, -) -> TypedExpr { - match expr { - Expr::FunctionDeclaration(fd) => { - ascribe_function_declaration(fd, exploration, links, diagnostics) - } - Expr::StructDeclaration(decl) => { - ascribe_struct_declaration(decl, exploration, links, diagnostics) - } - Expr::Literal(lit) => ascribe_literal(lit), - Expr::TemplateString(tpl) => { - ascribe_template_string(tpl, exploration, links, diagnostics, state) - } - Expr::Assign(assign) => ascribe_assign(assign, exploration, links, diagnostics, state), - Expr::VarDeclaration(decl) => { - ascribe_var_declaration(decl, exploration, links, diagnostics, state) - } - Expr::VarReference(var) => ascribe_var_reference(var, links, exploration), - Expr::FieldAccess(fa) => ascribe_field_access(fa, links, exploration, diagnostics, state), - Expr::Path(ident) => ascribe_identifier(ident, links, exploration), - Expr::If(block) => ascribe_if(block, exploration, links, diagnostics, state), - Expr::Call(call) => ascribe_call(call, exploration, links, diagnostics, state), - Expr::ProgrammaticCall(call) => ascribe_pfc(call, exploration, links, diagnostics, state), - Expr::MethodCall(method) => { - ascribe_method_call(method, exploration, links, diagnostics, state) - } - Expr::Block(b) => ascribe_block(b, links, exploration, diagnostics, state), - Expr::Redirected(redirected) => { - ascribe_redirected(redirected, exploration, links, diagnostics, state) - } - Expr::Pipeline(pipeline) => { - ascribe_pipeline(pipeline, exploration, links, diagnostics, state) - } - Expr::Substitution(subst) => { - ascribe_substitution(subst, exploration, links, diagnostics, state) - } - Expr::Detached(detached) => { - ascribe_detached(detached, exploration, links, diagnostics, state) - } - Expr::Subshell(subshell) => { - ascribe_subshell(subshell, exploration, links, diagnostics, state) - } - Expr::Return(r) => ascribe_return(r, exploration, links, diagnostics, state), - Expr::Parenthesis(paren) => { - ascribe_types(exploration, links, diagnostics, &paren.expression, state) - } - Expr::Unary(unary) => ascribe_unary(unary, exploration, links, diagnostics, state), - Expr::Binary(bo) => ascribe_binary(bo, exploration, links, diagnostics, state), - Expr::Subscript(sub) => ascribe_subscript(sub, exploration, links, diagnostics, state), - Expr::Range(range) => ascribe_range(range, exploration, links, diagnostics, state), - Expr::Tilde(tilde) => ascribe_tilde(tilde, exploration, links, diagnostics, state), - Expr::Casted(casted) => ascribe_casted(casted, exploration, links, diagnostics, 
state), - Expr::Test(test) => ascribe_types(exploration, links, diagnostics, &test.expression, state), - e @ (Expr::While(_) | Expr::Loop(_)) => { - ascribe_loop(e, exploration, links, diagnostics, state) - } - Expr::For(f) => ascribe_for(f, exploration, links, diagnostics, state), - e @ (Expr::Continue(_) | Expr::Break(_)) => ascribe_continue_or_break( - e, - diagnostics, - links.source, - exploration.externals.current, - state.in_loop, - ), - _ => todo!("{expr:?}"), - } -} - -#[cfg(test)] -mod tests { - use pretty_assertions::assert_eq; - - use context::source::Source; - use context::str_find::{find_in, find_in_nth}; - use parser::parse_trusted; - - use crate::analyze; - use crate::importer::StaticImporter; - use crate::name::Name; - use crate::reef::Reef; - use crate::relations::LocalId; - use crate::types::engine::{FunctionId, StructureId}; - use crate::types::ty::TypeId; - - use super::*; - - pub(crate) fn extract(source: Source) -> Result> { - let name = Name::new(source.name); - let mut externals = Externals::default(); - let mut importer = StaticImporter::new([(name.clone(), source.source)], parse_trusted); - let analyzer = analyze(name, &mut importer, &externals); - - if !analyzer.diagnostics.is_empty() { - return Err(analyzer.diagnostics); - } - - externals.register(Reef::new(source.name.to_string(), analyzer)); - - Ok(externals) - } - - pub(crate) fn extract_expr(source: Source) -> Result, Vec> { - extract(source).map(|externals| { - let chunk = externals - .get_reef(ReefId(1)) - .unwrap() - .typed_engine - .get_user(SourceId(0)) - .unwrap(); - - if let ChunkKind::DefinedFunction(Some(body)) = &chunk.kind { - if let ExprKind::Block(exprs) = &body.kind { - return exprs.clone(); - } - } - unreachable!() - }) - } - - pub(crate) fn extract_type(source: Source) -> Result> { - let externals = extract(source)?; - - let reef = externals.get_reef(ReefId(1)).unwrap(); - let chunk = reef.typed_engine.get_user(SourceId(0)).unwrap(); - - if let ChunkKind::DefinedFunction(Some(body)) = &chunk.kind { - return Ok(body.ty); - } - unreachable!() - } - - #[test] - fn single_literal() { - let res = extract_type(Source::unknown("1")); - assert_eq!(res, Ok(INT)); - } - - #[test] - fn deny_non_initialized() { - let content = "var a: Int; $a"; - let res = extract_type(Source::unknown(content)); - assert_eq!( - res, - Err(vec![Diagnostic::new( - DiagnosticID::UnsupportedFeature, - "Variables without initializers are not supported yet", - ) - .with_observation(Observation::here( - SourceId(0), - ReefId(1), - find_in(content, "var a: Int"), - "Variable declaration happens here", - ))]) - ); - } - - #[test] - fn correct_type_annotation() { - let externals = extract(Source::unknown("val a: Int = 1")).expect("got errors"); - let test_reef = externals.get_reef(ReefId(1)).unwrap(); - - let type_var = test_reef - .type_context - .get( - &test_reef.relations, - SourceId(0), - SymbolRef::Local(LocalId(0)), - ) - .unwrap(); - - assert_eq!(type_var.type_ref, INT); - } - - #[test] - fn coerce_type_annotation() { - let externals = extract(Source::unknown("val a: Float = 1")).expect("got errors"); - let test_reef = externals.get_reef(ReefId(1)).unwrap(); - - let type_var = test_reef - .type_context - .get( - &test_reef.relations, - SourceId(0), - SymbolRef::Local(LocalId(0)), - ) - .unwrap(); - - assert_eq!(type_var.type_ref, FLOAT); - } - - #[test] - fn no_coerce_type_annotation() { - let content = "val a: Int = 1.6"; - let res = extract_type(Source::unknown(content)); - assert_eq!( - res, - Err(vec![Diagnostic::new( 
- DiagnosticID::TypeMismatch, - "Type mismatch", - ) - .with_observation(Observation::context( - SourceId(0), - ReefId(1), - find_in(content, "Int"), - "Expected `Int`", - )) - .with_observation(Observation::here( - SourceId(0), - ReefId(1), - find_in(content, "1.6"), - "Found `Float`", - ))]) - ); - } - - #[test] - fn var_assign_of_same_type() { - let res = extract_type(Source::unknown("var l = 1; l = 2")); - assert_eq!(res, Ok(UNIT)); - } - - #[test] - fn var_assign_increment() { - let res = extract_type(Source::unknown("var n = 'Hello, '; n += 'world!'")); - assert_eq!(res, Ok(UNIT)); - } - - #[test] - fn invalid_left_hand_side_assignment() { - let content = "var foo = 1; foo = 'bar' = 9"; - let res = extract_type(Source::unknown(content)); - assert_eq!( - res, - Err(vec![Diagnostic::new( - DiagnosticID::InvalidAssignment, - "Invalid left-hand side of assignment", - ) - .with_observation(Observation::here( - SourceId(0), - ReefId(1), - find_in(content, "'bar'"), - "Cannot assign to this expression", - ))]) - ); - } - - #[test] - fn val_cannot_reassign() { - let content = "val l = 1; l = 2"; - let res = extract_type(Source::unknown(content)); - assert_eq!( - res, - Err(vec![Diagnostic::new( - DiagnosticID::CannotReassign, - "Cannot assign twice to immutable variable `l`", - ) - .with_observation(Observation::here( - SourceId(0), - ReefId(1), - find_in(content, "l = 2"), - "Assignment happens here", - ))]) - ); - } - - #[test] - fn cannot_assign_different_type() { - let content = "var p = 1; p = 'a'"; - let res = extract_type(Source::unknown(content)); - assert_eq!( - res, - Err(vec![Diagnostic::new( - DiagnosticID::TypeMismatch, - "Cannot assign a value of type `String` to something of type `Int`", - ) - .with_observation(Observation::here( - SourceId(0), - ReefId(1), - find_in(content, "p = 'a'"), - "Assignment happens here", - ))]) - ); - } - - #[test] - fn no_implicit_string_conversion() { - let content = "var str: String = 'test'; str = 4"; - let res = extract_type(Source::unknown(content)); - assert_eq!( - res, - Err(vec![Diagnostic::new( - DiagnosticID::TypeMismatch, - "Cannot assign a value of type `Int` to something of type `String`", - ) - .with_observation(Observation::here( - SourceId(0), - ReefId(1), - find_in(content, "str = 4"), - "Assignment happens here", - ))]) - ); - } - - #[test] - fn cannot_assign_to_function() { - let content = "fun a() -> Int = 1; a = 'a'"; - let res = extract_type(Source::unknown(content)); - assert_eq!( - res, - Err(vec![Diagnostic::new( - DiagnosticID::TypeMismatch, - "Named object `a` cannot be assigned like a variable", - ) - .with_observation(Observation::here( - SourceId(0), - ReefId(1), - find_in(content, "a = 'a'"), - "Assignment happens here", - ))]) - ); - } - - #[test] - fn condition_same_type() { - let res = extract_type(Source::unknown("if true; 1; else 2")); - assert_eq!(res, Ok(UNIT)); - } - - #[test] - fn condition_different_type() { - let res = extract_type(Source::unknown("if false; 4.7; else {}")); - assert_eq!(res, Ok(UNIT)); - } - - #[test] - fn condition_different_type_local_return() { - let content = "var n: Int = {if false; 4.7; else {}}"; - let res = extract_type(Source::unknown(content)); - assert_eq!( - res, - Err(vec![Diagnostic::new( - DiagnosticID::TypeMismatch, - "`if` and `else` have incompatible types", - ) - .with_observation(Observation::here( - SourceId(0), - ReefId(1), - find_in(content, "4.7"), - "Found `Float`", - )) - .with_observation(Observation::here( - SourceId(0), - ReefId(1), - find_in(content, "{}"), - 
"Found `Unit`", - ))]) - ); - } - - #[test] - fn incompatible_cast() { - let content = "val n = 'a' as Int"; - let res = extract_type(Source::unknown(content)); - assert_eq!( - res, - Err(vec![Diagnostic::new( - DiagnosticID::IncompatibleCast, - "Casting `String` as `Int` is invalid", - ) - .with_observation(Observation::here( - SourceId(0), - ReefId(1), - find_in(content, "'a' as Int"), - "Incompatible cast", - ))]) - ); - } - - #[test] - fn string_template() { - let res = extract_type(Source::unknown("val m = 5; val test = \"m = $m\"; $test")); - assert_eq!(res, Ok(STRING)); - } - - #[test] - fn function_return_type() { - let res = extract_type(Source::unknown("fun one() -> Int = 1\none()")); - assert_eq!(res, Ok(INT)); - } - - #[test] - fn local_type_only_at_end_of_block() { - let content = "fun test() -> Int = {if false; 5; else {}; 4}; test()"; - let res = extract_type(Source::unknown(content)); - assert_eq!(res, Ok(INT)); - } - - #[test] - fn wrong_arguments() { - let content = "fun square(n: Int) -> Int = $(( $n * $n ))\nsquare(9, 9)"; - let res = extract_type(Source::unknown(content)); - assert_eq!( - res, - Err(vec![Diagnostic::new( - DiagnosticID::TypeMismatch, - "This function takes 1 argument but 2 were supplied", - ) - .with_observation(Observation::here( - SourceId(0), - ReefId(1), - find_in(content, "square(9, 9)"), - "Function is called here", - ))]) - ); - } - - #[test] - fn wrong_arguments_type() { - let content = "fun dup(str: String) -> String = $str\ndup(4)"; - let res = extract_type(Source::unknown(content)); - assert_eq!( - res, - Err(vec![Diagnostic::new( - DiagnosticID::TypeMismatch, - "Type mismatch", - ) - .with_observation(Observation::here( - SourceId(0), - ReefId(1), - find_in(content, "4"), - "Expected `String`, found `Int`", - )) - .with_observation(Observation::context( - SourceId(1), - ReefId(1), - find_in(content, "str: String"), - "Parameter is declared here", - ))]), - ); - } - - #[test] - fn cannot_invoke_non_function() { - let content = "val test = 1;test()"; - let res = extract_type(Source::unknown(content)); - assert_eq!( - res, - Err(vec![Diagnostic::new( - DiagnosticID::TypeMismatch, - "Cannot invoke non function type", - ) - .with_observation(Observation::here( - SourceId(0), - ReefId(1), - find_in(content, "test()"), - "Call expression requires function, found `Int`", - ))]) - ); - } - - #[test] - fn type_function_parameters() { - let content = "fun test(a: String) = { var b: Int = $a }"; - let res = extract_type(Source::unknown(content)); - assert_eq!( - res, - Err(vec![Diagnostic::new( - DiagnosticID::TypeMismatch, - "Type mismatch", - ) - .with_observation(Observation::context( - SourceId(1), - ReefId(1), - find_in(content, "Int"), - "Expected `Int`", - )) - .with_observation(Observation::here( - SourceId(1), - ReefId(1), - find_in(content, "$a"), - "Found `String`", - ))]) - ); - } - - #[test] - fn a_calling_b() { - let res = extract_type(Source::unknown( - "fun a() -> Int = b()\nfun b() -> Int = 1\na()", - )); - assert_eq!(res, Ok(INT)); - } - - #[test] - fn bidirectional_usage() { - let res = extract_type(Source::unknown( - "val PI = 3.14\nfun circle(r: Float) -> Float = $(( $PI * $r * $r ))\ncircle(1)", - )); - assert_eq!(res, Ok(FLOAT)); - } - - #[test] - fn incorrect_return_type() { - let content = "fun zero() -> String = 0"; - let res = extract_type(Source::unknown(content)); - assert_eq!( - res, - Err(vec![Diagnostic::new( - DiagnosticID::TypeMismatch, - "Type mismatch", - ) - .with_observation(Observation::here( - SourceId(1), - 
ReefId(1), - find_in(content, "0"), - "Found `Int`", - )) - .with_observation(Observation::context( - SourceId(1), - ReefId(1), - find_in(content, "String"), - "Expected `String` because of return type", - ))]) - ); - } - - #[test] - fn explicit_valid_return() { - let content = "fun some() -> Int = return 20"; - let res = extract_type(Source::unknown(content)); - assert_eq!(res, Ok(UNIT)); - } - - #[test] - fn continue_and_break_inside_loops() { - let content = "loop { continue }; loop { break }"; - let res = extract_type(Source::unknown(content)); - assert_eq!(res, Ok(UNIT)); - } - - #[test] - fn continue_or_break_outside_loop() { - let content = "continue; break"; - let res = extract_type(Source::unknown(content)); - assert_eq!( - res, - Err(vec![ - Diagnostic::new( - DiagnosticID::InvalidBreakOrContinue, - "`continue` must be declared inside a loop", - ) - .with_observation((SourceId(0), ReefId(1), find_in(content, "continue")).into()), - Diagnostic::new( - DiagnosticID::InvalidBreakOrContinue, - "`break` must be declared inside a loop", - ) - .with_observation((SourceId(0), ReefId(1), find_in(content, "break")).into()), - ]) - ); - } - - #[test] - fn explicit_valid_return_mixed() { - let content = "fun some() -> Int = {\nif true; return 5; 9\n}"; - let res = extract_type(Source::unknown(content)); - assert_eq!(res, Ok(UNIT)); - } - - #[test] - fn explicit_invalid_return() { - let content = "fun some() -> String = {if true; return {}; 9}"; - let res = extract_type(Source::unknown(content)); - assert_eq!( - res, - Err(vec![Diagnostic::new( - DiagnosticID::TypeMismatch, - "Type mismatch", - ) - .with_observation(Observation::here( - SourceId(1), - ReefId(1), - find_in(content, "return {}"), - "Found `Unit`", - )) - .with_observation(Observation::here( - SourceId(1), - ReefId(1), - find_in(content, "9"), - "Found `Int`", - )) - .with_observation(Observation::context( - SourceId(1), - ReefId(1), - find_in(content, "String"), - "Expected `String` because of return type", - ))]) - ); - } - - #[test] - fn infer_valid_return_type() { - let content = "fun test(n: Float) = if false; 0.0; else $n; test(156.0)"; - let res = extract_type(Source::unknown(content)); - assert_eq!( - res, - Err(vec![Diagnostic::new( - DiagnosticID::CannotInfer, - "Return type inference is not supported yet", - ) - .with_observation(Observation::context( - SourceId(1), - ReefId(1), - find_in(content, "fun test(n: Float) = "), - "No return type is specified", - )) - .with_observation(Observation::here( - SourceId(1), - ReefId(1), - find_in(content, "if false; 0.0; else $n"), - "Returning `Float`", - )) - .with_help("Add -> Float to the function declaration")]) - ); - } - - #[test] - fn no_infer_block_return_type() { - let content = "fun test(n: Float) = {if false; return 0; $n}; test(156.0)"; - let res = extract_type(Source::unknown(content)); - assert_eq!( - res, - Err(vec![Diagnostic::new( - DiagnosticID::CannotInfer, - "Return type is not inferred for block functions", - ) - .with_observation(Observation::here( - SourceId(1), - ReefId(1), - find_in(content, "return 0"), - "Returning `Int`", - )) - .with_observation(Observation::here( - SourceId(1), - ReefId(1), - find_in(content, "$n"), - "Returning `Float`", - )) - .with_help( - "Try adding an explicit return type to the function" - )]) - ); - } - - #[test] - fn no_infer_complex_return_type() { - let content = "fun test() = if false; return 5; else {}; test()"; - let res = extract_type(Source::unknown(content)); - assert_eq!( - res, - Err(vec![Diagnostic::new( - 
DiagnosticID::CannotInfer, - "Failed to infer return type", - ) - .with_observation(Observation::here( - SourceId(1), - ReefId(1), - find_in(content, "fun test() = "), - "This function returns multiple types", - )) - .with_observation(Observation::here( - SourceId(1), - ReefId(1), - find_in(content, "return 5"), - "Returning `Int`", - )) - .with_help( - "Try adding an explicit return type to the function" - )]) - ); - } - - #[test] - fn conversions() { - let content = "val n = 75 + 1;val j = $n as Float\ngrep $n 4.2"; - let res = extract_expr(Source::unknown(content)); - assert_eq!( - res, - Ok(vec![ - TypedExpr { - kind: ExprKind::Declare(Declaration { - identifier: LocalId(0), - value: Some(Box::new(TypedExpr { - kind: ExprKind::MethodCall(MethodCall { - callee: Box::new(TypedExpr { - kind: ExprKind::Literal(75.into()), - ty: INT, - segment: find_in(content, "75"), - }), - arguments: vec![TypedExpr { - kind: ExprKind::Literal(1.into()), - ty: INT, - segment: find_in(content, "1"), - }], - function_id: FunctionId(1), - }), - ty: INT, - segment: find_in(content, "75 + 1"), - })), - }), - ty: UNIT, - segment: find_in(content, "val n = 75 + 1"), - }, - TypedExpr { - kind: ExprKind::Declare(Declaration { - identifier: LocalId(1), - value: Some(Box::new(TypedExpr { - kind: ExprKind::Convert(Convert { - inner: Box::new(TypedExpr { - kind: ExprKind::Reference(Var::Local(LocalId(0))), - ty: INT, - segment: find_in(content, "$n"), - }), - into: FLOAT, - }), - ty: FLOAT, - segment: find_in(content, "$n as Float"), - })), - }), - ty: UNIT, - segment: find_in(content, "val j = $n as Float"), - }, - TypedExpr { - kind: ExprKind::ProcessCall(vec![ - TypedExpr { - kind: ExprKind::Literal("grep".into()), - ty: STRING, - segment: find_in(content, "grep"), - }, - TypedExpr { - kind: ExprKind::MethodCall(MethodCall { - callee: Box::new(TypedExpr { - kind: ExprKind::Reference(Var::Local(LocalId(0))), - ty: INT, - segment: find_in_nth(content, "$n", 1), - }), - arguments: vec![], - function_id: FunctionId(29), - }), - ty: STRING, - segment: find_in_nth(content, "$n", 1), - }, - TypedExpr { - kind: ExprKind::MethodCall(MethodCall { - callee: Box::new(TypedExpr { - kind: ExprKind::Literal(4.2.into()), - ty: FLOAT, - segment: find_in(content, "4.2"), - }), - arguments: vec![], - function_id: FunctionId(30), - }), - ty: STRING, - segment: find_in(content, "4.2"), - }, - ]), - ty: EXITCODE, - segment: find_in(content, "grep $n 4.2"), - }, - ]) - ); - } - - #[test] - fn invalid_operand() { - let content = "val c = 4 / 'a'; $c"; - let res = extract_type(Source::unknown(content)); - assert_eq!( - res, - Err(vec![Diagnostic::new( - DiagnosticID::UnknownMethod, - "Undefined operator", - ) - .with_observation(Observation::here( - SourceId(0), - ReefId(1), - find_in(content, "4 / 'a'"), - "No operator `div` between type `Int` and `String`", - ))]), - ); - } - - #[test] - fn undefined_operator() { - let content = "val c = 'operator' - 2.4; $c"; - let res = extract_type(Source::unknown(content)); - assert_eq!( - res, - Err(vec![Diagnostic::new( - DiagnosticID::UnknownMethod, - "Undefined operator", - ) - .with_observation(Observation::here( - SourceId(0), - ReefId(1), - find_in(content, "'operator' - 2.4"), - "No operator `sub` between type `String` and `Float`", - ))]), - ); - } - - #[test] - fn valid_operator() { - let content = "val c = 7.3 - 2.4; $c"; - let res = extract_type(Source::unknown(content)); - assert_eq!(res, Ok(FLOAT)); - } - - #[test] - fn valid_operator_explicit_method() { - let content = "val j = 7.3; 
val c = $j.sub(2.4); $c"; - let res = extract_type(Source::unknown(content)); - assert_eq!(res, Ok(FLOAT)); - } - - #[test] - fn valid_method_but_invalid_parameter_count() { - let content = "val n = 'test'.len(5)"; - let res = extract_type(Source::unknown(content)); - assert_eq!( - res, - Err(vec![Diagnostic::new( - DiagnosticID::TypeMismatch, - "This method takes 0 arguments but 1 was supplied", - ) - .with_observation(Observation::here( - SourceId(0), - ReefId(1), - find_in(content, ".len(5)"), - "Method is called here", - )) - .with_help("The method signature is `String::len() -> Int`")]) - ); - } - - #[test] - fn valid_method_but_invalid_parameter_types() { - let content = "val j = 7.3; val c = $j.sub('a')"; - let res = extract_type(Source::unknown(content)); - assert_eq!( - res, - Err(vec![Diagnostic::new( - DiagnosticID::TypeMismatch, - "Type mismatch", - ) - .with_observation(Observation::here( - SourceId(0), - ReefId(1), - find_in(content, "'a'"), - "Expected `Float`, found `String`", - )) - .with_observation(Observation::context( - SourceId(0), - ReefId(1), - find_in(content, "sub"), - "Arguments to this method are incorrect", - ))]) - ); - } - - #[test] - fn cannot_stringify_void() { - let content = "val v = {}; grep $v 'test'"; - let res = extract_type(Source::unknown(content)); - assert_eq!( - res, - Err(vec![Diagnostic::new( - DiagnosticID::TypeMismatch, - "Cannot stringify type `Unit`", - ) - .with_observation(Observation::here( - SourceId(0), - ReefId(1), - find_in(content, "$v"), - "No method `to_string` on type `Unit`", - ))]) - ); - } - - #[test] - fn condition_must_be_bool() { - let content = "if 9.9 { 1 }"; - let res = extract_type(Source::unknown(content)); - assert_eq!( - res, - Err(vec![Diagnostic::new( - DiagnosticID::TypeMismatch, - "Condition must be a boolean", - ) - .with_observation(Observation::here( - SourceId(0), - ReefId(1), - find_in(content, "9.9"), - "Type `Float` cannot be used as a condition", - ))]) - ); - } - - #[test] - fn condition_previous_error() { - let content = "if [ 9.9 % 3.3 ] { echo 'ok' }"; - let res = extract_type(Source::unknown(content)); - assert_eq!( - res, - Err(vec![Diagnostic::new( - DiagnosticID::UnknownMethod, - "Undefined operator", - ) - .with_observation(Observation::here( - SourceId(0), - ReefId(1), - find_in(content, "9.9 % 3.3"), - "No operator `mod` between type `Float` and `Float`", - ))]) - ); - } - - #[test] - fn operation_and_test() { - let content = "val m = 101; val is_even = $m % 2 == 0; $is_even"; - let res = extract_type(Source::unknown(content)); - assert_eq!(res, Ok(BOOL)); - } - - #[test] - fn condition_command() { - let res = extract_type(Source::unknown("if nginx -t { echo 'ok' }")); - assert_eq!(res, Ok(UNIT)); - } - - #[test] - fn condition_invert_command() { - let res = extract_type(Source::unknown("if ! 
nginx -t { echo 'invalid config' }")); - assert_eq!(res, Ok(UNIT)); - } - - #[test] - fn cannot_invert_string() { - let content = "val s = 'test'; val is_empty = !$s"; - let res = extract_type(Source::unknown(content)); - assert_eq!( - res, - Err(vec![Diagnostic::new( - DiagnosticID::TypeMismatch, - "Cannot invert type", - ) - .with_observation(Observation::here( - SourceId(0), - ReefId(1), - find_in(content, "!$s"), - "Cannot invert non-boolean type `String`", - ))]) - ); - } - - #[test] - fn cannot_negate_unit() { - let content = "val opposite = -{}"; - let res = extract_type(Source::unknown(content)); - assert_eq!( - res, - Err(vec![Diagnostic::new( - DiagnosticID::UnknownMethod, - "Cannot negate type", - ) - .with_observation(Observation::here( - SourceId(0), - ReefId(1), - find_in(content, "-{}"), - "`Unit` does not implement the `neg` method", - ))]) - ); - } - - #[test] - fn no_cumulative_errors() { - let content = "var p = 'text' % 9; val r = $p.foo(); p = 4"; - let res = extract_type(Source::unknown(content)); - assert_eq!( - res, - Err(vec![Diagnostic::new( - DiagnosticID::UnknownMethod, - "Undefined operator", - ) - .with_observation(Observation::here( - SourceId(0), - ReefId(1), - find_in(content, "'text' % 9"), - "No operator `mod` between type `String` and `Int`", - ))]) - ); - } - - #[test] - fn redirect_to_string() { - let content = "val file = '/tmp/file'; cat /etc/passwd > $file 2>&1"; - let res = extract_type(Source::unknown(content)); - assert_eq!(res, Ok(EXITCODE)); - } - - #[test] - fn redirect_to_non_string() { - let content = "val file = {}; cat /etc/passwd > $file"; - let res = extract_type(Source::unknown(content)); - assert_eq!( - res, - Err(vec![Diagnostic::new( - DiagnosticID::TypeMismatch, - "Cannot stringify type `Unit`", - ) - .with_observation(Observation::here( - SourceId(0), - ReefId(1), - find_in(content, "$file"), - "No method `to_string` on type `Unit`", - ))]) - ); - } - - #[test] - fn redirect_to_string_fd() { - let content = "grep 'test' >&matches"; - let res = extract_type(Source::unknown(content)); - assert_eq!( - res, - Err(vec![Diagnostic::new( - DiagnosticID::TypeMismatch, - "File descriptor redirections must be given an integer, not `String`", - ) - .with_observation(Observation::here( - SourceId(0), - ReefId(1), - find_in(content, ">&matches"), - "Redirection happens here", - ))]) - ); - } - - #[test] - fn use_pipeline_return() { - let res = extract_type(Source::unknown( - "if echo hello | grep -q test | val m = $(cat test) {}", - )); - assert_eq!(res, Ok(UNIT)); - } - - #[test] - fn use_unit_result() { - let res = extract_type(Source::unknown( - "fun foo() = { fun bar() = { return }; bar() }", - )); - assert_eq!(res, Ok(UNIT)); - } - - #[test] - fn correct_concretized_type() { - let res = extract_type(Source::unknown("'Hello, world'.split(' ').push('!')")); - assert_eq!(res, Ok(UNIT)); - } - - #[test] - fn access_method_in_parameter() { - let res = extract_type(Source::unknown( - "fun len(bytes: Vec[Int]) -> Int = $bytes.len()", - )); - assert_eq!(res, Ok(UNIT)); - } - - #[test] - fn incorrect_generic_param() { - let content = "'Hello, world'.split(' ').push({})"; - let res = extract_type(Source::unknown(content)); - assert_eq!( - res, - Err(vec![Diagnostic::new( - DiagnosticID::TypeMismatch, - "Type mismatch", - ) - .with_observation(Observation::here( - SourceId(0), - ReefId(1), - find_in(content, "{}"), - "Expected `String`, found `Unit`", - )) - .with_observation(Observation::here( - SourceId(0), - ReefId(1), - find_in(content, "push"), 
- "Arguments to this method are incorrect", - ))]) - ); - } - - #[test] - fn incorrect_index_type() { - let content = "''.bytes()['a']"; - let res = extract_type(Source::unknown(content)); - assert_eq!( - res, - Err(vec![Diagnostic::new( - DiagnosticID::UnknownMethod, - "Cannot index into a value of type `Vec[Int]`", - ) - .with_observation(Observation::here( - SourceId(0), - ReefId(1), - find_in(content, "'a'"), - "`Vec[Int]` indices are of type `Int`", - ))]) - ); - } - - #[test] - fn assign_vec_index() { - let res = extract_type(Source::unknown( - "fun mul(v: Vec[Float], x: Float) = { - var n = 0 - while $n < $v.len() { - $v[$n] *= $x - $n += 1 - } - }", - )); - assert_eq!(res, Ok(UNIT)); - } - - #[test] - fn assign_vec_index_incorrect_type() { - let content = "val v = ''.bytes(); $v[0] = 'a'"; - let res = extract_type(Source::unknown(content)); - assert_eq!( - res, - Err(vec![Diagnostic::new( - DiagnosticID::TypeMismatch, - "Invalid assignment to `Int`", - ) - .with_observation(Observation::here( - SourceId(0), - ReefId(1), - find_in(content, "'a'"), - "Found `String`", - )) - .with_observation(Observation::context( - SourceId(0), - ReefId(1), - find_in(content, "$v[0]"), - "Expected due to the type of this binding", - ))]) - ); - } - - #[test] - fn incorrect_concretized_type() { - let content = - "val lines: Vec[String] = 'Hello, world'.split('\\n'); val first: Int = $lines[0]"; - let res = extract_type(Source::unknown(content)); - assert_eq!( - res, - Err(vec![Diagnostic::new( - DiagnosticID::TypeMismatch, - "Type mismatch", - ) - .with_observation(Observation::here( - SourceId(0), - ReefId(1), - find_in(content, "Int"), - "Expected `Int`", - )) - .with_observation(Observation::here( - SourceId(0), - ReefId(1), - find_in(content, "$lines[0]"), - "Found `String`", - ))]) - ); - } - - #[test] - fn different_concrete_type() { - let content = "val lines = 'Hello, world'.split('\\n'); val types: Vec[Float] = $lines"; - let res = extract_type(Source::unknown(content)); - assert_eq!( - res, - Err(vec![Diagnostic::new( - DiagnosticID::TypeMismatch, - "Type mismatch", - ) - .with_observation(Observation::here( - SourceId(0), - ReefId(1), - find_in(content, "Vec[Float]"), - "Expected `Vec[Float]`", - )) - .with_observation(Observation::here( - SourceId(0), - ReefId(1), - find_in(content, "$lines"), - "Found `Vec[String]`", - ))]) - ); - } - - #[test] - fn vec_in_vec() { - let content = "fun new_vec[T]() -> Vec[T]; - var v: Vec[Vec[Int]] = new_vec() - $v = new_vec() - $v[0] = new_vec()"; - let res = extract_type(Source::unknown(content)); - assert_eq!(res, Ok(UNIT)); - } - - #[test] - fn invalid_type_arguments_count() { - let content = "fun foo(n: Int[Int]) = {}"; - let res = extract_type(Source::unknown(content)); - assert_eq!( - res, - Err(vec![Diagnostic::new( - DiagnosticID::InvalidTypeArguments, - "Type `Int` were supplied 1 generic argument", - ) - .with_observation(Observation::here( - SourceId(1), - ReefId(1), - find_in(content, "Int[Int]"), - "Expected 0 generic arguments", - ))]) - ); - } - - #[test] - fn incorrect_type_parameter_assign() { - let content = "fun id[T]() -> T; val j: Int = id[String]()"; - let res = extract_type(Source::unknown(content)); - assert_eq!( - res, - Err(vec![Diagnostic::new( - DiagnosticID::TypeMismatch, - "Type mismatch", - ) - .with_observation(Observation::here( - SourceId(0), - ReefId(1), - find_in(content, "Int"), - "Expected `Int`" - )) - .with_observation(Observation::here( - SourceId(0), - ReefId(1), - find_in(content, "id[String]()"), - "Found 
`String`" - ))]) - ); - } - - #[test] - fn explicit_repeated_type_parameter() { - let source = Source::unknown("fun i[T, U](a: T, b: T) -> U; i::[Int, String](4, 5)"); - let res = extract_type(source); - assert_eq!(res, Ok(STRING)); - } - - #[test] - fn fun_with_generic_args() { - let content = "\ - fun push_into[A, B](v: A, vec: Vec[A], b: B) -> B = { - val x: A = $v - $vec.push($x) - $b - } - "; - let src = Source::unknown(content); - let externals = extract(src).expect("typing errors"); - let typing = &externals.get_reef(ReefId(1)).unwrap().typing; - assert_eq!(typing.get_type(TypeId(0)), Some(&Type::Polytype)); - assert_eq!(typing.get_type(TypeId(1)), Some(&Type::Polytype)); - } - - #[test] - fn fun_call_with_generic_args() { - let content = r#"\ - fun push_into[A, B](v: A, vec: Vec[A], b: B) -> B = { - val x: A = $v - $vec.push($x) - $b - } - - val vec = "".split(' ') - val i = push_into("item", $vec, 4) - "#; - let src = Source::unknown(content); - let externals = extract(src).expect("typing errors"); - let context = &externals.get_reef(ReefId(1)).unwrap().type_context; - - // `A` generic argument - assert_eq!( - context.get_local(SourceId(1), LocalId(0)), - Some(TypedVariable::immutable(TypeRef::new(ReefId(1), TypeId(0)))) - ); - // `B` generic argument - assert_eq!( - context.get_local(SourceId(1), LocalId(1)), - Some(TypedVariable::immutable(TypeRef::new(ReefId(1), TypeId(1)))) - ); - - // `v` argument - assert_eq!( - context.get_local(SourceId(1), LocalId(2)), - Some(TypedVariable::immutable(TypeRef::new(ReefId(1), TypeId(0)))) - ); - - // `vec` argument (has created a new type instantiation) - assert_eq!( - context.get_local(SourceId(1), LocalId(3)), - Some(TypedVariable::immutable(TypeRef::new(ReefId(1), TypeId(2)))) - ); - - // `b` argument - assert_eq!( - context.get_local(SourceId(1), LocalId(4)), - Some(TypedVariable::immutable(TypeRef::new(ReefId(1), TypeId(1)))) - ); - } - - #[test] - fn fun_generic_args_constraints() { - let content = "\ - fun foo[A, B](v: A, vec: Vec[A], b: B, c: B) -> B = $b - - val vec = ''.bytes() - val i: Option[Float] = foo('str_in_int_argument', $vec, '7', $vec) - "; - let src = Source::unknown(content); - let errs = extract(src).expect_err("no typing errors"); - assert_eq!( - errs, - vec![ - Diagnostic::new(DiagnosticID::TypeMismatch, "Type mismatch") - .with_observation(Observation::here( - SourceId(0), - ReefId(1), - find_in(content, "$vec"), - "Expected `Vec[String]`, found `Vec[Int]`", - )) - .with_observation(Observation::here( - SourceId(1), - ReefId(1), - find_in(content, "vec: Vec[A]"), - "Parameter is declared here", - )), - Diagnostic::new(DiagnosticID::TypeMismatch, "Type mismatch") - .with_observation(Observation::here( - SourceId(0), - ReefId(1), - find_in_nth(content, "$vec", 1), - "Expected `String`, found `Vec[Int]`", - )) - .with_observation(Observation::here( - SourceId(1), - ReefId(1), - find_in(content, "c: B"), - "Parameter is declared here", - )), - Diagnostic::new(DiagnosticID::TypeMismatch, "Type mismatch") - .with_observation(Observation::here( - SourceId(0), - ReefId(1), - find_in(content, "Option[Float]"), - "Expected `Option[Float]`", - )) - .with_observation(Observation::here( - SourceId(0), - ReefId(1), - find_in(content, "foo('str_in_int_argument', $vec, '7', $vec)"), - "Found `String`", - )), - ] - ) - } - - #[test] - fn string_glob() { - let content = "val files = p'systemd-*'; echo $files"; - let res = extract_type(Source::unknown(content)); - assert_eq!(res, Ok(EXITCODE)); - } - - #[test] - fn 
background_process() { - let source = Source::unknown("foo &"); - let res = extract_type(source); - assert_eq!(res, Ok(PID)); - } - - #[test] - fn subprocess() { - let source = Source::unknown("(foo)"); - let res = extract_type(source); - assert_eq!(res, Ok(EXITCODE)); - } - - #[test] - fn locals_types() { - let content = "\ - struct Simple {} - val a: Simple = Simple() - val b = Simple() - - val c: Int = 7 - val d = 7 - "; - let src = Source::unknown(content); - let externals = extract(src).expect("typing errors"); - let reef = externals.get_reef(ReefId(1)).unwrap(); - let typing = &reef.typing; - let ctx = &reef.type_context; - let relations = &reef.relations; - - let assert_local_type = |local_id, type_ref| { - assert_eq!( - Some(TypedVariable::immutable(type_ref)), - ctx.get(relations, SourceId(0), SymbolRef::Local(LocalId(local_id))) - ) - }; - - assert_local_type(0, TypeRef::new(ReefId(1), TypeId(0))); //struct Simple - - assert_local_type(1, TypeRef::new(ReefId(1), TypeId(0))); //val a - assert_local_type(2, TypeRef::new(ReefId(1), TypeId(0))); //val b (inferred) - - assert_local_type(3, INT); //val c - assert_local_type(4, INT); //val d (inferred) - - // struct Simple - assert_eq!( - Some(&Type::Structure(Some(SourceId(1)), StructureId(0))), - typing.get_type(TypeId(0)) - ); - } - - #[test] - fn locals_complex_types() { - let content = "\ - struct Complex[A] {} - val a: Complex[Int] = Complex() - val b = Complex[Int]() - "; - let src = Source::unknown(content); - let externals = extract(src).expect("typing errors"); - let reef = externals.get_reef(ReefId(1)).unwrap(); - let typing = &reef.typing; - let ctx = &reef.type_context; - let relations = &reef.relations; - - let assert_local_type = |local_id, type_ref| { - assert_eq!( - Some(TypedVariable::immutable(type_ref)), - ctx.get(relations, SourceId(0), SymbolRef::Local(LocalId(local_id))) - ) - }; - - //struct Complex[A] - assert_local_type(0, TypeRef::new(ReefId(1), TypeId(0))); - - //val a, references instance of Complex[A] (Complex[Int]) - assert_local_type(1, TypeRef::new(ReefId(1), TypeId(5))); - //val b (inferred), references equivalent instance of a (Complex[Int]) - assert_local_type(2, TypeRef::new(ReefId(1), TypeId(6))); - - // struct Complex[A] - assert_eq!( - Some(&Type::Structure(Some(SourceId(1)), StructureId(0))), - typing.get_type(TypeId(0)) - ); - - // instance 1 Complex[Int] - assert_eq!( - Some(&Type::Instantiated( - TypeRef::new(ReefId(1), TypeId(0)), - vec![INT] - )), - typing.get_type(TypeId(5)) - ); - - // instance 2 Complex[Int] - assert_eq!( - Some(&Type::Instantiated( - TypeRef::new(ReefId(1), TypeId(0)), - vec![INT] - )), - typing.get_type(TypeId(6)) - ); - } -} diff --git a/analyzer/src/steps/typing/assign.rs b/analyzer/src/steps/typing/assign.rs deleted file mode 100644 index fb43cd1f..00000000 --- a/analyzer/src/steps/typing/assign.rs +++ /dev/null @@ -1,236 +0,0 @@ -use crate::diagnostic::{Diagnostic, DiagnosticID, Observation}; -use crate::steps::typing::bounds::TypesBounds; -use crate::steps::typing::coercion::{convert_expression, is_compatible}; -use crate::steps::typing::exploration::{Exploration, Links}; -use crate::steps::typing::function::{ - find_operand_implementation, list_operator_defined_for, BinaryMethodMatch, -}; -use crate::steps::typing::{ascribe_types, ExpressionValue, TypingState}; -use crate::types::hir::{ExprKind, MethodCall, TypedExpr}; -use crate::types::ty::Parameter; -use crate::types::UNIT; -use ast::operation::{BinaryOperation, BinaryOperator}; -use ast::range::Subscript; -use 
ast::variable::{Assign, AssignOperator}; -use ast::Expr; -use context::source::SourceSegmentHolder; - -/// Creates the right hand side of an assignment. -/// -/// The state should contain the [`ExpressionValue::Expected`] value of the left hand side. -pub(super) fn ascribe_assign_rhs( - assign: &Assign, - exploration: &mut Exploration, - links: Links, - diagnostics: &mut Vec, - state: TypingState, -) -> TypedExpr { - match assign.operator { - AssignOperator::Assign => { - ascribe_types(exploration, links, diagnostics, &assign.value, state) - } - operator => { - let binary = Expr::Binary(BinaryOperation { - left: assign.left.clone(), - op: BinaryOperator::try_from(operator).expect("Invalid assign operator"), - right: assign.value.clone(), - }); - ascribe_types( - exploration, - links, - diagnostics, - &binary, - state.with_local_value(ExpressionValue::Unspecified), - ) - } - } -} - -pub(super) fn create_subscript( - sub: &Subscript, - exploration: &mut Exploration, - links: Links, - diagnostics: &mut Vec, - state: TypingState, -) -> Result { - let target = ascribe_types(exploration, links, diagnostics, &sub.target, state); - let index = ascribe_types(exploration, links, diagnostics, &sub.index, state); - if index.ty.is_err() || target.ty.is_err() { - return Err(target); - } - - let index_ty = index.ty; - let target_ty = target.ty; - let methods = exploration - .get_methods(target_ty, "[]") - .map(|methods| methods.as_slice()) - .unwrap_or(&[]); - - let target_ty_base_reef = exploration.get_base_type(target_ty).reef; - let method = - find_operand_implementation(exploration, target_ty_base_reef, methods, target, index); - match method { - Ok(method) => Ok(method), - Err(target) => { - diagnostics.push(if !methods.is_empty() { - let methods: Vec<_> = methods - .iter() - .flat_map(|method_id| exploration.get_function(target_ty_base_reef, *method_id)) - .collect(); - - Diagnostic::new( - DiagnosticID::UnknownMethod, - format!( - "Cannot index into a value of type `{}`", - exploration.new_type_view(target_ty, &TypesBounds::inactive()) - ), - ) - .with_observation(Observation::here( - links.source, - exploration.externals.current, - sub.index.segment(), - format!( - "`{}` indices are of type {}", - exploration.new_type_view(target_ty, &TypesBounds::inactive()), - list_operator_defined_for(exploration, &methods, &TypesBounds::inactive()), - ), - )) - } else { - Diagnostic::new( - DiagnosticID::UnknownMethod, - format!( - "The type `{}` cannot be indexed by `{}`", - exploration.new_type_view(target_ty, &TypesBounds::inactive()), - exploration.new_type_view(index_ty, &TypesBounds::inactive()) - ), - ) - .with_observation(Observation::here( - links.source, - exploration.externals.current, - sub.index.segment(), - format!( - "Indexing with `{}` is invalid", - exploration.new_type_view(index_ty, &TypesBounds::inactive()) - ), - )) - }); - Err(target) - } - } -} - -pub(super) fn ascribe_assign_subscript( - assign: &Assign, - sub: &Subscript, - exploration: &mut Exploration, - links: Links, - diagnostics: &mut Vec, - state: TypingState, -) -> TypedExpr { - // Require first that normal subscripting is available - let Ok(BinaryMethodMatch { - left: target, - right: index, - .. 
- }) = create_subscript( - sub, - exploration, - links, - diagnostics, - state.with_local_value(ExpressionValue::Unspecified), - ) - else { - return TypedExpr::error(assign.segment()); - }; - - let target_type_reef = exploration.get_base_type(target.ty).reef; - let Some((function_id, value_ty)) = exploration - .get_methods(target.ty, "[]") - .map(|methods| methods.as_slice()) - .unwrap_or(&[]) - .iter() - .find_map(|method_id| { - // Look for the method without worrying about potential overloads for the second parameter - let method = exploration - .get_function(target_type_reef, *method_id) - .unwrap(); - if let [Parameter { ty: index_ty, .. }, value] = method.parameters.as_slice() { - if !is_compatible(exploration, *index_ty, index.ty) || method.return_type != UNIT { - return None; - } - Some((*method_id, exploration.concretize(value.ty, target.ty))) - } else { - None - } - }) - else { - diagnostics.push( - Diagnostic::new( - DiagnosticID::TypeMismatch, - format!( - "Type `{}` is indexable but is not assignable", - exploration.new_type_view(target.ty, &TypesBounds::inactive()) - ), - ) - .with_observation(Observation::here( - links.source, - exploration.externals.current, - sub.segment(), - format!( - "Indexing with `{}` does not allow assignment", - exploration.new_type_view(index.ty, &TypesBounds::inactive()) - ), - )), - ); - return TypedExpr::error(assign.segment()); - }; - - let rhs_state = state.with_local_value(ExpressionValue::Expected(value_ty)); - let rhs = ascribe_assign_rhs(assign, exploration, links, diagnostics, rhs_state); - let rhs_segment = rhs.segment(); - let rhs_ty = rhs.ty; - - if let Ok(converted) = convert_expression( - rhs, - value_ty, - &mut TypesBounds::inactive(), - exploration, - links.source, - diagnostics, - ) { - return TypedExpr { - kind: ExprKind::MethodCall(MethodCall { - callee: Box::new(target), - arguments: vec![index, converted], - function_id, - }), - ty: UNIT, - segment: assign.segment(), - }; - } - diagnostics.push( - Diagnostic::new( - DiagnosticID::TypeMismatch, - format!( - "Invalid assignment to `{}`", - exploration.new_type_view(value_ty, &TypesBounds::inactive()) - ), - ) - .with_observation(Observation::here( - links.source, - exploration.externals.current, - rhs_segment, - format!( - "Found `{}`", - exploration.new_type_view(rhs_ty, &TypesBounds::inactive()) - ), - )) - .with_observation(Observation::context( - links.source, - exploration.externals.current, - sub.segment(), - "Expected due to the type of this binding", - )), - ); - TypedExpr::error(assign.segment()) -} diff --git a/analyzer/src/steps/typing/bounds.rs b/analyzer/src/steps/typing/bounds.rs deleted file mode 100644 index 9bbc73bb..00000000 --- a/analyzer/src/steps/typing/bounds.rs +++ /dev/null @@ -1,185 +0,0 @@ -use crate::reef::ReefId; -use crate::steps::typing::exploration::Exploration; -use crate::steps::typing::function::infer_return_from_hint; -use crate::types; -use crate::types::engine::FunctionId; -use crate::types::ty::{Type, TypeRef}; -use std::collections::hash_map::Entry; -use std::collections::HashMap; -use std::iter::once; - -/// Binds a polytype to largest possible monotype -#[derive(Default, Clone)] -pub struct TypesBounds { - bounds: HashMap, -} - -impl TypesBounds { - /// Construct a type bounds with no polytype to bound, which makes it inactive - pub fn inactive() -> Self { - Self::new(HashMap::new()) - } - - pub fn new(base: HashMap) -> Self { - Self { bounds: base } - } - - pub fn get_bound(&self, ty: TypeRef) -> TypeRef { - 
*self.bounds.get(&ty).unwrap_or(&ty) - } - /// return true if given type is registered as a bound type but is bound to itself - pub fn is_self_bound(&self, ty: TypeRef) -> bool { - self.bounds.get(&ty).is_some_and(|t| *t == ty) - } - - /// update bounds of registered polytypes from given type scheme correlated with given bounds. - /// This method will only update bounds that are larger than the current polytypes bounds - pub(super) fn update_bounds( - &mut self, - base: TypeRef, - new_bounds: TypeRef, - exploration: &Exploration, - ) { - match self.bounds.entry(base) { - Entry::Occupied(mut o) => { - // As there is no real hierarchy for now, only the Nothing type can be more specific than any other type - // if the base type already had a bound (other than himself), we can accept larger types only (thus no NOTHING currently) - if *o.get() != base && new_bounds == types::NOTHING { - return; - } - o.insert(new_bounds); - } - Entry::Vacant(_) => { - let base_type = exploration.get_type(base).unwrap(); - let bound_type = exploration.get_type(new_bounds).unwrap(); - if let (Type::Instantiated(b1, p1), Type::Instantiated(b2, p2)) = - (base_type, bound_type) - { - for (base, bounds) in p1.iter().zip(p2) { - self.update_bounds(*base, *bounds, exploration); - } - self.update_bounds(*b1, *b2, exploration); - } - } - } - } -} - -fn extract_polytypes(tpe_ref: TypeRef, exploration: &Exploration) -> Vec { - let tpe = exploration.get_type(tpe_ref).unwrap(); - match tpe { - Type::Polytype => once(tpe_ref).collect(), - Type::Instantiated(base, params) => extract_polytypes(*base, exploration) - .into_iter() - .chain( - params - .iter() - .flat_map(|ty| extract_polytypes(*ty, exploration)), - ) - .collect(), - _ => Vec::new(), - } -} - -/// build type parameters bounds of a user-defined function. -/// The return hint is only applied if the function's return type does not depend on function's parameters. -/// Set `obj` to Some type if the function is a method that applies to the object -pub(super) fn build_bounds( - user_bounds: &[TypeRef], - obj: Option, - fun_reef: ReefId, - function_id: FunctionId, - return_hint: Option, - exploration: &Exploration, -) -> TypesBounds { - let function = exploration.get_function(fun_reef, function_id).unwrap(); - - let mut bounds = HashMap::new(); - - // add in bounds the object's type instance parameters bounds - if let Some(ty) = obj { - let base = exploration.get_type(ty).unwrap(); - if let Type::Instantiated(base, tparams) = base { - let Type::Structure(_, structure_id) = exploration.get_type(*base).unwrap() else { - panic!("type instance is not of a structured type") - }; - let base_tparams = exploration - .get_structure(base.reef, *structure_id) - .unwrap() - .type_parameters - .iter() - .map(|ty| TypeRef::new(base.reef, *ty)); - bounds.extend(base_tparams.zip(tparams.clone())) - } - } - - // collect the functions' type parameters used in the parameters. 
- let parameters_polytypes = function - .parameters - .iter() - .flat_map(|p| extract_polytypes(p.ty, exploration)) - .collect(); - - // Use the return type hint only if it does not contains a polytype bound with the parameters - if !type_depends_of(function.return_type, ¶meters_polytypes, exploration) { - if let Some(hint) = return_hint { - infer_return_from_hint(exploration, function.return_type, hint, &mut bounds); - } - } - - for (idx, type_param) in function.type_parameters.iter().enumerate() { - let type_param = TypeRef::new(fun_reef, *type_param); - let user_bound = user_bounds.get(idx).cloned(); - - // user has explicitly set a type bound - if let Some(user_bound) = user_bound { - bounds.insert(type_param, user_bound); - } else { - // user expects an inference - // if bounds is already know thanks to the given return type hint correlation with function types parameters - // let it as is, else, bound the type param with itself - bounds.entry(type_param).or_insert(type_param); - } - } - - TypesBounds::new(bounds) -} - -/// search if given type is contained in given polytypes or has any type parameter contained in this list. -fn type_depends_of(tpe: TypeRef, polytypes: &Vec, exploration: &Exploration) -> bool { - if polytypes.contains(&tpe) { - return true; - } - - if let Type::Instantiated(base, params) = exploration.get_type(tpe).unwrap() { - return type_depends_of(*base, polytypes, exploration) - || params - .iter() - .any(|ty| type_depends_of(*ty, polytypes, exploration)); - } - false -} - -pub(super) fn apply_bounds( - exploration: &mut Exploration, - ty: TypeRef, - bounds: &TypesBounds, -) -> TypeRef { - let ty_ref = bounds.get_bound(ty); - let ty = exploration.get_type(ty_ref).unwrap(); - if let Type::Instantiated(base, params) = ty { - let base = bounds.get_bound(*base); - let params: Vec<_> = params - .clone() - .into_iter() - .map(|ty| apply_bounds(exploration, ty, bounds)) - .collect(); - - let type_id = exploration - .typing - .add_type(Type::Instantiated(base, params), None); - return TypeRef::new(exploration.externals.current, type_id); - } - - ty_ref -} diff --git a/analyzer/src/steps/typing/coercion.rs b/analyzer/src/steps/typing/coercion.rs deleted file mode 100644 index 09e9a48e..00000000 --- a/analyzer/src/steps/typing/coercion.rs +++ /dev/null @@ -1,331 +0,0 @@ -use ast::r#type::ParametrizedType; -use context::source::{SourceSegment, SourceSegmentHolder}; - -use crate::diagnostic::{Diagnostic, DiagnosticID, Observation}; -use crate::relations::SourceId; -use crate::steps::typing::bounds::TypesBounds; -use crate::steps::typing::exploration::{Exploration, Links}; -use crate::steps::typing::lower::call_convert_on; -use crate::types::hir::TypedExpr; -use crate::types::ty::{Type, TypeRef}; -use crate::types::{UnificationError, BOOL, ERROR, NOTHING}; - -/// Unifies two type identifiers, returning the type that the right hand side was unified to. -/// -/// Unification is successful when the assignation type is a superset of the rvalue type, i.e -/// when the assignation type is a parent conceptually or technically of the rvalue type. -/// It is not reflexive, i.e. `unify(a, b)` is not the same as `unify(b, a)`. -/// -/// A conversion may be not as simple as a reinterpretation of the value, and may require -/// a conversion function to be called. Use [`convert_expression`] to -/// generate the conversion code for a typed expression. 
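// A minimal, self-contained sketch of the rule described in the doc comment
// above, using hypothetical stand-in types rather than the analyzer's real
// `TypeRef`/`Exploration` API: conversion is directional, so an `Int` rvalue
// fits a `Float` slot (the implicit widening exercised by the
// `coerce_type_annotation` test) while a `Float` rvalue is rejected for an
// `Int` slot (`no_coerce_type_annotation`). `SimpleType` and `convert_simple`
// are illustrative names only.
#[derive(Debug, Clone, Copy, PartialEq)]
enum SimpleType { Int, Float }

/// Returns the type the right-hand side ends up with when it can be assigned
/// to `assign_to`, and an error otherwise. Note the asymmetry: swapping the
/// arguments changes the outcome, just as the real `convert_description` is
/// not reflexive.
fn convert_simple(assign_to: SimpleType, rvalue: SimpleType) -> Result<SimpleType, ()> {
    match (assign_to, rvalue) {
        (lhs, rhs) if lhs == rhs => Ok(lhs),
        (SimpleType::Float, SimpleType::Int) => Ok(SimpleType::Float), // Int widens to Float
        _ => Err(()),
    }
}

fn main() {
    assert_eq!(convert_simple(SimpleType::Float, SimpleType::Int), Ok(SimpleType::Float));
    assert!(convert_simple(SimpleType::Int, SimpleType::Float).is_err());
}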
-pub(super) fn convert_description( - exploration: &Exploration, - assign_to: TypeRef, - rvalue: TypeRef, - bounds: &mut TypesBounds, - is_base_type: bool, -) -> Result { - if assign_to.is_err() || rvalue.is_err() { - // An error is compatible with everything, as it is a placeholder. - return Ok(assign_to); - } - - if rvalue == assign_to { - // if both references are the same then no need to lookup, it's the same type - return Ok(assign_to); - } - - let lhs = exploration - .get_type(assign_to) - .unwrap_or_else(|| panic!("cannot find type {assign_to:?}`")); - - if *lhs == Type::Polytype && bounds.is_self_bound(assign_to) { - return Ok(assign_to); - } - - let rhs = exploration - .get_type(rvalue) - .unwrap_or_else(|| panic!("cannot find type {rvalue:?}`")); - - // Valid excepted if both types are polytype - if *lhs != Type::Polytype && lhs == rhs { - return Ok(assign_to); - } - - // apply the `A U Nothing => A` rule only if `A` is a base type - if is_base_type && *rhs == Type::Nothing { - return Ok(assign_to); - } - - if let (Type::Instantiated(base_left, params_lhs), Type::Instantiated(base_right, params_rhs)) = - (lhs, rhs) - { - if is_compatible(exploration, *base_left, *base_right) { - // simply test if parameters of rvalue can fit to assigned target - // when generic parameters will have bounds, we'll probably want to - // assign a new type that's the result of the union of rvalue and assigned. - let are_parameters_compatible = - params_lhs - .iter() - .zip(params_rhs) - .all(|(param_lhs, param_rhs)| { - let bound = bounds.get_bound(*param_lhs); - let is_compatible = - convert_description(exploration, bound, *param_rhs, bounds, false) - .is_ok(); - - // restrict bound even more - if is_compatible { - bounds.update_bounds(*param_lhs, bound, exploration); - } - - is_compatible - }); - if are_parameters_compatible { - return Ok(assign_to); - } - } - } - - let rvalue_typing = exploration.get_types(rvalue.reef).unwrap(); - - if is_base_type { - if let Some(implicit) = rvalue_typing.implicits.get(&rvalue.type_id) { - let implicit = exploration - .get_type(*implicit) - .unwrap_or_else(|| panic!("cannot find type {implicit:?}`")); - if lhs == implicit { - return Ok(assign_to); - } - } - } - Err(UnificationError()) -} - -/// Unifies multiple type identifiers in any direction. -pub(super) fn convert_many>( - exploration: &mut Exploration, - bounds: &mut TypesBounds, - types: I, -) -> Result { - let mut types = types - .into_iter() - .filter(|ty| ty.is_ok() && !ty.is_nothing()); - - let first = types.next().unwrap_or(NOTHING); - types.try_fold(first, |acc, ty| { - convert_description(exploration, acc, ty, bounds, true) - .or_else(|_| convert_description(exploration, ty, acc, bounds, true)) - }) -} - -/// Finds the type reference from an annotation. -pub(super) fn resolve_type_annotation( - exploration: &mut Exploration, - links: Links, - type_annotation: &ast::r#type::Type, - diagnostics: &mut Vec, -) -> TypeRef { - match type_annotation { - ast::r#type::Type::Parametrized(ParametrizedType { params, .. 
}) => { - let env = links.env(); - let type_symbol_ref = env.get_raw_symbol(type_annotation.segment()).unwrap(); - let type_variable = exploration - .get_var(links.source, type_symbol_ref, links.relations) - .unwrap(); - let main_type = type_variable.type_ref; - - let main_base_ty = exploration.get_base_type(main_type); - let main_base_type = exploration.get_type(main_base_ty).unwrap(); - - let generics = match main_base_type { - Type::Function(_, function_id) => exploration - .get_function(main_base_ty.reef, *function_id) - .map(|s| s.type_parameters.as_slice()) - .unwrap_or(&[]), - Type::Structure(_, structure_id) => exploration - .get_structure(main_base_ty.reef, *structure_id) - .map(|s| s.type_parameters.as_slice()) - .unwrap_or(&[]), - _ => &[], - }; - - if params.len() != generics.len() { - diagnostics.push( - Diagnostic::new( - DiagnosticID::InvalidTypeArguments, - if params.len() < generics.len() { - format!( - "Missing generics for type `{}`", - exploration.new_type_view(main_type, &TypesBounds::inactive()), - ) - } else { - format!( - "Type `{}` were supplied {} generic argument{}", - exploration.new_type_view(main_type, &TypesBounds::inactive()), - params.len(), - if params.len() == 1 { "" } else { "s" } - ) - }, - ) - .with_observation(Observation::here( - links.source, - exploration.externals.current, - type_annotation.segment(), - format!( - "Expected {} generic argument{}", - generics.len(), - if generics.len() == 1 { "" } else { "s" } - ), - )), - ); - return ERROR; - } else if params.is_empty() { - return main_type; - } - - let params = params - .iter() - .map(|param| resolve_type_annotation(exploration, links, param, diagnostics)) - .collect(); - let instantiated_id = exploration - .typing - .add_type(Type::Instantiated(main_type, params), None); - TypeRef::new(exploration.externals.current, instantiated_id) - } - ast::r#type::Type::Callable(_) => unimplemented!(), - ast::r#type::Type::ByName(_) => unimplemented!(), - } -} - -pub(super) fn is_compatible( - exploration: &Exploration, - assign_to: TypeRef, - rvalue: TypeRef, -) -> bool { - if assign_to.is_err() || rvalue.is_err() || rvalue.is_nothing() { - return true; // An error is compatible with everything, as it is a placeholder. - } - let lhs = exploration.get_type(assign_to).unwrap(); - let rhs = exploration.get_type(rvalue).unwrap(); - lhs == rhs -} - -/// Ensures that the type annotation accepts the given value. -/// -/// A type annotation might generate a conversion function call, which is returned. 
-pub(super) fn check_type_annotation( - exploration: &mut Exploration, - expected_type: TypeRef, - expected_type_segment: SourceSegment, - bounds: &mut TypesBounds, - value: TypedExpr, - links: Links, - diagnostics: &mut Vec, -) -> TypedExpr { - if value.ty.is_err() { - return value; - } - - let current_reef = exploration.externals.current; - - convert_expression( - value, - expected_type, - bounds, - exploration, - links.source, - diagnostics, - ) - .unwrap_or_else(|mut value| { - diagnostics.push( - Diagnostic::new(DiagnosticID::TypeMismatch, "Type mismatch") - .with_observation(Observation::here( - links.source, - current_reef, - expected_type_segment, - format!( - "Expected `{}`", - exploration.new_type_view(expected_type, bounds), - ), - )) - .with_observation(Observation::here( - links.source, - current_reef, - value.segment(), - format!("Found `{}`", exploration.new_type_view(value.ty, bounds)), - )), - ); - value.ty = expected_type; - value - }) -} - -/// Tries to convert an expression to the given assignation type. -/// -/// If unified, the expression is converted using the appropriate method. -/// If the conversion is incorrect, the input expression is returned, -/// in order to encourage the caller to report a specific error. -/// -/// Most of the times, it will not generate any diagnostic, since diagnostics -/// would only be generated if an implicit conversion is incorrect (i.e. if -/// it is registered but if the appropriate method is not found). -pub(super) fn convert_expression( - rvalue: TypedExpr, - assign_to: TypeRef, - bounds: &mut TypesBounds, - exploration: &mut Exploration, - source: SourceId, - diagnostics: &mut Vec, -) -> Result { - match convert_description(exploration, assign_to, rvalue.ty, bounds, true) { - Ok(ty) => Ok(call_convert_on( - rvalue, - ty, - exploration, - |ty| format!("Cannot convert type `{ty}`"), - diagnostics, - bounds, - source, - )), - Err(_) => Err(rvalue), - } -} - -/// Ensures that the expression is a boolean. -/// -/// If not, a diagnostic is generated and the expression is returned. -/// Otherwise, the converted expression is returned. 
-pub(super) fn coerce_condition( - condition: TypedExpr, - exploration: &mut Exploration, - source: SourceId, - diagnostics: &mut Vec, -) -> TypedExpr { - match convert_expression( - condition, - BOOL, - &mut TypesBounds::inactive(), - exploration, - source, - diagnostics, - ) { - Ok(condition) => condition, - Err(condition) => { - diagnostics.push( - Diagnostic::new(DiagnosticID::TypeMismatch, "Condition must be a boolean") - .with_observation(Observation::here( - source, - exploration.externals.current, - condition.segment(), - format!( - "Type `{}` cannot be used as a condition", - exploration.new_type_view(condition.ty, &TypesBounds::inactive()), - ), - )), - ); - condition - } - } -} diff --git a/analyzer/src/steps/typing/exploration.rs b/analyzer/src/steps/typing/exploration.rs deleted file mode 100644 index fb49e4d4..00000000 --- a/analyzer/src/steps/typing/exploration.rs +++ /dev/null @@ -1,278 +0,0 @@ -use crate::engine::Engine; -use crate::environment::symbols::Symbol; -use crate::environment::Environment; -use crate::reef::{Externals, Reef, ReefId}; -use crate::relations::{LocalId, Relations, ResolvedSymbol, SourceId, SymbolRef}; -use crate::steps::typing::bounds::TypesBounds; -use crate::steps::typing::function::Return; -use crate::steps::typing::view::TypeView; -use crate::types::ctx::{TypeContext, TypedVariable}; -use crate::types::engine::{Chunk, FunctionId, StructureId, TypedEngine}; -use crate::types::ty::{FunctionDesc, MethodType, StructureDesc, Type, TypeRef}; -use crate::types::Typing; - -/// The support for type analysis. -pub(super) struct Exploration<'a> { - pub(super) type_engine: TypedEngine, - pub(super) typing: Typing, - pub(super) ctx: TypeContext, - pub(super) returns: Vec, - pub(super) externals: &'a Externals<'a>, -} - -#[derive(Debug, Clone, Copy)] -pub(super) struct Links<'a> { - pub(super) source: SourceId, - pub(super) engine: &'a Engine<'a>, - pub(super) relations: &'a Relations, -} - -impl<'a> Links<'a> { - pub(super) fn env(self) -> &'a Environment { - self.engine.get_environment(self.source).unwrap() - } - - pub(super) fn with_source(self, source: SourceId) -> Self { - Self { source, ..self } - } -} - -impl<'a> Exploration<'a> { - pub(super) fn prepare(&mut self) { - self.returns.clear(); - } - - pub(super) fn get_type(&self, id: TypeRef) -> Option<&Type> { - let typing = if id.reef == self.externals.current { - &self.typing - } else { - &self.get_external_type_reef(id.reef).typing - }; - typing.get_type(id.type_id) - } - - pub(super) fn get_chunk(&self, reef: ReefId, source: SourceId) -> Option<&Chunk> { - let engine = if reef == self.externals.current { - &self.type_engine - } else { - &self.get_external_type_reef(reef).typed_engine - }; - engine.get_user(source) - } - - pub(super) fn get_type_name(&self, ty: TypeRef) -> Option<&String> { - if ty.reef == self.externals.current { - self.typing.get_type_name(ty.type_id) - } else { - self.externals - .get_reef(ty.reef) - .unwrap() - .typing - .get_type_name(ty.type_id) - } - } - - /// Gets the type instance of a type identifier. - pub(super) fn new_type_view(&self, id: TypeRef, bounds: &'a TypesBounds) -> TypeView { - TypeView::new(id, self, bounds) - } - - /// Gets the type of a generic type parameter. 
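// A minimal, self-contained sketch of the substitution performed by
// `concretize` below, with hypothetical string-based type names instead of
// `TypeRef`: given a structure's declared type parameters (e.g. `Vec[A]`) and
// the arguments of a concrete instantiation (e.g. `Vec[Int]`), a generic
// parameter is mapped to the argument at the same position, and any type that
// is not one of the declared parameters is returned unchanged.
fn concretize_simple<'a>(
    generic: &'a str,
    declared_params: &[&'a str],
    instance_args: &[&'a str],
) -> &'a str {
    declared_params
        .iter()
        .zip(instance_args)
        .find_map(|(decl, arg)| (*decl == generic).then_some(*arg))
        .unwrap_or(generic)
}

fn main() {
    // `Vec[A]` instantiated as `Vec[Int]`: `A` becomes `Int`, `String` is untouched.
    assert_eq!(concretize_simple("A", &["A"], &["Int"]), "Int");
    assert_eq!(concretize_simple("String", &["A"], &["Int"]), "String");
}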
- pub(super) fn concretize(&self, generic: TypeRef, instance_holder: TypeRef) -> TypeRef { - if self.get_type(generic) != Some(&Type::Polytype) { - return generic; - } - - if let Some(&Type::Instantiated(base, ref parameters)) = self.get_type(instance_holder) { - let Type::Structure(_, structure_id) = self.get_type(base).unwrap() else { - panic!("instantiated type does not have a defined structure"); - }; - let base_type_reef = self.get_base_type(instance_holder).reef; - let polytypes = &self - .get_structure(base_type_reef, *structure_id) - .unwrap() - .type_parameters; - - let type_id = *polytypes - .iter() - .zip(parameters.iter()) - .find_map(|(generic_id, concrete)| { - (generic.reef == base_type_reef && generic.type_id == *generic_id) - .then_some(concrete) - }) - .expect("Polytype should be instantiated."); - return type_id; - } - generic - } - - pub(super) fn get_method_exact( - &self, - id: TypeRef, - name: &str, - params: &[TypeRef], - return_ty: TypeRef, - ) -> Option<(&MethodType, FunctionId)> { - let definition = self.get_base_type(id); - let current = self.externals.current; - - let &Type::Structure(_, structure_id) = self.get_type(id).unwrap() else { - return None; - }; - - if definition.reef == current || id.reef == current { - self.type_engine - .get_method_exact(structure_id, name, params, return_ty) - } else { - let reef = self.get_external_type_reef(id.reef); - reef.typed_engine - .get_method_exact(structure_id, name, params, return_ty) - } - } - - pub(super) fn get_symbol( - &self, - reef: ReefId, - source: SourceId, - local_id: LocalId, - links: Links<'a>, - ) -> Option<&'a Symbol> { - let engine = if self.externals.current == reef { - links.engine - } else { - &self.externals.get_reef(reef).unwrap().engine - }; - - engine - .get_environment(source) - .unwrap() - .symbols - .get(local_id) - } - - pub(super) fn get_var( - &self, - source: SourceId, /* FIXME */ - symbol: SymbolRef, - relations: &Relations, - ) -> Option { - let reef_id = match symbol { - SymbolRef::Local(_) => return self.ctx.get(relations, source, symbol), - SymbolRef::External(ext) => { - let call_symbol = relations[ext] - .state - .expect_resolved("Unresolved symbol during typechecking"); - call_symbol.reef - } - }; - let ctx = if reef_id == self.externals.current { - &self.ctx - } else { - &self - .externals - .get_reef(reef_id) - .expect("Unknown external reef found on symbol") - .type_context - }; - ctx.get(relations, source, symbol) - } - - pub(super) fn get_methods(&self, id: TypeRef, name: &str) -> Option<&Vec> { - let definition = self.get_base_type(id); - - let &Type::Structure(_, structure_id) = self.get_type(definition).unwrap() else { - return None; - }; - - if definition.reef == self.externals.current { - self.type_engine.get_methods(structure_id, name) - } else { - let reef = self.get_external_type_reef(definition.reef); - reef.typed_engine.get_methods(structure_id, name) - } - } - - /// Gets the base type of a type identifier. 
- pub(crate) fn get_base_type(&self, type_id: TypeRef) -> TypeRef { - match self.get_type(type_id).unwrap_or(&Type::Error) { - Type::Instantiated(def, _) => *def, - _ => type_id, - } - } - - pub(super) fn get_external_env( - &'a self, - from_env: &'a Environment, - to_symbol: ResolvedSymbol, - ) -> Option<&'a Environment> { - if to_symbol.reef == self.externals.current { - Some(from_env) - } else { - self.externals - .get_reef(to_symbol.reef) - .unwrap() - .engine - .get_environment(to_symbol.source) - } - } - - pub(super) fn get_types(&self, reef: ReefId) -> Option<&Typing> { - if reef == self.externals.current { - Some(&self.typing) - } else { - self.externals.get_reef(reef).map(|r| &r.typing) - } - } - - pub(super) fn get_function( - &self, - reef: ReefId, - function_id: FunctionId, - ) -> Option<&FunctionDesc> { - if reef == self.externals.current { - self.type_engine.get_function(function_id) - } else { - self.get_external_type_reef(reef) - .typed_engine - .get_function(function_id) - } - } - - pub(super) fn get_structure( - &self, - reef: ReefId, - structure_id: StructureId, - ) -> Option<&StructureDesc> { - if reef == self.externals.current { - self.type_engine.get_structure(structure_id) - } else { - self.get_external_type_reef(reef) - .typed_engine - .get_structure(structure_id) - } - } - - pub(super) fn is_compatible(&self, assign_to: TypeRef, rvalue: TypeRef) -> bool { - if assign_to.is_err() || rvalue.is_err() || rvalue.is_nothing() { - return true; // An error is compatible with everything, as it is a placeholder. - } - let lhs = self.get_type(assign_to).unwrap(); - if *lhs == Type::Polytype { - return true; - } - - let rhs = self.get_type(rvalue).unwrap(); - - if let (Type::Instantiated(base_lhs, _), Type::Instantiated(base_rhs, _)) = (lhs, rhs) { - return base_lhs == base_rhs; - } - - lhs == rhs - } - - fn get_external_type_reef(&self, id: ReefId) -> &Reef { - self.externals - .get_reef(id) - .expect("Unknown external reef found on type") - } -} diff --git a/analyzer/src/steps/typing/function.rs b/analyzer/src/steps/typing/function.rs deleted file mode 100644 index a6f88d13..00000000 --- a/analyzer/src/steps/typing/function.rs +++ /dev/null @@ -1,1057 +0,0 @@ -use std::collections::HashMap; -use std::fmt; - -use ast::call::{MethodCall, ProgrammaticCall}; -use ast::function::{FunctionDeclaration, FunctionParameter}; -use ast::Expr; -use context::source::{SourceSegment, SourceSegmentHolder}; - -use crate::diagnostic::{Diagnostic, DiagnosticID, Observation, SourceLocation}; -use crate::environment::symbols::SymbolInfo; -use crate::reef::ReefId; -use crate::relations::{LocalId, ObjectId, SourceId, SymbolRef}; -use crate::steps::typing::bounds::{apply_bounds, build_bounds, TypesBounds}; -use crate::steps::typing::coercion::{ - convert_description, convert_expression, convert_many, resolve_type_annotation, -}; -use crate::steps::typing::exploration::{Exploration, Links}; -use crate::steps::typing::view::TypeInstanceVec; -use crate::steps::typing::{ascribe_types, ExpressionValue, TypingState}; -use crate::types::engine::{Chunk, ChunkKind, FunctionId}; -use crate::types::hir::{ExprKind, TypedExpr}; -use crate::types::ty::{FunctionDesc, FunctionKind, MethodType, Parameter, Type, TypeRef}; -use crate::types::{ERROR, STRING, UNIT}; - -/// An identified return during the exploration. -#[derive(Debug, Clone, PartialEq, Eq)] -pub(super) struct Return { - /// The returned type. - pub(super) ty: TypeRef, - - /// The segment where the return is located. 
- pub(super) segment: SourceSegment, -} - -/// Identifies a function that correspond to a call. -#[derive(Debug, Clone, PartialEq)] -pub(super) struct FunctionMatch { - /// The converted arguments to pass to the function. - /// - /// If any conversion is required, it will be done here. - pub(super) arguments: Vec, - - /// The function identifier to call. - pub(super) function_id: FunctionId, - /// Optional chunk identifier if this function has an associated source. - pub(super) function_source: Option, - - /// The function return type. - pub(super) return_type: TypeRef, - - /// The function's reef - pub(super) reef: ReefId, -} - -/// Gets the returned type of a function. -/// -/// This verifies the type annotation if present against all the return types, -/// or try to guess the return type. -pub(super) fn infer_return( - func: &FunctionDeclaration, - expected_return_type: TypeRef, - links: Links, - typed_func_body: Option<&TypedExpr>, - diagnostics: &mut Vec, - exploration: &mut Exploration, -) -> TypeRef { - if let Some(typed_func_body) = typed_func_body { - let last = get_last_segment(typed_func_body); - // If the last statement is a return, we don't need re-add it - if exploration - .returns - .last() - .map_or(true, |ret| ret.segment != last.segment) - && last.ty.is_something() - && last.ty.is_ok() - { - exploration.returns.push(Return { - ty: typed_func_body.ty, - segment: last.segment.clone(), - }); - } - } - - let mut typed_return_locations: Vec<_> = Vec::new(); - - for ret in &exploration.returns { - if convert_description( - exploration, - expected_return_type, - ret.ty, - &mut TypesBounds::inactive(), - true, - ) - .is_err() - { - typed_return_locations.push(Observation::here( - links.source, - exploration.externals.current, - ret.segment.clone(), - if func.return_type.is_some() { - format!( - "Found `{}`", - exploration.new_type_view(ret.ty, &TypesBounds::inactive()) - ) - } else { - format!( - "Returning `{}`", - exploration.new_type_view(ret.ty, &TypesBounds::inactive()) - ) - }, - )); - } - } - - if typed_return_locations.is_empty() { - return expected_return_type; - } - - if let Some(return_type_annotation) = func.return_type.as_ref() { - diagnostics.push( - Diagnostic::new(DiagnosticID::TypeMismatch, "Type mismatch") - .with_observations(typed_return_locations) - .with_observation(Observation::context( - links.source, - exploration.externals.current, - return_type_annotation.segment(), - format!( - "Expected `{}` because of return type", - exploration.new_type_view(expected_return_type, &TypesBounds::inactive()), - ), - )), - ); - return ERROR; - } - - let Some(body) = &func.body else { - diagnostics.push( - Diagnostic::new( - DiagnosticID::CannotInfer, - "Function declaration needs explicit return type", - ) - .with_observations(typed_return_locations) - .with_help("Explicit the function's return type as it's not defined."), - ); - - return ERROR; - }; - - if matches!(body.as_ref(), Expr::Block(_)) { - diagnostics.push( - Diagnostic::new( - DiagnosticID::CannotInfer, - "Return type is not inferred for block functions", - ) - .with_observations(typed_return_locations) - .with_help("Try adding an explicit return type to the function"), - ); - - return ERROR; - } - let segment = func.segment().start..body.segment().start; - let types: Vec<_> = exploration.returns.iter().map(|ret| ret.ty).collect(); - let unify = convert_many(exploration, &mut TypesBounds::inactive(), types); - - if let Ok(common_type) = unify { - diagnostics.push( - Diagnostic::new( - 
DiagnosticID::CannotInfer, - "Return type inference is not supported yet", - ) - .with_observation(Observation::context( - links.source, - exploration.externals.current, - segment, - "No return type is specified", - )) - .with_observations(typed_return_locations) - .with_help(format!( - "Add -> {} to the function declaration", - exploration.new_type_view(common_type, &TypesBounds::inactive()), - )), - ); - } else { - diagnostics.push( - Diagnostic::new(DiagnosticID::CannotInfer, "Failed to infer return type") - .with_observation(Observation::context( - links.source, - exploration.externals.current, - segment, - "This function returns multiple types", - )) - .with_observations(typed_return_locations) - .with_help("Try adding an explicit return type to the function"), - ); - } - ERROR -} - -/// Ensures that the return type does not contains any reference to given type parameters of function. -fn check_for_leaked_type_parameters( - exploration: &Exploration, - not_to_leak: &[TypeRef], - return_type: TypeRef, - source: SourceId, - call_segment: SourceSegment, - diagnostics: &mut Vec, -) -> TypeRef { - let mut leaked_types = Vec::new(); - - fn collect_leaked_types( - exploration: &Exploration, - not_to_leak: &[TypeRef], - tpe: TypeRef, - leaked_types: &mut Vec, - ) { - if not_to_leak.contains(&tpe) { - leaked_types.push(tpe) - } - let ty = exploration.get_type(tpe).unwrap(); - if let Type::Instantiated(base, params) = ty { - collect_leaked_types(exploration, not_to_leak, *base, leaked_types); - for param in params { - collect_leaked_types(exploration, not_to_leak, *param, leaked_types); - } - } - } - - collect_leaked_types(exploration, not_to_leak, return_type, &mut leaked_types); - - if let Some((first, tail)) = leaked_types.split_first() { - let leaked_types_str = { - tail.iter().fold( - format!( - "`{}`", - exploration.new_type_view(*first, &TypesBounds::inactive()) - ), - |acc, it| { - format!( - "{acc}, `{}`", - exploration.new_type_view(*it, &TypesBounds::inactive()) - ) - }, - ) - }; - - diagnostics.push( - Diagnostic::new( - DiagnosticID::CannotInfer, - "Cannot infer parameter types of function", - ) - .with_observation(Observation::here( - source, - exploration.externals.current, - call_segment, - format!("please provide explicit types for generic parameters {leaked_types_str}"), - )), - ); - ERROR - } else { - return_type - } -} - -/// create a basic chunk from a function declaration -/// type its parameters, type parameters and return type -pub(super) fn declare_function( - func: &FunctionDeclaration, - exploration: &mut Exploration, - function_links: Links, - diagnostics: &mut Vec, -) -> Chunk { - let mut type_params = Vec::new(); - let mut params = Vec::new(); - - let func_source = function_links.source; - exploration - .ctx - .init_locals(func_source, function_links.env().symbols.len()); - - for (local_id, type_param) in func.type_parameters.iter().enumerate() { - let param_type_id = exploration - .typing - .add_type(Type::Polytype, Some(type_param.name.to_string())); - type_params.push(param_type_id); - - let param_type_ref = TypeRef::new(exploration.externals.current, param_type_id); - exploration - .ctx - .set_local_typed(func_source, LocalId(local_id), param_type_ref); - exploration - .ctx - .bind_name(type_param.name.to_string(), param_type_id); - } - - let tparam_count = func.type_parameters.len(); - for (param_offset, param) in func.parameters.iter().enumerate() { - let local_id = LocalId(tparam_count + param_offset); - let param = type_parameter(local_id, exploration, 
param, function_links, diagnostics); - exploration - .ctx - .set_local_typed(func_source, local_id, param.ty); - params.push(param); - } - - let return_type = func.return_type.as_ref().map_or(UNIT, |ty| { - resolve_type_annotation(exploration, function_links, ty, diagnostics) - }); - - let function_id = exploration.type_engine.add_function(FunctionDesc { - type_parameters: type_params, - parameters: params, - return_type, - kind: FunctionKind::Function, - }); - - let function_type = exploration.typing.add_type( - Type::Function(Some(func_source), function_id), - Some(func.name.to_string()), - ); - - // The function body will be typed on next iteration - Chunk { - function_type, - function_id, - kind: func.body.as_ref().map_or(ChunkKind::DeclaredFunction, |_| { - ChunkKind::DefinedFunction(None) - }), - } -} - -/// Checks the type of a call expression. -pub(super) fn type_call( - call: &ProgrammaticCall, - exploration: &mut Exploration, - links: Links, - state: TypingState, - diagnostics: &mut Vec, -) -> FunctionMatch { - let arguments = &call.arguments; - - let call_symbol_ref = links.env().get_raw_symbol(call.segment()).unwrap(); - - let (fun_reef, fun_origin, fun_local_id) = match call_symbol_ref { - SymbolRef::Local(lid) => (exploration.externals.current, links.source, lid), - SymbolRef::External(r) => { - let call_symbol = links.relations[r].state.expect_resolved("unresolved"); - (call_symbol.reef, call_symbol.source, call_symbol.object_id) - } - }; - - let function_type_ref = exploration - .get_var(fun_origin, call_symbol_ref, links.relations) - .unwrap() - .type_ref; - - let (function_source, function_id) = match *exploration.get_type(function_type_ref).unwrap() { - Type::Function(function_source, function_id) => (function_source, function_id), - // We are (maybe) invoking a type's constructor. 
- Type::Structure(structure_source, _) - // check if the symbol kind is SymbolInfo::Type, otherwise, we are trying to call a function over a variable reference - // that returns a structure, which is not something callable - if exploration.get_symbol(fun_reef, fun_origin, fun_local_id, links).unwrap().ty == SymbolInfo::Type - => { - - // there is only one constructor function for now (the default one) - let constructor_id = exploration - .get_methods(function_type_ref, "") - .unwrap()[0]; - (structure_source, constructor_id) - } - _ => { - diagnostics.push( - Diagnostic::new( - DiagnosticID::TypeMismatch, - "Cannot invoke non function type", - ) - .with_observation(Observation::here( - links.source, - exploration.externals.current, - call.segment(), - format!( - "Call expression requires function, found `{}`", - exploration.new_type_view(function_type_ref, &TypesBounds::inactive()) - ), - )), - ); - - let arguments = arguments - .iter() - .map(|expr| ascribe_types(exploration, links, diagnostics, expr, state)) - .collect::>(); - - return FunctionMatch { - arguments, - function_id: FunctionId(ObjectId::MAX), - function_source: None, - return_type: ERROR, - reef: fun_reef, - }; - } - }; - - let function = exploration.get_function(fun_reef, function_id).unwrap(); - let parameters = function.parameters.clone(); // TODO: avoid clone - let return_type = function.return_type; - - if parameters.len() != arguments.len() { - diagnostics.push( - Diagnostic::new( - DiagnosticID::TypeMismatch, - format!( - "This function takes {} {} but {} {} supplied", - parameters.len(), - pluralize(parameters.len(), "argument", "arguments"), - arguments.len(), - pluralize(arguments.len(), "was", "were"), - ), - ) - .with_observation(Observation::here( - links.source, - exploration.externals.current, - call.segment.clone(), - "Function is called here", - )), - ); - - let arguments = arguments - .iter() - .map(|expr| ascribe_types(exploration, links, diagnostics, expr, state)) - .collect::>(); - - FunctionMatch { - arguments, - function_id, - function_source, - return_type: ERROR, - reef: fun_reef, - } - } else { - let types_parameters: Vec<_> = function - .type_parameters - .iter() - .map(|type_id| TypeRef::new(fun_reef, *type_id)) - .collect(); - - let expected_type = if let ExpressionValue::Expected(t) = state.local_value { - Some(t) - } else { - None - }; - - let mut bounds = resolve_bounds( - &call.type_parameters, - fun_reef, - None, - function_id, - expected_type, - exploration, - links, - diagnostics, - ); - - let mut casted_arguments = Vec::with_capacity(parameters.len()); - for (param, arg) in parameters.iter().cloned().zip(arguments) { - let param_bound = bounds.get_bound(param.ty); - - let arg = ascribe_types( - exploration, - links, - diagnostics, - arg, - state.with_local_value(ExpressionValue::Expected(param_bound)), - ); - - let casted_argument = convert_expression( - arg, - param_bound, - &mut bounds, - exploration, - links.source, - diagnostics, - ); - - let casted_argument = match casted_argument { - Ok(arg) => { - bounds.update_bounds(param.ty, arg.ty, exploration); - arg - } - Err(arg) => { - diagnostics.push(diagnose_arg_mismatch( - exploration, - links.source, - exploration.externals.current, - fun_reef, - ¶m, - &arg, - &bounds, - )); - arg - } - }; - - casted_arguments.push(casted_argument); - } - - let return_type = apply_bounds(exploration, return_type, &bounds); - - let return_type = check_for_leaked_type_parameters( - exploration, - &types_parameters, - return_type, - links.source, - 
call.segment(), - diagnostics, - ); - - FunctionMatch { - arguments: casted_arguments, - function_id, - function_source, - return_type, - reef: fun_reef, - } - } -} - -/// update given bounds to update type parameters bounds of the function's return type from the given hint -pub(super) fn infer_return_from_hint( - exploration: &Exploration, - return_type: TypeRef, - return_type_hint: TypeRef, - bounds: &mut HashMap, -) { - let return_tpe = exploration.get_type(return_type).unwrap(); - let hint_tpe = exploration.get_type(return_type_hint).unwrap(); - match (return_tpe, hint_tpe) { - (Type::Polytype, _) => { - bounds.insert(return_type, return_type_hint); - } - (Type::Instantiated(_, return_params), Type::Instantiated(_, hint_params)) => { - for (return_param, hint_param) in return_params.iter().zip(hint_params) { - infer_return_from_hint(exploration, *return_param, *hint_param, bounds) - } - } - _ => {} - } -} - -#[allow(clippy::too_many_arguments)] -fn resolve_bounds( - user_bounds: &[ast::r#type::Type], - declaration_reef: ReefId, - obj: Option, - function_id: FunctionId, - return_hint: Option, - exploration: &mut Exploration, - links: Links, - diagnostics: &mut Vec, -) -> TypesBounds { - let bounds_types = user_bounds - .iter() - .map(|ty| resolve_type_annotation(exploration, links, ty, diagnostics)) - .collect::>(); - let bounds = build_bounds( - &bounds_types, - obj, - declaration_reef, - function_id, - return_hint, - exploration, - ); - let entry = exploration - .get_function(declaration_reef, function_id) - .unwrap(); - - let expected_tparams_count = entry.type_parameters.len(); - if !user_bounds.is_empty() && user_bounds.len() != expected_tparams_count { - diagnostics.push(diagnose_wrong_tparams_count( - user_bounds, - expected_tparams_count, - links, - exploration.externals.current, - )); - } - - bounds -} - -fn diagnose_wrong_tparams_count( - user_tparams: &[ast::r#type::Type], - expected_count: usize, - links: Links, - reef: ReefId, -) -> Diagnostic { - let first = user_tparams.first().unwrap(); - let last = user_tparams.last().unwrap(); - - let segment = first.segment().start..last.segment().end; - - Diagnostic::new( - DiagnosticID::InvalidTypeArguments, - "Wrong type argument count", - ) - .with_observation(Observation::here( - links.source, - reef, - segment, - format!( - "`{}` type parameter specified, expected `{}`.", - user_tparams.len(), - expected_count - ), - )) -} - -/// A specialized [`crate::types::hir::MethodCall`] between two expressions. -pub(super) struct BinaryMethodMatch { - pub(crate) left: TypedExpr, - pub(crate) right: TypedExpr, - pub(crate) return_type: TypeRef, - pub(crate) function_id: FunctionId, - pub(crate) reef: ReefId, -} - -impl From for crate::types::hir::MethodCall { - fn from(binary: BinaryMethodMatch) -> Self { - Self { - callee: Box::new(binary.left), - arguments: vec![binary.right], - function_id: binary.function_id, - } - } -} - -/// Checks the type of a method expression. 
-pub(super) fn find_operand_implementation( - exploration: &Exploration, - reef: ReefId, - methods: &[FunctionId], - left: TypedExpr, - right: TypedExpr, -) -> Result { - for method_id in methods { - let method = exploration.get_function(reef, *method_id).unwrap(); - if let [param] = &method.parameters.as_slice() { - if param.ty == right.ty { - let return_type = exploration.concretize(method.return_type, left.ty); - return Ok(BinaryMethodMatch { - left, - right, - function_id: *method_id, - return_type, - reef, - }); - } - } - } - Err(left.poison()) -} - -/// Creates a list of the type parameters of methods. -pub(super) fn list_operator_defined_for<'a>( - exploration: &'a Exploration, - methods: &[&MethodType], - bounds: &'a TypesBounds, -) -> TypeInstanceVec<'a> { - let types = methods - .iter() - .flat_map(|method| { - if let [param] = method.parameters.as_slice() { - Some(param.ty) - } else { - None - } - }) - .collect(); - TypeInstanceVec::new(types, exploration, bounds) -} - -/// Checks the type of a method expression. -#[allow(clippy::too_many_arguments)] -pub(super) fn type_method( - method_call: &MethodCall, - callee: &TypedExpr, - links: Links, - arguments: Vec, - diagnostics: &mut Vec, - exploration: &mut Exploration, - source: SourceId, - return_hint: Option, -) -> Option { - if callee.ty.is_err() { - return None; - } - - let type_args: Vec<_> = method_call - .type_parameters - .iter() - .map(|t| resolve_type_annotation(exploration, links, t, diagnostics)) - .collect(); - - let current_reef = exploration.externals.current; - - // Directly callable types just have a single method called `apply` - let method_name = method_call - .name - .as_ref() - .map(|name| name.value.as_str()) - .unwrap_or("apply"); - let type_methods = exploration.get_methods(callee.ty, method_name); - if type_methods.is_none() { - diagnostics.push( - Diagnostic::new( - DiagnosticID::UnknownMethod, - if method_call.name.is_some() { - format!( - "No method named `{method_name}` found for type `{}`", - exploration.new_type_view(callee.ty, &TypesBounds::inactive()) - ) - } else { - format!( - "Type `{}` is not directly callable", - exploration.new_type_view(callee.ty, &TypesBounds::inactive()) - ) - }, - ) - .with_observation((source, current_reef, method_call.segment.clone()).into()), - ); - return None; - } - - let methods = type_methods.unwrap(); // We just checked for None - - let result = find_exact_method( - callee.ty, - methods, - &arguments, - &type_args, - return_hint, - exploration, - ); - - let method_base_reef = exploration.get_base_type(callee.ty).reef; - - if let Some((method_id, bounds)) = result { - let method = exploration - .get_function(method_base_reef, method_id) - .unwrap(); - - let types_parameters: Vec<_> = method - .type_parameters - .iter() - .map(|type_id| TypeRef::new(method_base_reef, *type_id)) - .collect(); - - let return_type = exploration.concretize(method.return_type, callee.ty); - let return_type = apply_bounds(exploration, return_type, &bounds); - let return_type = check_for_leaked_type_parameters( - exploration, - &types_parameters, - return_type, - links.source, - method_call.segment(), - diagnostics, - ); - - // We have an exact match - return Some(FunctionMatch { - arguments, - function_id: method_id, - function_source: None, - return_type, - reef: callee.ty.reef, - }); - } - - if methods.len() == 1 { - // If there is only one method, we can give a more specific error by adding - // an observation for each invalid type - let method_id = *methods.first().unwrap(); - let 
method = exploration - .get_function(method_base_reef, method_id) - .unwrap(); - - if method.parameters.len() != arguments.len() { - diagnostics.push( - Diagnostic::new( - DiagnosticID::TypeMismatch, - format!( - "This method takes {} {} but {} {} supplied", - method.parameters.len(), - pluralize(method.parameters.len(), "argument", "arguments"), - arguments.len(), - pluralize(arguments.len(), "was", "were") - ), - ) - .with_observation(Observation::here( - source, - current_reef, - method_call.segment(), - "Method is called here", - )) - .with_help(format!( - "The method signature is `{}::{}`", - exploration.new_type_view(callee.ty, &TypesBounds::inactive()), - Signature::new(exploration, method_name, method) - )), - ); - } else { - let mut bounds = resolve_bounds( - &method_call.type_parameters, - method_base_reef, - Some(callee.ty), - method_id, - return_hint, - exploration, - links, - diagnostics, - ); - - // mutable borrow of `exploration` in `resolve_bounds` call - // forces us to retrieve the method once again to drop previous - // immutable borrow of `exploration`, that lives through the `method` var. - let methods = exploration.get_methods(callee.ty, method_name).unwrap(); - let method_id = *methods.first().unwrap(); - let method = exploration - .get_function(method_base_reef, method_id) - .unwrap(); - - for (param, arg) in method.parameters.iter().zip(arguments.iter()) { - let param_bound = bounds.get_bound(param.ty); - - match convert_description(exploration, param_bound, arg.ty, &mut bounds, true) { - Ok(ty) => { - bounds.update_bounds(param.ty, ty, exploration); - } - Err(_) => { - let param = Parameter { - location: param.location.clone(), - ty: param_bound, - local_id: param.local_id, - }; - let diagnostic = diagnose_arg_mismatch( - exploration, - source, - current_reef, - callee.ty.reef, - ¶m, - arg, - &bounds, - ) - .with_observation(Observation::here( - source, - current_reef, - if let Some(name) = &method_call.name { - name.segment() - } else { - method_call.segment() - }, - "Arguments to this method are incorrect", - )); - diagnostics.push(diagnostic); - } - } - } - } - } else { - // If there are multiple methods, list them all - diagnostics.push( - Diagnostic::new( - DiagnosticID::UnknownMethod, - format!( - "No matching method found for `{method_name}::{}`", - exploration.new_type_view(callee.ty, &TypesBounds::inactive()) - ), - ) - .with_observation(Observation::here( - source, - current_reef, - method_call.segment(), - "Method is called here", - )), - ); - } - None -} - -/// Generates a type mismatch between a parameter and an argument. -fn diagnose_arg_mismatch( - exploration: &Exploration, - source: SourceId, - current_reef: ReefId, - param_reef: ReefId, - param: &Parameter, - arg: &TypedExpr, - bounds: &TypesBounds, -) -> Diagnostic { - let diagnostic = Diagnostic::new(DiagnosticID::TypeMismatch, "Type mismatch").with_observation( - Observation::here( - source, - current_reef, - arg.segment.clone(), - format!( - "Expected `{}`, found `{}`", - exploration.new_type_view(param.ty, bounds), - exploration.new_type_view(arg.ty, bounds) - ), - ), - ); - if let Some(location) = ¶m.location { - diagnostic.with_observation(Observation::context( - location.source, - param_reef, - location.segment.clone(), - "Parameter is declared here", - )) - } else { - diagnostic - } -} - -/// Finds a matching method for the given arguments. 
-pub(super) fn find_exact_method( - obj: TypeRef, - methods: &[FunctionId], - args: &[TypedExpr], - type_args: &[TypeRef], - return_hint: Option, - exploration: &Exploration, -) -> Option<(FunctionId, TypesBounds)> { - let obj_type_reef = exploration.get_base_type(obj).reef; - - 'methods: for method_id in methods { - let method = exploration.get_function(obj_type_reef, *method_id).unwrap(); - if method.parameters.len() != args.len() { - continue; - } - - let mut bounds = build_bounds( - type_args, - Some(obj), - obj_type_reef, - *method_id, - return_hint, - exploration, - ); - - for (param, arg) in method.parameters.iter().zip(args.iter()) { - let param_ty = exploration.concretize(param.ty, obj); - let param_bound = bounds.get_bound(param_ty); - - let converted = - convert_description(exploration, param_bound, arg.ty, &mut bounds, true); - match converted { - Ok(ty) => { - bounds.update_bounds(param.ty, ty, exploration); - } - Err(_) => continue 'methods, - } - } - return Some((*method_id, bounds)); - } - None -} - -/// Type check a single function parameter. -pub(super) fn type_parameter( - local_id: LocalId, - exploration: &mut Exploration, - param: &FunctionParameter, - links: Links, - diagnostics: &mut Vec, -) -> Parameter { - match param { - FunctionParameter::Named(named) => { - let type_id = named.ty.as_ref().map_or(STRING, |ty| { - resolve_type_annotation(exploration, links, ty, diagnostics) - }); - Parameter { - location: Some(SourceLocation::new( - links.source, - exploration.externals.current, - named.segment(), - )), - ty: type_id, - local_id, - } - } - FunctionParameter::Slf(_) => todo!("method not supported yet"), - FunctionParameter::Variadic(_, _) => todo!("Arrays are not supported yet"), - } -} - -fn get_last_segment(expr: &TypedExpr) -> &TypedExpr { - match &expr.kind { - ExprKind::Block(expressions) => expressions.last().map_or(expr, get_last_segment), - _ => expr, - } -} - -fn pluralize<'a>(count: usize, singular: &'a str, plural: &'a str) -> &'a str { - if count == 1 { - singular - } else { - plural - } -} - -/// A formatted signature of a function. -struct Signature<'a> { - exploration: &'a Exploration<'a>, - name: &'a str, - function: &'a FunctionDesc, -} - -impl<'a> Signature<'a> { - /// Creates a new signature. 
- fn new(exploration: &'a Exploration<'a>, name: &'a str, function: &'a FunctionDesc) -> Self { - Self { - exploration, - name, - function, - } - } -} - -impl fmt::Display for Signature<'_> { - fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { - write!(f, "{}(", self.name)?; - if let Some((first, parameters)) = self.function.parameters.split_first() { - write!( - f, - "{}", - self.exploration - .new_type_view(first.ty, &TypesBounds::inactive()) - )?; - for param in parameters { - write!( - f, - ", {}", - self.exploration - .new_type_view(param.ty, &TypesBounds::inactive()) - )?; - } - } - if self.function.return_type.is_nothing() { - write!(f, ")") - } else { - write!( - f, - ") -> {}", - self.exploration - .new_type_view(self.function.return_type, &TypesBounds::inactive()) - ) - } - } -} diff --git a/analyzer/src/steps/typing/iterable.rs b/analyzer/src/steps/typing/iterable.rs deleted file mode 100644 index 872b3b00..00000000 --- a/analyzer/src/steps/typing/iterable.rs +++ /dev/null @@ -1,216 +0,0 @@ -use crate::diagnostic::{Diagnostic, DiagnosticID, Observation}; -use crate::reef::ReefId; -use crate::relations::SymbolRef; -use crate::steps::typing::bounds::TypesBounds; -use crate::steps::typing::exploration::{Exploration, Links}; -use crate::steps::typing::{ascribe_types, ExpressionValue, TypingState}; -use crate::types::builtin::STRING_STRUCT; -use crate::types::ctx::TypedVariable; -use crate::types::engine::StructureId; -use crate::types::hir::{ConditionalFor, ExprKind, ForLoop, RangeFor, TypedExpr}; -use crate::types::ty::Type; -use crate::types::{hir, ERROR, GENERIC_VECTOR, INT, UNIT}; -use ast::control_flow::{For, ForKind}; -use context::source::SourceSegmentHolder; - -pub(super) fn ascribe_for( - it: &For, - exploration: &mut Exploration, - links: Links, - diagnostics: &mut Vec, - state: TypingState, -) -> TypedExpr { - match it.kind.as_ref() { - ForKind::Range(range) => { - let iterable = ascribe_types(exploration, links, diagnostics, &range.iterable, state); - let id = links.env().get_raw_symbol(range.segment.clone()).unwrap(); - let SymbolRef::Local(receiver_id) = id else { - unreachable!() - }; - exploration - .ctx - .set_local(links.source, receiver_id, TypedVariable::immutable(ERROR)); - let iterable_type = exploration.get_type(iterable.ty).unwrap(); - match iterable_type { - Type::Instantiated(vec, params) if *vec == GENERIC_VECTOR => { - let param = params[0]; - exploration.ctx.set_local( - links.source, - receiver_id, - TypedVariable::immutable(param), - ); - } - Type::Structure(_, string) if *string == STRING_STRUCT => { - exploration.ctx.set_local( - links.source, - receiver_id, - TypedVariable::immutable(iterable.ty), - ); - } - Type::Structure(_, StructureId(0 | 1)) if iterable.ty.reef == ReefId(1) => { - exploration.ctx.set_local( - links.source, - receiver_id, - TypedVariable::immutable(INT), - ); - } - _ => { - if iterable.ty.is_ok() { - diagnose_not_iterable(exploration, links, &iterable, diagnostics); - } - } - } - let body = ascribe_types( - exploration, - links, - diagnostics, - &it.body, - state - .with_in_loop() - .with_local_value(ExpressionValue::Unused), - ); - TypedExpr { - kind: ExprKind::ForLoop(ForLoop { - kind: Box::new(hir::ForKind::Range(RangeFor { - receiver: receiver_id, - receiver_type: exploration - .ctx - .get_local(links.source, receiver_id) - .unwrap() - .type_ref, - iterable, - })), - body: Box::new(body), - }), - ty: UNIT, - segment: it.segment.clone(), - } - } - ForKind::Conditional(conditional) => { - let initializer = 
ascribe_types( - exploration, - links, - diagnostics, - &conditional.initializer, - state, - ); - let condition = ascribe_types( - exploration, - links, - diagnostics, - &conditional.condition, - state, - ); - let increment = ascribe_types( - exploration, - links, - diagnostics, - &conditional.increment, - state, - ); - let body = ascribe_types( - exploration, - links, - diagnostics, - &it.body, - state - .with_in_loop() - .with_local_value(ExpressionValue::Unused), - ); - TypedExpr { - kind: ExprKind::ForLoop(ForLoop { - kind: Box::new(hir::ForKind::Conditional(ConditionalFor { - initializer, - condition, - increment, - })), - body: Box::new(body), - }), - ty: UNIT, - segment: it.segment.clone(), - } - } - } -} - -fn diagnose_not_iterable( - exploration: &Exploration, - links: Links, - iterable: &TypedExpr, - diagnostics: &mut Vec, -) { - diagnostics.push( - Diagnostic::new(DiagnosticID::TypeMismatch, "Expected iterable type").with_observation( - Observation::here( - links.source, - exploration.externals.current, - iterable.segment(), - format!( - "Found `{}`", - exploration.new_type_view(iterable.ty, &TypesBounds::inactive()) - ), - ), - ), - ); -} - -#[cfg(test)] -mod tests { - use crate::diagnostic::{Diagnostic, DiagnosticID, Observation}; - use crate::reef::ReefId; - use crate::relations::SourceId; - use crate::steps::typing::tests::extract_type; - use crate::types::{STRING, UNIT}; - use context::source::Source; - use context::str_find::find_in; - use pretty_assertions::assert_eq; - - #[test] - fn verify_body_when_error() { - let content = "for i in 11 - {} { i = 9 }"; - let res = extract_type(Source::unknown(content)); - assert_eq!( - res, - Err(vec![ - Diagnostic::new(DiagnosticID::UnknownMethod, "Undefined operator",) - .with_observation(Observation::here( - SourceId(0), - ReefId(1), - find_in(content, "11 - {}"), - "No operator `sub` between type `Int` and `Unit`", - )), - Diagnostic::new( - DiagnosticID::CannotReassign, - "Cannot assign twice to immutable variable `i`", - ) - .with_observation(Observation::here( - SourceId(0), - ReefId(1), - find_in(content, "i = 9"), - "Assignment happens here", - )) - ]) - ); - } - - #[test] - fn iterate_glob() { - let source = Source::unknown("for f in p'*'.spread() { echo $f }"); - let res = extract_type(source); - assert_eq!(res, Ok(UNIT)); - } - - #[test] - fn iterate_string() { - let source = Source::unknown("var last = ''; for c in 'hello' { last = $c }; $last"); - let res = extract_type(source); - assert_eq!(res, Ok(STRING)); - } - - #[test] - fn iterate_condition() { - let source = Source::unknown("for ((var x = 0; $x < 0; $x += 0)) { echo $x }"); - let res = extract_type(source); - assert_eq!(res, Ok(UNIT)); - } -} diff --git a/analyzer/src/steps/typing/lower.rs b/analyzer/src/steps/typing/lower.rs deleted file mode 100644 index dd849d81..00000000 --- a/analyzer/src/steps/typing/lower.rs +++ /dev/null @@ -1,132 +0,0 @@ -use context::source::SourceSegmentHolder; - -use crate::diagnostic::{Diagnostic, DiagnosticID, Observation}; -use crate::relations::SourceId; -use crate::steps::typing::bounds::TypesBounds; -use crate::steps::typing::exploration::Exploration; -use crate::steps::typing::view::TypeView; -use crate::types::builtin::GENERIC_PARAMETER_1; -use crate::types::hir::{ExprKind, MethodCall, TypedExpr}; -use crate::types::ty::{Type, TypeRef}; -use crate::types::{BOOL, FLOAT, GENERIC_OPTION, STRING}; - -pub fn get_converter(ty: TypeRef) -> Option<&'static str> { - Some(match ty { - BOOL => "to_bool", - FLOAT => "to_float", - STRING 
=> "to_string", - _ => return None, - }) -} - -/// Try to convert an expression into a string. -pub(super) fn convert_into_string( - expr: TypedExpr, - exploration: &Exploration, - diagnostics: &mut Vec, - source: SourceId, -) -> TypedExpr { - call_convert_on( - expr, - STRING, - exploration, - |ty| format!("Cannot stringify type `{ty}`",), - diagnostics, - &TypesBounds::inactive(), - source, - ) -} - -/// Generates a conversion method call if needed. -/// -/// This function must be called only if a conversion has been accepted by the type engine, -/// use the upper level function [`crate::steps::typing::coercion::convert_expression`] to -/// do the proper checks. -pub(super) fn call_convert_on( - expr: TypedExpr, - into: TypeRef, - exploration: &Exploration, - message: impl FnOnce(TypeView) -> String, - diagnostics: &mut Vec, - bounds: &TypesBounds, - source: SourceId, -) -> TypedExpr { - // If the expression is already of the needed type, we don't need to do anything. - // The `Nothing` type can be converted to anything, so we also return early. - if exploration.is_compatible(into, expr.ty) { - return expr; - } - - let method_name = match get_converter(into) { - Some(method_name) => method_name, - None => { - diagnostics.push( - Diagnostic::new( - DiagnosticID::UnknownMethod, - format!( - "No conversion method defined for type `{}`", - exploration.new_type_view(into, bounds) - ), - ) - .with_observation((source, exploration.externals.current, expr.segment()).into()), - ); - return expr; - } - }; - - // Else, we try to find the expected conversion method on the expression's type - if let Some((method, method_id)) = exploration.get_method_exact(expr.ty, method_name, &[], into) - { - let segment = expr.segment.clone(); - return TypedExpr { - kind: ExprKind::MethodCall(MethodCall { - callee: Box::new(expr), - arguments: vec![], - function_id: method_id, - }), - ty: method.return_type, - segment, - }; - } - - diagnostics.push( - Diagnostic::new( - DiagnosticID::TypeMismatch, - message(exploration.new_type_view(expr.ty, bounds)), - ) - .with_observation(Observation::here( - source, - exploration.externals.current, - expr.segment(), - format!( - "No method `{method_name}` on type `{}`", - exploration.new_type_view(expr.ty, bounds) - ), - )), - ); - expr -} - -/// Generates a conversion method call if needed. 
-pub(super) fn generate_unwrap(typed: TypedExpr, exploration: &Exploration) -> TypedExpr { - let Some(Type::Instantiated(instantiated, parameters)) = exploration.get_type(typed.ty) else { - return typed; - }; - if *instantiated != GENERIC_OPTION { - return typed; - } - let return_type = *parameters.first().unwrap(); - let segment = typed.segment.clone(); - TypedExpr { - kind: ExprKind::MethodCall(MethodCall { - callee: Box::new(typed), - arguments: vec![], - function_id: exploration - .get_method_exact(*instantiated, "unwrap", &[], GENERIC_PARAMETER_1) - .expect("Option should have an `unwrap` method.") - .1, - }), - ty: return_type, - segment, - } -} diff --git a/analyzer/src/steps/typing/magic.rs b/analyzer/src/steps/typing/magic.rs deleted file mode 100644 index ee706804..00000000 --- a/analyzer/src/steps/typing/magic.rs +++ /dev/null @@ -1,66 +0,0 @@ -use std::str::FromStr; - -use context::source::{SourceSegment, SourceSegmentHolder}; - -use crate::environment::symbols::MagicSymbolKind; -use crate::name::Name; -use crate::steps::typing::exploration::{Exploration, Links}; -use crate::types::hir::{Declaration, ExprKind, FunctionCall, TypedExpr}; -use crate::types::{builtin, UNIT}; - -/// Checks if the given name is reserved for an external variable that is not -/// defined in the source. -pub fn is_magic_variable_name(name: &str) -> bool { - u32::from_str(name).is_ok() || matches!(name, "*" | "@" | "#") -} - -pub(super) fn prepend_implicits( - body: TypedExpr, - exploration: &Exploration, - links: Links, -) -> TypedExpr { - if let Some(id) = links - .env() - .symbols - .find_magic(MagicSymbolKind::ProgramArguments) - { - let (std_reef, get_args_function) = exploration - .externals - .get_reef_by_name("std") - .and_then(|(r, reef_id)| { - r.engine - .find_environment_by_name(&Name::new("std::memory::program_arguments")) - .zip(Some(reef_id)) - }) - .map(|((id, _), reef_id)| (reef_id, id)) - .expect("could not find `std::memory::program_arguments` function"); - - let get_args_chunk = exploration.get_chunk(std_reef, get_args_function).unwrap(); - - let generated_pargs_expr = TypedExpr { - kind: ExprKind::Declare(Declaration { - identifier: id, - value: Some(Box::new(TypedExpr { - kind: ExprKind::FunctionCall(FunctionCall { - arguments: vec![], - reef: std_reef, - function_id: get_args_chunk.function_id, - source_id: Some(get_args_function), - }), - ty: builtin::STRING_VEC, - segment: Default::default(), - })), - }), - ty: UNIT, - segment: SourceSegment::default(), - }; - - TypedExpr { - ty: body.ty, - segment: body.segment(), - kind: ExprKind::Block(vec![generated_pargs_expr, body]), - } - } else { - body - } -} diff --git a/analyzer/src/steps/typing/structure.rs b/analyzer/src/steps/typing/structure.rs deleted file mode 100644 index 31307de9..00000000 --- a/analyzer/src/steps/typing/structure.rs +++ /dev/null @@ -1,429 +0,0 @@ -use std::collections::HashMap; - -use ast::r#struct::{FieldAccess, StructDeclaration}; -use ast::variable::{Assign, Identifier}; -use context::source::{SourceSegment, SourceSegmentHolder}; - -use crate::diagnostic::{Diagnostic, DiagnosticID, Observation}; -use crate::reef::ReefId; -use crate::relations::{LocalId, SymbolRef}; -use crate::steps::typing::assign::ascribe_assign_rhs; -use crate::steps::typing::bounds::{apply_bounds, TypesBounds}; -use crate::steps::typing::coercion::resolve_type_annotation; -use crate::steps::typing::exploration::{Exploration, Links}; -use crate::steps::typing::{ascribe_types, ExpressionValue, TypingState}; -use 
crate::types::engine::StructureId; -use crate::types::hir::{ExprKind, TypedExpr}; -use crate::types::ty::{Field, FunctionDesc, Type, TypeId, TypeRef}; -use crate::types::{hir, ERROR, UNIT}; - -pub(super) fn declare_structure( - decl: &StructDeclaration, - exploration: &mut Exploration, - links: Links, - diagnostics: &mut Vec, - structure_id: StructureId, - type_id: TypeId, -) { - let current_reef = exploration.externals.current; - - let structure_env_id = links.source; - - let mut type_parameters = Vec::new(); - - exploration - .ctx - .init_locals(structure_env_id, links.env().symbols.len()); - - for (tparam_id, tparam) in decl.parameters.iter().enumerate() { - let param_type_id = exploration - .typing - .add_type(Type::Polytype, Some(tparam.name.to_string())); - let param_type_ref = TypeRef::new(current_reef, param_type_id); - type_parameters.push(param_type_id); - - exploration - .ctx - .set_local_typed(structure_env_id, LocalId(tparam_id), param_type_ref); - exploration - .ctx - .bind_name(tparam.name.to_string(), param_type_id); - } - - // set type parameters now; they will be used by the structure's fields. - exploration - .type_engine - .get_structure_mut(structure_id) - .unwrap() - .type_parameters = type_parameters.clone(); - - let mut fields = HashMap::new(); - let mut field_types = Vec::new(); - - for (field_offset, field_declaration) in decl.fields.iter().enumerate() { - let field_type = - resolve_type_annotation(exploration, links, &field_declaration.tpe, diagnostics); - - let local_id = LocalId(field_offset + type_parameters.len()); - exploration - .ctx - .set_local_typed(structure_env_id, local_id, field_type); - - field_types.push(field_type); - fields.insert( - field_declaration.name.to_string(), - Field { - ty: field_type, - local_id, - }, - ); - } - - // Then set the structure fields - exploration - .type_engine - .get_structure_mut(structure_id) - .unwrap() - .fields = fields; - - // Add default constructor function - let struct_type_ref = TypeRef::new(current_reef, type_id); - let constructor_return_type = if type_parameters.is_empty() { - // No need to instantiate a type if it has no type parameters, - // directly use the base structure type - type_id - } else { - exploration.typing.add_type( - Type::Instantiated( - struct_type_ref, - type_parameters - .iter() - .map(|ty| TypeRef::new(current_reef, *ty)) - .collect(), - ), - None, - ) - }; - let constructor = FunctionDesc::constructor( - type_parameters, - field_types, - TypeRef::new(current_reef, constructor_return_type), - ); - let constructor_fn_id = exploration - .type_engine - .add_method(structure_id, "", constructor); - - exploration.typing.add_type( - Type::Function(Some(structure_env_id), constructor_fn_id), - Some("".to_string()), - ); -} - -pub(super) fn ascribe_struct_declaration( - decl: &StructDeclaration, - exploration: &mut Exploration, - parent_links: Links, - diagnostics: &mut Vec, -) -> TypedExpr { - let structure_env_id = parent_links.env().get_raw_env(decl.segment()).unwrap(); - let structure_id = exploration.type_engine.init_empty_structure(); - - let type_id = exploration.typing.add_type( - Type::Structure(Some(structure_env_id), structure_id), - Some(decl.name.to_string()), - ); - - let links = parent_links.with_source(structure_env_id); - declare_structure(decl, exploration, links, diagnostics, structure_id, type_id); - - let type_ref = TypeRef::new(exploration.externals.current, type_id); - - let SymbolRef::Local(structure_local_id) = - 
parent_links.env().get_raw_symbol(decl.segment()).unwrap() - else { - unreachable!() - }; - exploration - .ctx - .set_local_typed(parent_links.source, structure_local_id, type_ref); - - TypedExpr { - kind: ExprKind::Noop, - ty: UNIT, - segment: decl.segment(), - } -} - -pub(super) fn ascribe_field_assign( - assign: &Assign, - access: &FieldAccess, - exploration: &mut Exploration, - links: Links, - diagnostics: &mut Vec, - state: TypingState, -) -> TypedExpr { - let object = ascribe_types( - exploration, - links, - diagnostics, - &access.expr, - state.with_local_value(ExpressionValue::Unspecified), - ); - let Some(field_match) = type_field_access( - object.ty, - &access.field, - access.segment(), - links, - exploration, - diagnostics, - ) else { - return TypedExpr { - kind: ExprKind::Noop, - ty: ERROR, - segment: assign.segment(), - }; - }; - - let rhs = ascribe_assign_rhs( - assign, - exploration, - links, - diagnostics, - state.with_local_value(ExpressionValue::Expected(field_match.field_type)), - ); - TypedExpr { - kind: ExprKind::FieldAssign(hir::FieldAssign { - object: Box::new(object), - field: field_match.field, - structure: field_match.object_structure, - structure_reef: field_match.object_structure_reef, - new_value: Box::new(rhs), - }), - ty: UNIT, - segment: assign.segment(), - } -} - -struct FieldMatch { - object_structure: StructureId, - object_structure_reef: ReefId, - field: LocalId, - field_type: TypeRef, -} - -fn type_field_access( - object_type: TypeRef, - field_name: &Identifier, - segment: SourceSegment, - links: Links, - exploration: &mut Exploration, - diagnostics: &mut Vec, -) -> Option { - let value_tparams = match exploration.get_type(object_type).unwrap() { - Type::Instantiated(_, tparams) => tparams.as_slice(), - _ => &[], - }; - let value_base_type = exploration.get_base_type(object_type); - - let structure_id = match exploration.get_type(value_base_type).unwrap() { - Type::Structure(_, structure_id) => *structure_id, - _ => { - if object_type != ERROR { - diagnostics.push( - Diagnostic::new( - DiagnosticID::InvalidFieldAccess, - format!( - "could not access field `{}` on value of type `{}`", - field_name, - exploration.new_type_view(object_type, &TypesBounds::inactive()) - ), - ) - .with_observation(Observation::here( - links.source, - exploration.externals.current, - segment, - format!( - "`{}` is not a structured type.", - exploration.new_type_view(object_type, &TypesBounds::inactive()) - ), - )), - ); - } - - return None; - } - }; - - let structure = exploration - .get_structure(value_base_type.reef, structure_id) - .unwrap(); - let bounds = TypesBounds::new( - structure - .type_parameters - .iter() - .map(|ty| TypeRef::new(value_base_type.reef, *ty)) - .zip(value_tparams.iter().copied()) - .collect(), - ); - - let field = structure.fields.get(field_name.value.as_str()); - match field { - Some(field) => Some(FieldMatch { - object_structure: structure_id, - object_structure_reef: value_base_type.reef, - field: field.local_id, - field_type: apply_bounds(exploration, field.ty, &bounds), - }), - None => { - diagnostics.push( - Diagnostic::new( - DiagnosticID::UnknownSymbol, - format!( - "unknown field `{}` in structure `{}`", - field_name, - exploration.new_type_view(object_type, &TypesBounds::inactive()) - ), - ) - .with_observation(Observation::here( - links.source, - exploration.externals.current, - segment.clone(), - format!("`{}` does not exists", field_name), - )), - ); - None - } - } -} - -pub(super) fn ascribe_field_access( - access: &FieldAccess, - 
links: Links, - exploration: &mut Exploration, - diagnostics: &mut Vec, - state: TypingState, -) -> TypedExpr { - let object = ascribe_types(exploration, links, diagnostics, &access.expr, state); - let field_match = type_field_access( - object.ty, - &access.field, - access.segment(), - links, - exploration, - diagnostics, - ); - match field_match { - None => TypedExpr { - kind: ExprKind::Noop, - ty: ERROR, - segment: access.segment(), - }, - Some(FieldMatch { - object_structure, - object_structure_reef, - field, - field_type, - }) => TypedExpr { - kind: ExprKind::FieldAccess(hir::FieldAccess { - object: Box::new(object), - structure: object_structure, - structure_reef: object_structure_reef, - field, - }), - ty: field_type, - segment: access.segment(), - }, - } -} - -#[cfg(test)] -mod test { - use pretty_assertions::assert_eq; - - use context::source::Source; - - use crate::reef::ReefId; - use crate::steps::typing::tests::extract_type; - use crate::types::ty::{TypeId, TypeRef}; - use crate::types::{STRING, UNIT}; - - #[test] - fn constructor() { - let expr = extract_type(Source::unknown( - r#"\ - struct Test[A] { - a: Int, - b: A, - c: Vec[A] - } - Test(7, "test", "".split(' ')) - "#, - )); - - assert_eq!(expr, Ok(TypeRef::new(ReefId(1), TypeId(6)))) - } - - #[test] - fn field_access() { - let expr = extract_type(Source::unknown( - r#"\ - struct Test[A] { - a: Int, - b: A, - c: Vec[A] - } - Test(7, "test", "".split(' ')).b - "#, - )); - - assert_eq!(expr, Ok(STRING)) - } - - #[test] - fn field_assign() { - let expr = extract_type(Source::unknown( - r#"\ - struct Test[A] { - a: Int, - b: A, - c: Vec[A] - } - Test(7, "test", "".split(' ')).b = 'bar' - "#, - )); - - assert_eq!(expr, Ok(UNIT)) - } - - #[test] - fn field_access_subscript() { - let expr = extract_type(Source::unknown( - r#"\ - struct Test[A] { - a: Int, - b: A, - c: Vec[A] - } - Test(7, "test", "".split(' ')).c[0] - "#, - )); - - assert_eq!(expr, Ok(STRING)) - } - - #[test] - fn field_assign_subscript() { - let expr = extract_type(Source::unknown( - r#"\ - struct Test[A] { - a: Int, - b: A, - c: Vec[A] - } - Test(7, "test", "".split(' ')).c[0] = 'foo' - "#, - )); - - assert_eq!(expr, Ok(UNIT)) - } -} diff --git a/analyzer/src/steps/typing/view.rs b/analyzer/src/steps/typing/view.rs deleted file mode 100644 index ee168e0c..00000000 --- a/analyzer/src/steps/typing/view.rs +++ /dev/null @@ -1,112 +0,0 @@ -use std::fmt; -use std::fmt::Display; - -use crate::steps::typing::bounds::TypesBounds; -use crate::steps::typing::exploration::Exploration; -use crate::types::ty::{Type, TypeRef}; - -#[derive(Copy, Clone)] -pub(super) struct TypeView<'a> { - pub(super) id: TypeRef, - pub(super) exploration: &'a Exploration<'a>, - pub(super) bounds: &'a TypesBounds, -} - -impl<'a> TypeView<'a> { - pub(super) fn new(id: TypeRef, exploration: &'a Exploration, bounds: &'a TypesBounds) -> Self { - Self { - id, - exploration, - bounds, - } - } -} - -impl fmt::Debug for TypeView<'_> { - fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { - write!(f, "{}", self) - } -} - -impl Display for TypeView<'_> { - fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { - let ty = self.bounds.get_bound(self.id); - - let tpe = self.exploration.get_type(ty).unwrap_or(&Type::Error); - - if let Type::Instantiated(def, parameters) = tpe { - write!(f, "{}", Self::new(*def, self.exploration, self.bounds))?; - - if parameters.is_empty() { - return Ok(()); - } - - write!(f, "[")?; - for (i, parameter) in parameters.iter().enumerate() { - if i > 0 { - write!(f, ", 
")?; - } - write!( - f, - "{}", - &Self::new(*parameter, self.exploration, self.bounds) - )?; - } - return write!(f, "]"); - } - - write!( - f, - "{}", - self.exploration - .get_type_name(ty) - .map(String::as_str) - .unwrap_or("") - ) - } -} - -pub struct TypeInstanceVec<'a> { - pub(super) ids: Vec, - pub(super) exploration: &'a Exploration<'a>, - pub(super) bounds: &'a TypesBounds, -} - -impl<'a> TypeInstanceVec<'a> { - pub(super) fn new( - ids: Vec, - exploration: &'a Exploration, - bounds: &'a TypesBounds, - ) -> Self { - Self { - ids, - exploration, - bounds, - } - } -} - -impl fmt::Debug for TypeInstanceVec<'_> { - fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { - write!(f, "[")?; - for (i, id) in self.ids.iter().enumerate() { - if i > 0 { - write!(f, ", ")?; - } - write!(f, "{}", TypeView::new(*id, self.exploration, self.bounds))?; - } - write!(f, "]") - } -} - -impl fmt::Display for TypeInstanceVec<'_> { - fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { - for (i, id) in self.ids.iter().enumerate() { - if i > 0 { - write!(f, ", ")?; - } - write!(f, "`{}`", TypeView::new(*id, self.exploration, self.bounds))?; - } - Ok(()) - } -} diff --git a/analyzer/src/symbol.rs b/analyzer/src/symbol.rs new file mode 100644 index 00000000..ed36850a --- /dev/null +++ b/analyzer/src/symbol.rs @@ -0,0 +1,211 @@ +use crate::module::Export; +use crate::typing::user::TypeId; +use context::source::Span; +use std::fmt; +use std::path::PathBuf; + +/// A binding that can be accessed by a name. +#[derive(Debug, PartialEq, Eq, Clone)] +pub struct Symbol { + /// The name from which this symbol can be accessed. + pub name: String, + + /// The known type of the symbol. + /// + /// The type should not be changed after it is set. + pub ty: TypeId, + + /// The depth at which this symbol was declared. + scope: usize, + + /// The byte span where this symbol was declared. + pub declared_at: Span, + + /// The kind of symbol this is. + pub registry: SymbolRegistry, +} + +/// A short representation of a [`Symbol`]. +#[derive(Debug, Clone, PartialEq, Eq)] +pub struct SymbolDesc { + /// The kind of symbol this is. + pub registry: SymbolRegistry, + + /// The byte span where this symbol was declared. + pub span: Span, +} + +impl From<&Symbol> for SymbolDesc { + fn from(symbol: &Symbol) -> Self { + Self { + registry: symbol.registry, + span: symbol.declared_at.clone(), + } + } +} + +/// A hint of where the symbol could be found. +#[derive(Debug, PartialEq, Eq, Clone, Copy)] +pub enum SymbolRegistry { + /// A named variable. + Variable, + + /// A named function. + /// + /// When there is both a function and a variable with the same name and when we are looking + /// for a function call, the function should be preferred. + Function, + + /// A named type, either a struct or a module. + Type, +} + +impl fmt::Display for SymbolRegistry { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + match self { + SymbolRegistry::Variable => write!(f, "variable"), + SymbolRegistry::Function => write!(f, "function"), + SymbolRegistry::Type => write!(f, "type"), + } + } +} + +/// Tracks the currently reachable symbols. +pub(crate) struct SymbolTable { + /// The path to the file that this symbol table is for. + pub(crate) path: PathBuf, + + /// The symbols in scope. + /// + /// The last symbol is the most recently defined one and the first symbol to be looked up + /// when resolving a symbol. + symbols: Vec, + + /// Tracks the current depth in order to detect when symbols become out of scope. 
+ pub(crate) current_depth: usize, +} + +impl SymbolTable { + /// Creates a new table for the given file path. + pub(super) fn new(path: PathBuf) -> Self { + Self { + path, + symbols: Vec::new(), + current_depth: 0, + } + } + + /// Registers a new symbol that is defined in the current scope. + /// + /// It may be retrieved by its name and registry while it is reachable. Symbols are dropped when + /// going out of scope and masked when a new symbol with the same name is defined. + pub(crate) fn insert_local( + &mut self, + name: String, + ty: TypeId, + declared_at: Span, + registry: SymbolRegistry, + ) { + self.symbols.push(Symbol { + name, + ty, + scope: self.current_depth, + declared_at, + registry, + }); + } + + /// Inserts a symbol that comes from an external source. + pub(crate) fn insert_remote( + &mut self, + name: String, + imported_at: Span, + Export { ty, registry, .. }: &Export, + ) { + self.symbols.push(Symbol { + name, + ty: *ty, + scope: self.current_depth, + declared_at: imported_at, + registry: *registry, + }); + } + + /// Gets a named binding that refers to a specific registry. + /// + /// The same identifier may be used for different kinds of bindings, such as a variable and a + /// function. This method will use the most recently defined binding that matches the given + /// registry. + pub(crate) fn get(&self, name: &str, registry: SymbolRegistry) -> Option<&Symbol> { + self.symbols + .iter() + .rev() + .find(|symbol| symbol.name == name && symbol.registry == registry) + } + + /// Tries to find a symbol by its name and registry. + /// + /// It tries to find a similar symbol in the [`Err`] variant that may help the user to find the + /// correct symbol. + pub(crate) fn lookup( + &self, + name: &str, + registry: SymbolRegistry, + ) -> Result<&Symbol, UndefinedSymbol> { + self.lookup_position(name, registry) + .map(|(_, symbol)| symbol) + } + + pub(crate) fn lookup_position( + &self, + name: &str, + registry: SymbolRegistry, + ) -> Result<(usize, &Symbol), UndefinedSymbol> { + let mut other_symbol: Option<&Symbol> = None; + for (idx, symbol) in self.symbols.iter().enumerate().rev() { + if symbol.name == name { + if symbol.registry == registry { + return Ok((idx, symbol)); + } else { + other_symbol = Some(symbol); + } + } + } + Err(match other_symbol { + Some(symbol) => UndefinedSymbol::WrongRegistry(SymbolDesc::from(symbol)), + None => UndefinedSymbol::NotFound, + }) + } + + pub(crate) fn len(&self) -> usize { + self.symbols.len() + } + + pub(crate) fn enter_scope(&mut self) { + self.current_depth += 1; + } + + pub(crate) fn exit_scope(&mut self) { + self.current_depth -= 1; + self.symbols + .retain(|symbol| symbol.scope <= self.current_depth); + } +} + +/// Some [`Symbol`] could not be found. +pub(super) enum UndefinedSymbol { + /// No symbol with the given name was found. + NotFound, + + /// A symbol with the same name was found but with a different [`SymbolRegistry`]. 
+ WrongRegistry(SymbolDesc), +} + +impl From for Option { + fn from(undefined: UndefinedSymbol) -> Self { + match undefined { + UndefinedSymbol::NotFound => None, + UndefinedSymbol::WrongRegistry(symbol) => Some(symbol), + } + } +} diff --git a/analyzer/src/types.rs b/analyzer/src/types.rs deleted file mode 100644 index bc992807..00000000 --- a/analyzer/src/types.rs +++ /dev/null @@ -1,68 +0,0 @@ -use std::collections::HashMap; - -use crate::reef::LANG_REEF; -use crate::types::ty::{Type, TypeId, TypeRef}; - -pub mod builtin; -pub mod ctx; -pub mod engine; -pub mod hir; -pub mod operator; -pub mod ty; - -/// Holds all the known types. -#[derive(Default, Debug, Clone)] -pub struct Typing { - /// The actual types, bound with an optional name. - types: Vec<(Type, Option)>, - - /// A list of implicit conversions from one type to another. - pub(crate) implicits: HashMap, -} - -impl Typing { - pub(crate) fn set_implicit_conversion(&mut self, from: TypeId, to: TypeRef) { - self.implicits.insert(from, to); - } - - pub(crate) fn add_type(&mut self, ty: Type, name: Option) -> TypeId { - let type_id = TypeId(self.types.len()); - self.types.push((ty, name)); - type_id - } - - /// Gets the type with the given identifier. - pub fn get_type(&self, type_id: TypeId) -> Option<&Type> { - self.types.get(type_id.0).map(|(t, _)| t) - } - - pub fn get_type_name(&self, type_id: TypeId) -> Option<&String> { - self.types - .get(type_id.0) - .and_then(|(_, name)| name.as_ref()) - } - - pub fn iter(&self) -> impl Iterator { - self.types - .iter() - .enumerate() - .map(|(idx, (tpe, _))| (TypeId(idx), tpe)) - } -} - -pub const ERROR: TypeRef = TypeRef::new(LANG_REEF, TypeId(0)); -pub const NOTHING: TypeRef = TypeRef::new(LANG_REEF, TypeId(1)); -pub const UNIT: TypeRef = TypeRef::new(LANG_REEF, TypeId(2)); -pub const BOOL: TypeRef = TypeRef::new(LANG_REEF, TypeId(3)); -pub const EXITCODE: TypeRef = TypeRef::new(LANG_REEF, TypeId(4)); -pub const INT: TypeRef = TypeRef::new(LANG_REEF, TypeId(5)); -pub const FLOAT: TypeRef = TypeRef::new(LANG_REEF, TypeId(6)); -pub const STRING: TypeRef = TypeRef::new(LANG_REEF, TypeId(7)); -pub const GENERIC_VECTOR: TypeRef = TypeRef::new(LANG_REEF, TypeId(8)); -pub const GENERIC_OPTION: TypeRef = TypeRef::new(LANG_REEF, TypeId(9)); -pub const GLOB: TypeRef = TypeRef::new(LANG_REEF, TypeId(10)); -pub const PID: TypeRef = TypeRef::new(LANG_REEF, TypeId(11)); - -/// An error that occurs when two types are not compatible. 
-#[derive(Debug, PartialEq)] -pub struct UnificationError(); diff --git a/analyzer/src/types/builtin.rs b/analyzer/src/types/builtin.rs deleted file mode 100644 index 36b6f28e..00000000 --- a/analyzer/src/types/builtin.rs +++ /dev/null @@ -1,318 +0,0 @@ -use ast::operation::BinaryOperator; - -use crate::engine::Engine; - -use crate::reef::{Reef, LANG_REEF}; -use crate::relations::{LocalId, Relations, SourceId}; -use crate::types::ctx::TypeContext; -use crate::types::engine::{StructureId, TypedEngine}; -use crate::types::operator::name_operator_method; -use crate::types::ty::{MethodType, Type, TypeId, TypeRef}; -use crate::types::{ - Typing, BOOL, ERROR, EXITCODE, FLOAT, GENERIC_OPTION, GENERIC_VECTOR, GLOB, INT, NOTHING, PID, - STRING, UNIT, -}; - -const ARITHMETIC_OPERATORS: &[BinaryOperator] = &[ - BinaryOperator::Plus, - BinaryOperator::Minus, - BinaryOperator::Times, - BinaryOperator::Divide, -]; -const COMPARISON_OPERATORS: &[BinaryOperator] = &[ - BinaryOperator::EqualEqual, - BinaryOperator::NotEqual, - BinaryOperator::Less, - BinaryOperator::LessEqual, - BinaryOperator::Greater, - BinaryOperator::GreaterEqual, -]; -const EQUALITY_OPERATORS: &[BinaryOperator] = - &[BinaryOperator::EqualEqual, BinaryOperator::NotEqual]; - -const LOGICAL_OPERATORS: &[BinaryOperator] = &[BinaryOperator::And, BinaryOperator::Or]; - -/// Some common types. -pub const STRING_VEC: TypeRef = TypeRef::new(LANG_REEF, TypeId(12)); -pub const INT_VEC: TypeRef = TypeRef::new(LANG_REEF, TypeId(13)); - -/// generic parameters used by the lang reef. -/// The lang reef is a special reef that reuses the same generic parameters for each functions. -pub const GENERIC_PARAMETER_1: TypeRef = TypeRef::new(LANG_REEF, TypeId(14)); -pub const UNIT_STRUCT: StructureId = StructureId(0); -pub const BOOL_STRUCT: StructureId = StructureId(1); -pub const EXITCODE_STRUCT: StructureId = StructureId(2); -pub const INT_STRUCT: StructureId = StructureId(3); -pub const FLOAT_STRUCT: StructureId = StructureId(4); -pub const STRING_STRUCT: StructureId = StructureId(5); -pub const VEC_STRUCT: StructureId = StructureId(6); -pub const OPTION_STRUCT: StructureId = StructureId(7); -pub const GLOB_STRUCT: StructureId = StructureId(8); -pub const PID_STRUCT: StructureId = StructureId(9); - -fn get_lang_struct_id(typing: &mut Typing, ty: TypeRef) -> StructureId { - let Type::Structure(_, structure_id) = typing.get_type(ty.type_id).unwrap() else { - panic!("given type is not a structured type") - }; - *structure_id -} - -/// Adds the native methods to the engine. 
-fn fill_lang_typed_engine(engine: &mut TypedEngine, typing: &mut Typing) { - // declare one generic parameter type, methods will reuse it - let generic_param1 = TypeRef::new( - LANG_REEF, - typing.add_type(Type::Polytype, Some("A".to_string())), - ); - - // option containing generic parameter - let opt_type = typing.add_type( - Type::Instantiated(GENERIC_OPTION, vec![generic_param1]), - None, - ); - - engine.add_method( - EXITCODE_STRUCT, - "to_bool", - MethodType::function(vec![], vec![], BOOL), - ); - - for op in ARITHMETIC_OPERATORS { - engine.add_method( - INT_STRUCT, - name_operator_method(*op), - MethodType::function(vec![], vec![INT], INT), - ); - engine.add_method( - FLOAT_STRUCT, - name_operator_method(*op), - MethodType::function(vec![], vec![FLOAT], FLOAT), - ); - } - engine.add_method( - INT_STRUCT, - name_operator_method(BinaryOperator::Modulo), - MethodType::function(vec![], vec![INT], INT), - ); - engine.add_method( - BOOL_STRUCT, - "not", - MethodType::function(vec![], vec![], BOOL), - ); - for ty in [BOOL, STRING] { - let ty_structure = get_lang_struct_id(typing, ty); - for op in EQUALITY_OPERATORS { - engine.add_method( - ty_structure, - name_operator_method(*op), - MethodType::function(vec![], vec![ty], BOOL), - ); - } - } - for ty in [INT, FLOAT] { - let ty_structure = get_lang_struct_id(typing, ty); - for op in COMPARISON_OPERATORS { - engine.add_method( - ty_structure, - name_operator_method(*op), - MethodType::function(vec![], vec![ty], BOOL), - ); - } - } - for struct_id in [BOOL_STRUCT, EXITCODE_STRUCT, INT_STRUCT, FLOAT_STRUCT] { - engine.add_method( - struct_id, - "to_string", - MethodType::function(vec![], vec![], STRING), - ); - } - engine.add_method( - INT_STRUCT, - "to_float", - MethodType::function(vec![], vec![], FLOAT), - ); - - engine.add_method( - STRING_STRUCT, - "len", - MethodType::function(vec![], vec![], INT), - ); - engine.add_method( - STRING_STRUCT, - name_operator_method(BinaryOperator::Plus), - MethodType::function(vec![], vec![STRING], STRING), - ); - - engine.add_generic(VEC_STRUCT, generic_param1.type_id); - - engine.add_method( - VEC_STRUCT, - "[]", - MethodType::function(vec![], vec![INT], generic_param1), - ); - - engine.add_method( - VEC_STRUCT, - "push", - MethodType::function(vec![], vec![generic_param1], UNIT), - ); - - engine.add_method( - VEC_STRUCT, - "pop", - MethodType::function(vec![], vec![], TypeRef::new(LANG_REEF, opt_type)), - ); - engine.add_method(VEC_STRUCT, "len", MethodType::function(vec![], vec![], INT)); - - engine.add_method( - STRING_STRUCT, - "split", - MethodType::function(vec![], vec![STRING], STRING_VEC), - ); - engine.add_method( - STRING_STRUCT, - "bytes", - MethodType::function(vec![], vec![], INT_VEC), - ); - - for operand in [BOOL, EXITCODE] { - let operand_struct = get_lang_struct_id(typing, operand); - for op in LOGICAL_OPERATORS { - engine.add_method( - operand_struct, - name_operator_method(*op), - MethodType::function(vec![], vec![operand], operand), - ); - } - } - for operand in [INT, FLOAT] { - let operand_struct = get_lang_struct_id(typing, operand); - engine.add_method( - operand_struct, - "neg", - MethodType::function(vec![], vec![], operand), - ); - } - - engine.add_generic(OPTION_STRUCT, generic_param1.type_id); - engine.add_method( - OPTION_STRUCT, - "is_none", - MethodType::function(vec![], vec![], BOOL), - ); - engine.add_method( - OPTION_STRUCT, - "is_some", - MethodType::function(vec![], vec![], BOOL), - ); - engine.add_method( - OPTION_STRUCT, - "unwrap", - MethodType::function(vec![], 
vec![], generic_param1), - ); - engine.add_method( - VEC_STRUCT, - "[]", - MethodType::function(vec![], vec![INT, generic_param1], UNIT), - ); - - engine.add_method( - INT_STRUCT, - "to_exitcode", - MethodType::function(vec![], vec![], EXITCODE), - ); - engine.add_method( - EXITCODE_STRUCT, - "to_int", - MethodType::function(vec![], vec![], INT), - ); - - engine.add_method( - VEC_STRUCT, - "pop_head", - MethodType::function(vec![], vec![], generic_param1), - ); - - engine.add_method( - GLOB_STRUCT, - "spread", - MethodType::function(vec![], vec![], STRING_VEC), - ); - engine.add_method( - PID_STRUCT, - "to_string", - MethodType::function(vec![], vec![], STRING), - ); -} - -fn fill_lang_types(typing: &mut Typing, engine: &mut TypedEngine) { - typing.add_type(Type::Error, Some("Error".to_string())); - typing.add_type(Type::Nothing, Some("Nothing".to_string())); - for primitive_name in [ - "Unit", "Bool", "Exitcode", "Int", "Float", "String", "Vec", "Option", "Glob", "Pid", - ] { - let structure_id = engine.init_empty_structure(); - typing.add_type( - Type::Structure(None, structure_id), - Some(primitive_name.to_string()), - ); - } - //init int vectors and string vectors (will be used by methods) - typing.add_type(Type::Instantiated(GENERIC_VECTOR, vec![STRING]), None); - typing.add_type(Type::Instantiated(GENERIC_VECTOR, vec![INT]), None); - - typing.set_implicit_conversion(EXITCODE.type_id, BOOL); - typing.set_implicit_conversion(INT.type_id, FLOAT); -} - -fn fill_lang_bindings(ctx: &mut TypeContext) { - ctx.bind_name("Nothing".to_string(), NOTHING.type_id); - ctx.bind_name("Unit".to_string(), UNIT.type_id); - ctx.bind_name("Bool".to_string(), BOOL.type_id); - ctx.bind_name("Exitcode".to_string(), EXITCODE.type_id); - ctx.bind_name("Int".to_string(), INT.type_id); - ctx.bind_name("Float".to_string(), FLOAT.type_id); - ctx.bind_name("String".to_string(), STRING.type_id); - ctx.bind_name("Vec".to_string(), GENERIC_VECTOR.type_id); - ctx.bind_name("Option".to_string(), GENERIC_OPTION.type_id); - ctx.bind_name("Glob".to_string(), GLOB.type_id); - ctx.bind_name("Pid".to_string(), PID.type_id); - - let locals = [ - ERROR, - NOTHING, - UNIT, - BOOL, - EXITCODE, - INT, - FLOAT, - STRING, - GENERIC_VECTOR, - GENERIC_OPTION, - GLOB, - PID, - ]; - - ctx.init_locals(SourceId(0), locals.len()); - - for (local_id, local) in locals.iter().enumerate() { - ctx.set_local_typed(SourceId(0), LocalId(local_id), *local); - } -} - -pub fn lang_reef() -> Reef<'static> { - let mut reef = Reef { - name: "lang".to_string(), - engine: Engine::default(), - relations: Relations::default(), - typed_engine: TypedEngine::default(), - typing: Typing::default(), - type_context: TypeContext::default(), - }; - - fill_lang_types(&mut reef.typing, &mut reef.typed_engine); - fill_lang_bindings(&mut reef.type_context); - fill_lang_typed_engine(&mut reef.typed_engine, &mut reef.typing); - - reef -} diff --git a/analyzer/src/types/ctx.rs b/analyzer/src/types/ctx.rs deleted file mode 100644 index 82815d6b..00000000 --- a/analyzer/src/types/ctx.rs +++ /dev/null @@ -1,100 +0,0 @@ -use std::collections::hash_map::Entry; -use std::collections::HashMap; - -use crate::relations::{LocalId, Relations, SourceId, SymbolRef}; -use crate::types::ty::{TypeId, TypeRef}; - -/// Holds the symbol to type mapping. -/// -/// The actual type definition is in the [`crate::types::Typing`] struct. -#[derive(Default, Debug)] -pub struct TypeContext { - names: HashMap, - locals: HashMap>>, -} - -impl TypeContext { - /// Returns the type id of a symbol. 
- pub(crate) fn get( - &self, - relations: &Relations, - source: SourceId, - symbol: SymbolRef, - ) -> Option { - match symbol { - SymbolRef::Local(local) => self.get_local(source, local), - - SymbolRef::External(index) => { - let resolved = relations[index].state.expect_resolved("Unresolved symbol"); - // assume that the resolved symbol's reef points to this context's reef - self.get_local(resolved.source, resolved.object_id) - } - } - } - - pub(crate) fn get_local(&self, source: SourceId, id: LocalId) -> Option { - self.locals - .get(&source) - .unwrap() - .get(id.0) - .and_then(Option::clone) - } - - /// init a source locals area of the given len - pub(crate) fn init_locals(&mut self, source: SourceId, len: usize) { - match self.locals.entry(source) { - Entry::Occupied(_) => panic!("locals already initialized for source {source:?}"), - Entry::Vacant(v) => v.insert(vec![None; len]), - }; - } - - /// Defines the type of an environment's local. - pub(crate) fn set_local_typed(&mut self, source: SourceId, local: LocalId, type_ref: TypeRef) { - self.set_local(source, local, TypedVariable::immutable(type_ref)) - } - - /// Defines the identity of an environment's local. - pub(crate) fn set_local(&mut self, source: SourceId, local: LocalId, obj: TypedVariable) { - let locals = self - .locals - .get_mut(&source) - .expect("locals not initialized"); - locals[local.0] = Some(obj); - } - - pub(crate) fn bind_name(&mut self, name: String, tpe: TypeId) { - self.names.insert(name, tpe); - } - - pub fn get_type_id(&self, name: &str) -> Option { - self.names.get(name).copied() - } -} - -/// The identity of a variable. -/// -/// The main purpose of this struct is to hold the type of a variable, -/// but it also holds if the variable can be reassigned. -#[derive(Debug, Clone, Copy, PartialEq)] -pub(crate) struct TypedVariable { - pub(crate) type_ref: TypeRef, - pub(crate) can_reassign: bool, -} - -impl TypedVariable { - /// Constructs a new mutable variable identity. - pub(crate) fn assignable(type_ref: TypeRef) -> Self { - Self { - type_ref, - can_reassign: true, - } - } - - /// Constructs a new immutable variable identity. - pub(crate) fn immutable(type_ref: TypeRef) -> Self { - Self { - type_ref, - can_reassign: false, - } - } -} diff --git a/analyzer/src/types/engine.rs b/analyzer/src/types/engine.rs deleted file mode 100644 index 2062fb37..00000000 --- a/analyzer/src/types/engine.rs +++ /dev/null @@ -1,307 +0,0 @@ -use context::source::ContentId; - -use crate::engine::Engine; -use crate::environment::Environment; -use crate::relations::{ObjectId, SourceId}; -use crate::types::hir::TypedExpr; -use crate::types::ty::{Field, FunctionDesc, MethodType, StructureDesc, TypeId, TypeRef}; - -/// A typed [`Engine`]. -/// -/// This engine is used to store individual chunks of typed code, such as -/// functions and scripts. -#[derive(Debug, Default)] -pub struct TypedEngine { - /// The user defined chunks of code. - /// - /// At the end of the compilation, this vector has replaced all its `None` values. - entries: Vec>, - - /// All functions definitions. Indexed by a [`FunctionId`] identifier. - functions: Vec, - - /// All structures definitions. Indexed by a [`StructureId`] identifier. 
- structures: Vec, -} - -/// A function identifier, that points to a [`FunctionDesc`] inside a [`TypedEngine`] -#[derive(Debug, Copy, Clone, PartialEq, Eq, Hash)] -pub struct FunctionId(pub ObjectId); - -/// A structure identifier, that points to a [`StructureDesc`] inside a [`TypedEngine`] -#[derive(Debug, Copy, Clone, PartialEq, Eq, Hash)] -pub struct StructureId(pub ObjectId); - -impl TypedEngine { - /// Initializes a new typed engine with the given capacity. - /// - /// In most cases, the capacity is equal to the number of source objects in - /// the source engine. - pub fn new(capacity: usize) -> Self { - let mut engine = Self { - entries: Vec::new(), - functions: Vec::new(), - structures: Vec::new(), - }; - engine.entries.resize_with(capacity, || None); - engine - } - - pub fn init_empty_structure(&mut self) -> StructureId { - let id = StructureId(self.structures.len()); - self.structures.push(StructureDesc::default()); - id - } - - pub(crate) fn get_structure_mut(&mut self, id: StructureId) -> Option<&mut StructureDesc> { - self.structures.get_mut(id.0) - } - - pub fn get_structure(&self, id: StructureId) -> Option<&StructureDesc> { - self.structures.get(id.0) - } - - pub fn iter_structures(&self) -> impl Iterator { - self.structures.iter() - } - - pub fn get_function(&self, id: FunctionId) -> Option<&FunctionDesc> { - self.functions.get(id.0) - } - - pub(crate) fn get_function_mut(&mut self, id: FunctionId) -> Option<&mut FunctionDesc> { - self.functions.get_mut(id.0) - } - - /// Returns the chunk with the given source id. - /// - /// If the chunk is not a user defined chunk, [`None`] is returned. - /// Use [`Self::get`] to get both user defined and native chunks. - pub fn get_user(&self, id: SourceId) -> Option<&Chunk> { - self.entries.get(id.0)?.as_ref() - } - - pub fn take_user(&mut self, id: SourceId) -> Option { - self.entries.get_mut(id.0)?.take() - } - - /// Inserts a chunk into the engine. - pub fn insert(&mut self, id: SourceId, entry: Chunk) { - self.entries[id.0] = Some(entry); - } - - /// Lists methods with a given name of a given type. - /// - /// If the type is unknown or doesn't have any methods with the given name, - /// [`None`] is returned. - pub fn get_methods(&self, structure_id: StructureId, name: &str) -> Option<&Vec> { - self.structures.get(structure_id.0)?.methods.get(name) - } - - /// Gets the method that matches exactly the given arguments and return type. - pub fn get_method_exact( - &self, - structure_id: StructureId, - name: &str, - args: &[TypeRef], - return_type: TypeRef, - ) -> Option<(&MethodType, FunctionId)> { - self.get_methods(structure_id, name).and_then(|methods| { - methods - .iter() - .find(|function_id| { - let method = &self.functions[function_id.0]; - method.return_type == return_type - && method.parameters.iter().map(|p| &p.ty).eq(args) - }) - .map(|function_id| (&self.functions[function_id.0], *function_id)) - }) - } - - /// Adds a new method to a type. - /// - /// The method may not conflict with any existing methods. 
- pub fn add_method( - &mut self, - struct_id: StructureId, - name: &str, - method: MethodType, - ) -> FunctionId { - let function_id = self.add_function(method); - - self.structures - .get_mut(struct_id.0) - .expect("structure not initialized") - .methods - .entry(name.to_owned()) - .or_default() - .push(function_id); - - function_id - } - - pub fn add_function(&mut self, function: FunctionDesc) -> FunctionId { - let function_id = FunctionId(self.functions.len()); - self.functions.push(function); - function_id - } - - /// Adds a new generic type parameter to a structure. - pub(crate) fn add_generic(&mut self, struct_id: StructureId, generic: TypeId) { - self.structures - .get_mut(struct_id.0) - .expect("structure not initialized") - .type_parameters - .push(generic); - } - - pub(crate) fn bind_field(&mut self, struct_id: StructureId, name: String, field: Field) { - self.structures - .get_mut(struct_id.0) - .expect("structure not initialized") - .fields - .insert(name, field); - } - - /// returns an iterator over all contained chunks with their identifier - pub fn iter_chunks(&self) -> impl Iterator { - self.entries - .iter() - .enumerate() - .filter_map(|(id, chunk)| chunk.as_ref().map(|chunk| (SourceId(id), chunk))) - } - - pub fn len(&self) -> usize { - self.entries.len() - } - - pub fn is_empty(&self) -> bool { - self.entries.is_empty() - } - - /// Returns an iterator over all contained chunks grouped by they original content source. - pub fn group_by_content<'a>( - &'a self, - engine: &'a Engine, - starting_page: SourceId, - ) -> ContentIterator { - ContentIterator { - typed: self, - engine, - next: starting_page, - } - } -} - -/// A chunk of typed code. -#[derive(Debug)] -pub struct Chunk { - pub function_id: FunctionId, - pub function_type: TypeId, - pub kind: ChunkKind, -} - -#[derive(Debug)] -pub enum ChunkKind { - /// A function with a defined body. - /// The body is set to None if it has been declared inside the analyzer but not yet typed - DefinedFunction(Option), - /// A function only declared (has no defined body) - DeclaredFunction, -} - -/// A group of chunks that were defined in the same content. -#[derive(Debug, Copy, Clone)] -pub struct EncodableContent { - /// The content identifier the chunks are defined in. - pub content_id: ContentId, - start_inclusive: SourceId, - end_exclusive: SourceId, -} - -pub struct ContentIterator<'a> { - typed: &'a TypedEngine, - engine: &'a Engine<'a>, - next: SourceId, -} - -impl<'a> Iterator for ContentIterator<'a> { - type Item = EncodableContent; - - fn next(&mut self) -> Option { - // Verify that there is a next chunk. - if self.next.0 >= self.engine.len() { - return None; - } - - // Get the content id of the next chunk. - let start = self.next; - let content_id = self - .engine - .get_original_content(self.next) - .expect("Invalid source id"); - - // Walk over all chunks that have the same content id. - while let Some(next_content_id) = self.engine.get_original_content({ - self.next.0 += 1; - self.next - }) { - if next_content_id != content_id { - break; - } - } - - // Return a cursor over the chunks of this content. 
- Some(EncodableContent { - content_id, - start_inclusive: start, - end_exclusive: self.next, - }) - } -} - -impl EncodableContent { - pub fn main_chunk<'a>( - self, - it: &'a ContentIterator<'a>, - ) -> (SourceId, &'a Environment, &'a Chunk) { - let id = self.start_inclusive.0; - let chunk = it.typed.entries[id] - .as_ref() - .expect("Typed engine not properly filled"); - let environment = it.engine.origins[id] - .2 - .as_ref() - .expect("Engine not properly filled"); - (self.start_inclusive, environment, chunk) - } - - pub fn defined_functions<'a>( - self, - it: &'a ContentIterator<'a>, - ) -> impl Iterator { - self.chunks(it) - .filter(move |(_, _, chunk)| matches!(chunk.kind, ChunkKind::DefinedFunction(_))) - } - - pub fn chunks<'a>( - self, - it: &'a ContentIterator<'a>, - ) -> impl Iterator { - let start = self.start_inclusive.0 + 1; - let end = self.end_exclusive.0; - it.engine.origins[start..end] - .iter() - .enumerate() - .filter_map(move |(idx, (_, _, env))| { - let env = env.as_ref().expect("engine not properly filled"); - it.typed.entries[start + idx] - .as_ref() - .map(|c| (SourceId(start + idx), env, c)) - }) - } - - pub fn function_count(&self) -> usize { - self.end_exclusive.0 - self.start_inclusive.0 - 1 - } -} diff --git a/analyzer/src/types/operator.rs b/analyzer/src/types/operator.rs deleted file mode 100644 index 4fff482a..00000000 --- a/analyzer/src/types/operator.rs +++ /dev/null @@ -1,21 +0,0 @@ -use ast::operation::BinaryOperator; - -/// Gets the name of the method that translates the given operator. -pub fn name_operator_method(bin_op: BinaryOperator) -> &'static str { - use BinaryOperator::*; - match bin_op { - Plus => "add", - Minus => "sub", - Times => "mul", - Divide => "div", - Modulo => "mod", - And => "and", - Or => "or", - EqualEqual => "eq", - NotEqual => "ne", - Less => "lt", - LessEqual => "le", - Greater => "gt", - GreaterEqual => "ge", - } -} diff --git a/analyzer/src/types/ty.rs b/analyzer/src/types/ty.rs deleted file mode 100644 index 8122d375..00000000 --- a/analyzer/src/types/ty.rs +++ /dev/null @@ -1,204 +0,0 @@ -use std::collections::HashMap; - -use crate::diagnostic::SourceLocation; -use crate::reef::ReefId; -use crate::relations::{LocalId, ObjectId, SourceId}; -use crate::types::engine::{FunctionId, StructureId}; -use crate::types::{BOOL, ERROR, EXITCODE, FLOAT, INT, NOTHING, UNIT}; - -/// A type identifier in a [`Typing`] instance. -#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)] -pub struct TypeId(pub ObjectId); - -#[derive(Clone, Copy, Debug, Eq, PartialEq, Hash)] -pub struct TypeRef { - pub reef: ReefId, - pub type_id: TypeId, -} - -impl TypeRef { - pub const fn new(reef: ReefId, tpe: TypeId) -> Self { - Self { reef, type_id: tpe } - } - - pub fn is_nothing(self) -> bool { - self == NOTHING - } - - pub fn is_something(self) -> bool { - self != NOTHING - } - - pub fn is_ok(self) -> bool { - self != ERROR - } - - pub fn is_err(self) -> bool { - self == ERROR - } - - pub fn is_obj(self) -> bool { - !matches!(self, NOTHING | UNIT | BOOL | EXITCODE | INT | FLOAT | ERROR) - } -} - -/// An instantiated type representation. -/// -/// A type description has usually a single instance, but it can have more than one -/// if it is a generic type description. -#[derive(Clone, Debug, Default, PartialEq)] -pub enum Type { - /// Reports a previous type error that is propagated. - Error, - - /// A type that have not been inferred yet. 
- #[default] - Unknown, - - /// A type for nothingness, attributed to expressions that never returns - Nothing, - - /// A generic type, that can be instantiated with concrete type parameters. - Polytype, - - /// A callable type, that have a separate definition. - /// with a bound source, if any - Function(Option, FunctionId), - - /// An instance of a generic type with concrete type parameters. - Instantiated(TypeRef, Vec), - - /// A named structured type, with its separate definition - /// with a bound source, if any - Structure(Option, StructureId), -} - -impl Type { - /// Returns whether the type is named. - /// - /// Named types convey a non-positional definition, such as a function. - pub fn is_named(&self) -> bool { - matches!(self, Self::Function(_, _)) - } -} - -#[derive(Clone, Debug, PartialEq)] -pub struct Field { - pub ty: TypeRef, - pub local_id: LocalId, -} - -/// A Structured type -#[derive(Clone, Debug, PartialEq, Default)] -pub struct StructureDesc { - /// Type parameters of the structure - pub type_parameters: Vec, - - /// Fields of the structure. - pub fields: HashMap, - - /// methods of the structure - pub methods: HashMap>, -} - -impl StructureDesc { - pub fn get_fields(&self) -> Vec<&Field> { - let mut field_refs: Vec<_> = self.fields.values().collect(); - field_refs.sort_by_key(|f| f.local_id.0); - field_refs - } -} - -/// A callable function signature. -#[derive(Clone, Debug, PartialEq)] -pub struct FunctionDesc { - /// Type parameters of the function - pub type_parameters: Vec, - - /// The exact parameters that are expected by the function. - pub parameters: Vec, - - /// The return type of the function. - pub return_type: TypeRef, - - /// Kind of function - pub kind: FunctionKind, -} - -#[derive(Clone, Debug, PartialEq)] -pub enum FunctionKind { - Function, - Constructor, -} - -impl FunctionDesc { - /// Create the main function of a script - pub(crate) fn script() -> Self { - Self { - type_parameters: vec![], - parameters: vec![], - return_type: UNIT, - kind: FunctionKind::Function, - } - } - - pub fn constructor( - type_parameters: Vec, - parameters: Vec, - return_type: TypeRef, - ) -> Self { - Self::new( - type_parameters, - parameters, - return_type, - FunctionKind::Constructor, - ) - } - - /// Creates a new function. - pub fn function( - type_parameters: Vec, - parameters: Vec, - return_type: TypeRef, - ) -> Self { - Self::new( - type_parameters, - parameters, - return_type, - FunctionKind::Function, - ) - } - - fn new( - type_parameters: Vec, - parameters: Vec, - return_type: TypeRef, - kind: FunctionKind, - ) -> Self { - Self { - type_parameters, - parameters: parameters - .into_iter() - .enumerate() - .map(|(param_offset, ty)| Parameter { - location: None, - ty, - local_id: LocalId(param_offset), - }) - .collect(), - return_type, - kind, - } - } -} - -/// A function parameter. -#[derive(Clone, Debug, PartialEq)] -pub struct Parameter { - pub(crate) location: Option, - pub ty: TypeRef, - pub local_id: LocalId, -} - -/// A method is a function that only exists on a given type. 
-pub type MethodType = FunctionDesc; diff --git a/analyzer/src/typing.rs b/analyzer/src/typing.rs new file mode 100644 index 00000000..5f55da29 --- /dev/null +++ b/analyzer/src/typing.rs @@ -0,0 +1,1846 @@ +mod assign; +pub mod function; +mod lower; +pub mod registry; +pub mod schema; +mod shell; +pub mod user; +pub mod variable; + +use crate::hir::{Conditional, Declaration, ExprKind, FunctionCall, Module, TypedExpr}; +use crate::module::ModuleView; +use crate::symbol::{Symbol, SymbolDesc, SymbolRegistry, UndefinedSymbol}; +use crate::typing::assign::ascribe_assign; +use crate::typing::function::Function; +use crate::typing::lower::ascribe_template_string; +use crate::typing::registry::{FunctionId, Registry, SchemaId}; +use crate::typing::schema::Schema; +use crate::typing::shell::{ + ascribe_call, ascribe_detached, ascribe_pipeline, ascribe_redirected, ascribe_substitution, +}; +use crate::typing::user::{ + lookup_builtin_type, TypeArena, TypeId, UserType, BOOL_TYPE, ERROR_TYPE, FLOAT_TYPE, INT_TYPE, + NOTHING_TYPE, STRING_TYPE, UNIT_TYPE, UNKNOWN_TYPE, +}; +use crate::typing::variable::{SymbolEntry, VariableTable}; +use crate::{Database, PipelineError, Reef, SourceLocation}; +use ast::call::{MethodCall, ProgrammaticCall}; +use ast::control_flow::If; +use ast::function::FunctionDeclaration; +use ast::group::Block; +use ast::r#struct::{FieldAccess, StructImpl}; +use ast::r#type::{ByName, ParametrizedType, Type}; +use ast::r#use::{Import, InclusionPathItem, Use}; +use ast::value::{Literal, LiteralValue}; +use ast::variable::{VarDeclaration, VarKind, VarReference}; +use ast::Expr; +use context::source::{SourceSegmentHolder, Span}; +use parser::Root; +use std::ffi::OsStr; +use std::path::PathBuf; +use thiserror::Error; + +pub(super) fn type_check( + reef: &mut Reef, + Database { + exports, + ref mut checker, + }: &mut Database, + sorted: Vec, +) -> Vec { + let mut errors = Vec::::new(); + for path in sorted { + let root = reef.files.get(&path).expect("file should be present"); + let mut table = VariableTable::new( + reef.symbols + .get_mut(&path) + .expect("table should be present"), + ); + let mut current_module = Module::new(path.clone()); + ascribe_types( + root, + &mut table, + checker, + &mut current_module, + ModuleView::new(&reef.exports, exports), + &mut errors, + ); + current_module.exports = table.take_exports(); + let all_module_exports = reef + .exports + .get_full_mut(&path) + .expect("module should exist"); + for (variable, ty) in ¤t_module.exports { + if let Some(hoisted_export) = all_module_exports.exports.iter_mut().find(|export| { + export.name == variable.as_str() && export.registry == SymbolRegistry::Variable + }) { + hoisted_export.ty = *ty; + } + } + reef.hir.insert(path, current_module); + } + errors +} + +/// A structure that holds the different type information. +#[derive(Default)] +pub struct TypeChecker { + pub types: TypeArena, + pub registry: Registry, +} + +#[derive(Debug, Clone, PartialEq, Eq)] +pub struct Parameter { + pub ty: TypeId, + pub span: Span, +} + +impl TypeChecker { + fn display(&self, ty: TypeId) -> String { + match &self.types[ty] { + UserType::Unknown => "Unknown".to_string(), + UserType::Error => "Error".to_string(), + UserType::Nothing => "Nothing".to_string(), + UserType::Unit => "Unit".to_string(), + UserType::Function(function) => { + let Function { + param_types, + return_type, + .. 
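+ // Rough picture of the `type_check` driver above: files are visited in the
+ // dependency order produced by hoisting, each one is typed into a `Module`,
+ // and the inferred types of exported variables are then copied back onto the
+ // hoisted exports, so a later file importing a top-level `val` from an earlier
+ // one sees its concrete type rather than the placeholder recorded during
+ // hoisting.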
+ } = &self.registry[*function]; + let params = param_types + .iter() + .map(|param| self.display(param.ty)) + .collect::>() + .join(", "); + format!("({params}) -> {}", self.display(*return_type)) + } + UserType::Parametrized { schema, params } => { + let Schema { + name, + generic_variables, + .. + } = &self.registry[*schema]; + let params = params + .iter() + .map(|ty| self.display(*ty)) + .collect::>() + .join(", "); + if generic_variables.is_empty() { + name.to_owned() + } else { + format!("{name}[{params}]") + } + } + UserType::Module(path) => path + .iter() + .map(|item| item.to_string()) + .collect::>() + .join("::"), + UserType::GenericVariable(name) => name.clone(), + } + } + + fn get_field(&mut self, ty: TypeId, field: &str) -> Result { + match &self.types[ty] { + UserType::Error => Ok(ERROR_TYPE), + UserType::Nothing | UserType::Unit => Err(FieldError::ExpectedStruct), + UserType::Parametrized { schema, params } => { + let Schema { + generic_variables, + fields, + methods, + .. + } = &self.registry[*schema]; + if let Some(field) = fields.get(field) { + Ok( + if let Some(concrete_ty) = + generic_variables.iter().position(|&ty| ty == field.ty) + { + params[concrete_ty] + } else if generic_variables.is_empty() { + field.ty + } else { + // TODO: use concretize + match &self.types[field.ty] { + UserType::Parametrized { + schema, + params: sub_params, + } => { + let params = sub_params + .iter() + .map(|ty| { + if let Some(concrete_ty) = + generic_variables.iter().position(|&pty| pty == *ty) + { + params[concrete_ty] + } else { + *ty + } + }) + .collect::>(); + self.types.alloc(UserType::Parametrized { + schema: *schema, + params, + }) + } + _ => field.ty, + } + }, + ) + } else if let Some(method) = methods.get(field) { + Err(FieldError::IsMethod(*method)) + } else { + Err(FieldError::UnknownField { + available: fields.keys().cloned().collect(), + }) + } + } + _ => Err(FieldError::UnknownField { + available: Vec::new(), + }), + } + } +} + +pub(crate) struct UnifyError; + +enum FieldError { + ExpectedStruct, + UnknownField { available: Vec }, + IsMethod(FunctionId), +} + +#[derive(Debug, Clone, PartialEq, Eq)] +pub struct TypeError { + pub kind: TypeErrorKind, + pub at: SourceLocation, +} + +impl TypeError { + pub fn new(kind: TypeErrorKind, at: SourceLocation) -> Self { + Self { kind, at } + } +} + +#[derive(Error, Debug, Clone, PartialEq, Eq)] +pub enum TypeErrorKind { + #[error("undefined {expected} `{name}`")] + UndefinedSymbol { + name: String, + expected: SymbolRegistry, + found: Option, + }, + + #[error("duplicate symbol `{name}`")] + DuplicateSymbol { name: String, previous: Span }, + + #[error("missing type")] + MissingType, + + #[error("type mismatch, expected `{expected}`, received `{actual}`")] + TypeMismatch { + expected: String, + expected_due_to: Option, + actual: String, + }, + + #[error("expected {expected} arguments but received {received}")] + ArityMismatch { expected: usize, received: usize }, + + #[error("no field `{name}` on type `{type_name}`")] + UnknownField { + name: String, + type_name: String, + available: Vec, + }, + + #[error("no method `{name}` on type `{type_name}`")] + UnknownMethod { name: String, type_name: String }, + + #[error("type annotation needed")] + TypeAnnotationRequired { + types: Vec, + insert_at: usize, + }, + + #[error("return statement outside of function body")] + ReturnOutsideFunction, + + #[error("repeated parameter name `{name}`")] + RepeatedParameterName { name: String, previous: Span }, + + #[error("`self` parameter is only 
allowed in methods")] + UnexpectedSelfParameter, + + #[error("cannot define an implementation for primitive types")] + CannotImplPrimitive, + + #[error("attempted to a access a method like a field")] + MethodLikeFieldAccess { name: String, parentheses: String }, + + #[error("cannot assign twice to immutable variable `{name}`")] + CannotReassign { name: String }, + + #[error("found circular dependency")] + CircularDependency { cycle: Vec }, +} + +impl From for PipelineError { + fn from(err: TypeError) -> Self { + PipelineError::Type(err) + } +} + +/// Informs the type inference algorithm about the locally expected type. +#[derive(Debug, Clone, Copy, PartialEq, Eq)] +enum TypeHint { + /// The return type is immediately discarded. + /// + /// Subbranches are not required to have a specific type and may return different types. + Unused, + + /// The return type is used, but not required to be a specific type. + /// + /// It forces subexpressions to coerce to an exact common type. + Used, + + /// The type is used and is expected to be a specific type. + /// + /// If the expression calls a generic function, this type may be used during the type inference + /// if not provided explicitly for instance. + Required(TypeId), +} + +impl TypeHint { + fn is_used(self) -> bool { + matches!(self, TypeHint::Used | TypeHint::Required(_)) + } +} + +#[derive(Clone, Copy)] +struct Context<'a> { + modules: ModuleView<'a>, + hint: TypeHint, + return_ty: Option<&'a Return>, +} + +#[derive(Clone)] +struct Return { + ty: TypeId, + span: Span, +} + +impl<'a> Context<'a> { + fn with_hint(self, hint: TypeHint) -> Self { + Self { hint, ..self } + } + + fn with_return(self, return_ty: &'a Return) -> Self { + Self { + return_ty: Some(return_ty), + ..self + } + } +} + +pub(super) fn ascribe_types( + root: &Root, + table: &mut VariableTable, + checker: &mut TypeChecker, + storage: &mut Module, + modules: ModuleView, + errors: &mut Vec, +) { + let mut expressions = Vec::new(); + table.push_environment(); + for expr in &root.expressions { + let ctx = Context { + modules, + hint: TypeHint::Unused, + return_ty: None, + }; + expressions.push(ascribe_type(expr, table, checker, storage, ctx, errors)); + } + let hir = TypedExpr { + kind: ExprKind::Block(expressions), + span: 0..0, + ty: UNIT_TYPE, + }; + storage.add(None, hir, table.pop_environment()); +} + +fn ascribe_type( + expr: &Expr, + table: &mut VariableTable, + checker: &mut TypeChecker, + storage: &mut Module, + ctx @ Context { modules, hint, .. 
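+ // How the `TypeHint` above propagates, roughly: a top-level statement is typed
+ // with `TypeHint::Unused`; `val x: Int = f()` types `f()` with
+ // `TypeHint::Required(INT_TYPE)` so a generic return type can be inferred from
+ // the annotation; and an `if` whose value is consumed is typed with a used
+ // hint, which is what forces both branches to agree on a single type.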
}: Context, + errors: &mut Vec, +) -> TypedExpr { + match expr { + Expr::Use(Use { + import, + segment: span, + }) => { + ascribe_import(import, table, checker, modules, errors); + TypedExpr::noop(span.clone()) + } + Expr::Literal(Literal { + parsed, + segment: span, + }) => TypedExpr { + kind: ExprKind::Literal(parsed.clone()), + span: span.clone(), + ty: match parsed { + LiteralValue::String(_) => STRING_TYPE, + LiteralValue::Int(_) => INT_TYPE, + LiteralValue::Float(_) => FLOAT_TYPE, + LiteralValue::Bool(_) => BOOL_TYPE, + }, + }, + Expr::TemplateString(tpl) => { + ascribe_template_string(tpl, table, checker, storage, ctx, errors) + } + Expr::VarDeclaration(VarDeclaration { + kind, + var, + initializer: Some(initializer), + segment: span, + }) => { + let expected_ty = var + .ty + .as_ref() + .map(|ty| lookup_type(ty, table, checker, modules, errors)); + let ctx = ctx.with_hint(expected_ty.map_or(TypeHint::Used, TypeHint::Required)); + let typed_initializer = ascribe_type(initializer, table, checker, storage, ctx, errors); + let mut ty = typed_initializer.ty; + if typed_initializer.is_ok() { + if let Some(expected_ty) = expected_ty { + if let Err(_) = checker.types.unify(ty, expected_ty) { + errors.push(TypeError::new( + TypeErrorKind::TypeMismatch { + expected: checker.display(expected_ty), + expected_due_to: Some(SourceLocation::new( + table.path().to_owned(), + var.ty.as_ref().unwrap().segment(), + )), + actual: checker.display(ty), + }, + SourceLocation::new(table.path().to_owned(), initializer.segment()), + )); + } + ty = expected_ty; + } + } + let var = table.insert_variable( + var.name.value.to_string(), + ty, + var.name.segment(), + *kind == VarKind::Var, + ); + TypedExpr { + kind: ExprKind::Declare(Declaration { + identifier: var.clone(), + value: Some(Box::new(typed_initializer)), + }), + span: span.clone(), + ty: UNIT_TYPE, + } + } + Expr::Assign(assign) => ascribe_assign(assign, table, checker, storage, ctx, errors), + Expr::FunctionDeclaration(fn_decl) => { + ascribe_fn_decl(fn_decl, None, table, checker, storage, ctx, errors); + TypedExpr::noop(fn_decl.segment()) + } + Expr::Call(call) => ascribe_call(call, table, checker, storage, ctx, errors), + Expr::Substitution(sub) => ascribe_substitution(sub, table, checker, storage, ctx, errors), + Expr::ProgrammaticCall(ProgrammaticCall { + path, + arguments, + type_parameters, + segment: span, + }) => { + let arguments = arguments + .iter() + .map(|expr| ascribe_type(expr, table, checker, storage, ctx, errors)) + .collect::>(); + let ty = lookup_path( + path, + SymbolRegistry::Function, + table, + checker, + modules, + errors, + ); + if ty.is_err() { + return TypedExpr { + kind: ExprKind::Noop, + span: span.clone(), + ty: ERROR_TYPE, + }; + } + let mut type_parameters = type_parameters + .iter() + .map(|type_param| lookup_type(type_param, table, checker, modules, errors)) + .collect::>(); + let UserType::Function(function) = checker.types[ty] else { + panic!( + "function should have a function type {ty:?} {:?}", + &checker.types[ty] + ); + }; + let Function { + ref declared_at, + fqn: _, + ref generic_variables, + ref param_types, + return_type, + kind: _, + } = checker.registry[function]; + let mut return_type = return_type; + if type_parameters.is_empty() && !generic_variables.is_empty() { + // Try to infer the generic types from the actual arguments + type_parameters = vec![UNKNOWN_TYPE; generic_variables.len()]; + for (arg, param) in arguments.iter().zip(param_types.iter()) { + if let Some(generic_variable) = + 
generic_variables.iter().position(|&ty| ty == param.ty) + { + if type_parameters[generic_variable] != UNKNOWN_TYPE + && type_parameters[generic_variable] != arg.ty + { + errors.push(TypeError::new( + TypeErrorKind::TypeMismatch { + expected: checker.display(type_parameters[generic_variable]), + expected_due_to: None, + actual: checker.display(arg.ty), + }, + SourceLocation::new(table.path().to_owned(), arg.span.clone()), + )); + } else { + type_parameters[generic_variable] = arg.ty; + } + } else if let UserType::Parametrized { + schema: param_schema, + params: param_params, + .. + } = &checker.types[param.ty] + { + if let UserType::Parametrized { + schema, + params: arg_params, + } = &checker.types[arg.ty] + { + if schema == param_schema { + for param_param in param_params { + if let Some(idx) = + generic_variables.iter().position(|&ty| ty == *param_param) + { + type_parameters[idx].define_if_absent(arg_params[idx]); + } + } + } + } + } + } + if let TypeHint::Required(expected_return_ty) = hint { + if let Some(idx) = generic_variables.iter().position(|&ty| ty == return_type) { + type_parameters[idx].define_if_absent(expected_return_ty); + } else if let UserType::Parametrized { + schema: expected_schema, + params: expected_params, + .. + } = &checker.types[expected_return_ty] + { + if let UserType::Parametrized { + schema, + params: fn_return_params, + } = &checker.types[return_type] + { + if schema == expected_schema { + // First, get the index of the generic_variables in the return_params list + for (fn_return_param, fn_actual) in + fn_return_params.iter().zip(expected_params) + { + if let Some(generic_idx) = generic_variables + .iter() + .position(|&ty| ty == *fn_return_param) + { + type_parameters[generic_idx].define_if_absent(*fn_actual); + } + } + } + } + } + } + if type_parameters.iter().any(|ty| *ty == UNKNOWN_TYPE) { + errors.push(TypeError::new( + TypeErrorKind::TypeAnnotationRequired { + types: generic_variables + .iter() + .map(|ty| checker.display(*ty)) + .collect(), + insert_at: path + .last() + .expect("path should have at least one item") + .segment() + .end, + }, + SourceLocation::new(table.path().to_owned(), span.clone()), + )); + return_type = ERROR_TYPE; + } + } + + if arguments.len() != param_types.len() { + errors.push(TypeError::new( + TypeErrorKind::ArityMismatch { + expected: param_types.len(), + received: arguments.len(), + }, + SourceLocation::new(table.path().to_owned(), span.clone()), + )); + } else { + for (arg, param) in arguments.iter().zip(param_types.iter()) { + let param_ty = + checker + .types + .concretize(param.ty, generic_variables, &type_parameters); + if let Err(_) = checker.types.unify(arg.ty, param_ty) { + errors.push(TypeError::new( + TypeErrorKind::TypeMismatch { + expected: checker.display(param_ty), + expected_due_to: Some(SourceLocation::new( + declared_at.clone(), + param.span.clone(), + )), + actual: checker.display(arg.ty), + }, + SourceLocation::new(table.path().to_owned(), arg.span.clone()), + )); + } + } + } + return_type = + checker + .types + .concretize(return_type, generic_variables, &type_parameters); + TypedExpr { + kind: ExprKind::FunctionCall(FunctionCall { + arguments, + function_id: function, + }), + span: span.clone(), + ty: return_type, + } + } + Expr::StructDeclaration(decl) => TypedExpr::noop(decl.segment.clone()), + Expr::FieldAccess(FieldAccess { + expr, + field, + segment: span, + }) => { + let typed_expr = ascribe_type(expr, table, checker, storage, ctx, errors); + match checker.get_field(typed_expr.ty, 
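+ // Sketch of the inference done by the programmatic-call branch above: given
+ // `fun identity[T](x: T) -> T = $x`, the call `val n: Int = identity(5)` first
+ // tries to fill `T` from the argument types (here `Int` from `5`); if an
+ // argument leaves it unknown, the `TypeHint::Required` return type is used;
+ // and if it is still `UNKNOWN_TYPE` after both passes, a
+ // `TypeAnnotationRequired` error asks for explicit type arguments at the end
+ // of the call path.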
field.value.as_str()) { + Ok(field_ty) => { + return TypedExpr { + kind: ExprKind::Noop, + span: span.clone(), + ty: field_ty, + }; + } + Err(FieldError::ExpectedStruct) => { + errors.push(TypeError::new( + TypeErrorKind::TypeMismatch { + expected: "Struct".to_string(), + expected_due_to: None, + actual: checker.display(typed_expr.ty), + }, + SourceLocation::new(table.path().to_owned(), span.clone()), + )); + } + Err(FieldError::UnknownField { available }) => { + errors.push(TypeError::new( + TypeErrorKind::UnknownField { + name: field.value.to_string(), + type_name: checker.display(typed_expr.ty), + available, + }, + SourceLocation::new(table.path().to_owned(), field.segment()), + )); + } + Err(FieldError::IsMethod(method)) => { + let Function { + ref param_types, .. + } = checker.registry[method]; + let mut builder = "(".to_owned(); + for param in param_types { + if param.ty == typed_expr.ty { + continue; + } + if builder.ends_with('(') { + builder.push('_'); + } else { + builder.push_str(", _"); + } + } + builder.push(')'); + errors.push(TypeError::new( + TypeErrorKind::MethodLikeFieldAccess { + name: field.value.to_string(), + parentheses: builder, + }, + SourceLocation::new(table.path().to_owned(), field.segment()), + )); + } + } + TypedExpr::error(span.clone()) + } + Expr::VarReference(VarReference { + name, + segment: span, + }) => match table.lookup_variable(name.name()) { + Ok(var) => TypedExpr { + kind: ExprKind::Reference(var.id.clone()), + span: span.clone(), + ty: var.ty, + }, + Err(err) => { + errors.push(TypeError::new( + TypeErrorKind::UndefinedSymbol { + name: name.name().to_owned(), + expected: SymbolRegistry::Variable, + found: err.into(), + }, + SourceLocation::new(table.path().to_owned(), span.clone()), + )); + TypedExpr::error(span.clone()) + } + }, + Expr::Block(Block { + expressions, + segment: span, + }) => { + table.enter_scope(); + let expressions = expressions + .iter() + .map(|expr| ascribe_type(expr, table, checker, storage, ctx, errors)) + .collect::>(); + table.exit_scope(); + let last_ty = expressions.last().map(|expr| expr.ty).unwrap_or(UNIT_TYPE); + TypedExpr { + kind: ExprKind::Block(expressions), + span: span.clone(), + ty: last_ty, + } + } + Expr::Redirected(redirected) => { + ascribe_redirected(redirected, table, checker, storage, ctx, errors) + } + Expr::Detached(detached) => { + ascribe_detached(detached, table, checker, storage, ctx, errors) + } + Expr::Pipeline(pipeline) => { + ascribe_pipeline(pipeline, table, checker, storage, ctx, errors) + } + Expr::If(If { + condition, + success_branch, + fail_branch, + segment: span, + }) => { + let typed_condition = ascribe_type(condition, table, checker, storage, ctx, errors); + if let Err(_) = checker.types.unify(typed_condition.ty, BOOL_TYPE) { + errors.push(TypeError::new( + TypeErrorKind::TypeMismatch { + expected: "Bool".to_owned(), + expected_due_to: None, + actual: checker.display(typed_condition.ty), + }, + SourceLocation::new(table.path().to_owned(), condition.segment()), + )); + } + let then_branch = ascribe_type(success_branch, table, checker, storage, ctx, errors); + let otherwise_branch = fail_branch + .as_ref() + .map(|branch| ascribe_type(branch, table, checker, storage, ctx, errors)) + .unwrap_or_else(|| TypedExpr::noop(span.clone())); + if hint.is_used() && then_branch.ty != otherwise_branch.ty { + errors.push(TypeError::new( + TypeErrorKind::TypeMismatch { + expected: checker.display(then_branch.ty), + expected_due_to: Some(SourceLocation::new( + table.path().to_owned(), + 
success_branch.segment(), + )), + actual: checker.display(otherwise_branch.ty), + }, + SourceLocation::new(table.path().to_owned(), span.clone()), + )); + } + let ty = then_branch.ty; + TypedExpr { + kind: ExprKind::Conditional(Conditional { + condition: Box::new(typed_condition), + then: Box::new(then_branch), + otherwise: Some(Box::new(otherwise_branch)), + }), + span: span.clone(), + ty, + } + } + Expr::Return(ast::function::Return { + expr, + segment: span, + }) => { + let typed_expr = expr + .as_ref() + .map(|expr| ascribe_type(expr, table, checker, storage, ctx, errors)); + let ty = typed_expr.as_ref().map_or(UNIT_TYPE, |expr| expr.ty); + if let Some(Return { + ty: return_ty, + span: return_span, + }) = ctx.return_ty + { + if let Err(_) = checker.types.unify(ty, *return_ty) { + errors.push(TypeError::new( + TypeErrorKind::TypeMismatch { + expected: checker.display(*return_ty), + expected_due_to: Some(SourceLocation::new( + table.path().to_owned(), + return_span.clone(), + )), + actual: checker.display(ty), + }, + SourceLocation::new(table.path().to_owned(), span.clone()), + )); + } + } else { + errors.push(TypeError::new( + TypeErrorKind::ReturnOutsideFunction, + SourceLocation::new(table.path().to_owned(), span.clone()), + )); + } + TypedExpr { + kind: ExprKind::Return(typed_expr.map(Box::new)), + span: span.clone(), + ty: NOTHING_TYPE, + } + } + Expr::Impl(StructImpl { + type_parameters, + impl_type, + functions, + segment: span, + }) => { + table.enter_scope(); + for type_param in type_parameters.iter() { + table.insert_local( + type_param.name.to_string(), + UNKNOWN_TYPE, + type_param.segment.clone(), + SymbolEntry::Type, + ); + } + let self_ty = lookup_type(impl_type, table, checker, modules, errors); + for function in functions { + ascribe_fn_decl( + function, + Some(self_ty), + table, + checker, + storage, + ctx, + errors, + ); + } + table.exit_scope(); + TypedExpr::noop(span.clone()) + } + Expr::MethodCall(MethodCall { + source, + name: ident, + arguments, + type_parameters, + segment: span, + }) => { + let typed_source = ascribe_type(source, table, checker, storage, ctx, errors); + if typed_source.ty.is_err() { + return TypedExpr::error(span.clone()); + } + let type_parameters = type_parameters + .iter() + .map(|type_param| lookup_type(type_param, table, checker, modules, errors)) + .collect::>(); + let args = arguments + .iter() + .map(|expr| ascribe_type(expr, table, checker, storage, ctx, errors)) + .collect::>(); + let name = ident.as_ref().map_or("apply", |name| name.value.as_str()); + match &checker.types[typed_source.ty] { + UserType::Parametrized { schema, params } => { + let Schema { + fields, methods, .. + } = &checker.registry[*schema]; + if let Some(method) = methods.get(name) { + let Function { + ref generic_variables, + return_type, + .. 
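+ // Illustration of the method-call branch here (the surface syntax of the
+ // example is assumed): for `struct Box[T] { value: T }` with a method
+ // `fun get(self) -> T`, a call `$b.get()` on a `Box[Int]` receiver prepends
+ // the receiver's concrete parameters (`[Int]`) to any explicit type arguments
+ // below, before concretizing the method's return type, so the call types as
+ // `Int`.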
+ } = checker.registry[*method]; + let type_parameters = { + let mut parameters = params.clone(); + parameters.extend(type_parameters); + parameters + }; + let return_type = checker.types.concretize( + return_type, + generic_variables, + &type_parameters, + ); + TypedExpr { + kind: ExprKind::Noop, + span: span.clone(), + ty: return_type, + } + } else if let Some(field) = fields.get(name) { + errors.push(TypeError::new( + TypeErrorKind::MethodLikeFieldAccess { + name: name.to_string(), + parentheses: "()".to_string(), + }, + SourceLocation::new(table.path().to_owned(), span.clone()), + )); + TypedExpr::error(span.clone()) + } else { + errors.push(TypeError::new( + TypeErrorKind::UnknownField { + name: name.to_string(), + type_name: checker.display(typed_source.ty), + available: Vec::new(), + }, + SourceLocation::new( + table.path().to_owned(), + ident.as_ref().map_or(span.clone(), |ident| ident.segment()), + ), + )); + TypedExpr::error(span.clone()) + } + } + _ => { + errors.push(TypeError::new( + TypeErrorKind::TypeMismatch { + expected: "Struct".to_string(), + expected_due_to: None, + actual: checker.display(typed_source.ty), + }, + SourceLocation::new(table.path().to_owned(), span.clone()), + )); + TypedExpr::error(span.clone()) + } + } + } + expr => todo!("{expr:?}"), + } +} + +fn ascribe_fn_decl( + FunctionDeclaration { + name: ident, + type_parameters, + parameters, + return_type: return_ty_ident, + body, + segment: span, + }: &FunctionDeclaration, + current_ty: Option, + table: &mut VariableTable, + checker: &mut TypeChecker, + storage: &mut Module, + ctx: Context, + errors: &mut Vec, +) { + table.push_environment(); + let function = match current_ty { + Some(ty) => { + let UserType::Parametrized { schema, .. } = checker.types[ty] else { + panic!( + "function should have a struct type, got {:?}", + checker.types[ty] + ); + }; + let Schema { + ref mut methods, .. + } = checker.registry[schema]; + *methods + .get(ident.value.as_str()) + .expect("method should be defined in the struct") + } + None => { + let Symbol { ty, .. } = table + .get(&ident.value, SymbolRegistry::Function) + .expect("function should be defined in the table"); + let UserType::Function(function) = checker.types[*ty] else { + panic!( + "function should have a function type, got {:?}", + checker.types[*ty] + ); + }; + function + } + }; + let Function { + ref generic_variables, + ref param_types, + return_type, + .. 
+ } = checker.registry[function]; + table.enter_scope(); + storage.enter_namespace(ident.value.as_str()); + for type_param in type_parameters.iter() { + table.insert_local( + type_param.name.to_string(), + UNKNOWN_TYPE, + type_param.segment.clone(), + SymbolEntry::Type, + ); + } + for (i, (param, param_ty)) in parameters.iter().zip(param_types.iter()).enumerate() { + if let Some(previous) = parameters[..i] + .iter() + .find(|prev| prev.name() == param.name()) + { + errors.push(TypeError::new( + TypeErrorKind::RepeatedParameterName { + name: param.name().to_owned(), + previous: previous.segment().clone(), + }, + SourceLocation::new(table.path().to_owned(), param.segment()), + )); + } + table.insert_variable(param.name().to_owned(), param_ty.ty, span.clone(), false); + } + if let Some(body) = body.as_ref() { + let ret = Return { + ty: return_type, + span: match return_ty_ident { + None => ident.segment(), + Some(ref ret) => ret.segment(), + }, + }; + let ctx = ctx.with_return(&ret); + let typed_body = ascribe_type(body, table, checker, storage, ctx, errors); + if let Err(_) = checker.types.unify(typed_body.ty, return_type) { + errors.push(TypeError::new( + TypeErrorKind::TypeMismatch { + expected: checker.display(return_type), + expected_due_to: return_ty_ident + .as_ref() + .map(|ty| SourceLocation::new(table.path().to_owned(), ty.segment())), + actual: checker.display(typed_body.ty), + }, + SourceLocation::new(table.path().to_owned(), body.segment()), + )); + } + storage.add(Some(function), typed_body, table.pop_environment()); + } + storage.exit_namespace(); + table.exit_scope(); +} + +fn lookup_type( + ty: &Type, + table: &mut VariableTable, + checker: &mut TypeChecker, + modules: ModuleView, + errors: &mut Vec, +) -> TypeId { + match ty { + Type::Parametrized(ParametrizedType { + path, + params, + segment: span, + }) => { + // let ty = lookup_path(path, SymbolRegistry::Type, table, checker, modules, errors); + let ty = if let [InclusionPathItem::Symbol(item)] = path.as_slice() { + lookup_builtin_type(item.value.as_str()).unwrap_or_else(|| { + lookup_path(path, SymbolRegistry::Type, table, checker, modules, errors) + }) + } else { + lookup_path(path, SymbolRegistry::Type, table, checker, modules, errors) + }; + if ty.is_err() { + return ERROR_TYPE; + } + let type_params = params + .iter() + .map(|ty| lookup_type(ty, table, checker, modules, errors)) + .collect::>(); + let mut schema: Option = None; + let generic_variables = match &checker.types[ty] { + UserType::Parametrized { + schema: found_schema, + .. + } => { + let Schema { + generic_variables, .. + } = &checker.registry[*found_schema]; + schema = Some(*found_schema); + generic_variables.as_slice() + } + _ => &[], + }; + if generic_variables.len() != params.len() { + errors.push(TypeError::new( + TypeErrorKind::ArityMismatch { + expected: generic_variables.len(), + received: params.len(), + }, + SourceLocation::new( + table.path().to_owned(), + if let Some((first, last)) = params.first().zip(params.last()) { + first.segment().start..last.segment().end + } else { + span.clone() + }, + ), + )); + ty + } else if generic_variables == type_params { + ty + } else { + let Some(schema) = schema else { + return ERROR_TYPE; + }; + checker.types.alloc(UserType::Parametrized { + schema, + params: type_params, + }) + } + } + Type::Callable(_) => todo!(), + Type::ByName(ByName { name: ty, .. 
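+ // What `lookup_type` above does for annotations, in short: builtin names such
+ // as `Int` short-circuit through `lookup_builtin_type`; for a user-defined
+ // generic such as `struct Wrapper[T]` (syntax assumed), the annotation
+ // `Wrapper[Int]` resolves the schema, checks the supplied parameter count
+ // against its `generic_variables`, and allocates a fresh
+ // `UserType::Parametrized` carrying the concrete `Int` parameter.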
}) => lookup_type(ty, table, checker, modules, errors), + } +} + +fn lookup_path( + path: &[InclusionPathItem], + registry: SymbolRegistry, + table: &mut VariableTable, + checker: &TypeChecker, + modules: ModuleView, + errors: &mut Vec, +) -> TypeId { + let (first, rest) = path.split_first().expect("path should not be empty"); + let mut tree = match first { + InclusionPathItem::Symbol(ident) => match table.lookup(ident.value.as_str(), registry) { + Ok(symbol) => { + if !rest.is_empty() { + errors.push(TypeError::new( + TypeErrorKind::UndefinedSymbol { + name: ident.value.to_string(), + expected: registry, + found: Some(SymbolDesc { + registry: symbol.registry, + span: symbol.declared_at.clone(), + }), + }, + SourceLocation::new(table.path().to_owned(), ident.segment()), + )); + } + return symbol.ty; + } + Err(UndefinedSymbol::WrongRegistry(SymbolDesc { + registry: SymbolRegistry::Type, + .. + })) => { + let symbol = table + .get(ident.value.as_str(), SymbolRegistry::Type) + .expect("module should be defined in the table"); + let UserType::Module(path) = &checker.types[symbol.ty] else { + panic!( + "module should have a module type, got {:?}", + checker.types[symbol.ty] + ); + }; + match modules.get_direct(path) { + Some(tree) => tree, + None => { + return ERROR_TYPE; + } + } + } + Err(err) => { + errors.push(TypeError::new( + TypeErrorKind::UndefinedSymbol { + name: ident.value.to_string(), + expected: registry, + found: err.into(), + }, + SourceLocation::new(table.path().to_owned(), ident.segment()), + )); + return ERROR_TYPE; + } + }, + InclusionPathItem::Reef(_) => modules.current, + }; + let Some((last, rest)) = rest.split_last() else { + return ERROR_TYPE; + }; + for item in rest { + let InclusionPathItem::Symbol(ident) = item else { + errors.push(TypeError::new( + TypeErrorKind::UndefinedSymbol { + name: item.to_string(), + expected: SymbolRegistry::Type, + found: None, + }, + SourceLocation::new(table.path().to_owned(), item.segment()), + )); + return ERROR_TYPE; + }; + match tree.get(OsStr::new(ident.value.as_str())) { + Some(child_tree) => tree = child_tree, + None => { + errors.push(TypeError::new( + TypeErrorKind::UndefinedSymbol { + name: item.to_string(), + expected: SymbolRegistry::Type, + found: None, + }, + SourceLocation::new(table.path().to_owned(), item.segment()), + )); + return ERROR_TYPE; + } + } + } + match last { + InclusionPathItem::Symbol(ident) => { + if let Some(export) = tree + .exports + .iter() + .find(|export| export.name == ident.value && export.registry == registry) + { + export.ty + } else { + errors.push(TypeError::new( + TypeErrorKind::UndefinedSymbol { + name: last.to_string(), + expected: SymbolRegistry::Type, + found: None, + }, + SourceLocation::new(table.path().to_owned(), ident.segment()), + )); + ERROR_TYPE + } + } + InclusionPathItem::Reef(span) => { + errors.push(TypeError::new( + TypeErrorKind::UndefinedSymbol { + name: last.to_string(), + expected: SymbolRegistry::Type, + found: None, + }, + SourceLocation::new(table.path().to_owned(), span.clone()), + )); + ERROR_TYPE + } + } +} + +fn ascribe_import( + import: &Import, + table: &mut VariableTable, + checker: &mut TypeChecker, + modules: ModuleView, + errors: &mut Vec, +) { + match import { + Import::Symbol(item) => { + let (first, rest) = item.path.split_first().expect("path should not be empty"); + let Some(mut tree) = modules.get(first) else { + errors.push(TypeError::new( + TypeErrorKind::UndefinedSymbol { + name: first.to_string(), + expected: SymbolRegistry::Type, + found: None, + }, + 
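+ // Resolution sketch for `use reef::foo::bar` in the import handling here
+ // (cf. the `composed_path` test further down): the path is walked through the
+ // module tree from the current reef; exports named `bar` are brought into
+ // scope with `insert_remote`, and a child module named `bar` is bound as a
+ // `UserType::Module` so that a later `bar::here()` can be resolved through
+ // `lookup_path`.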
SourceLocation::new(table.path().to_owned(), first.segment()), + )); + return; + }; + let Some((last, rest)) = rest.split_last() else { + return; + }; + for item in rest { + let InclusionPathItem::Symbol(ident) = item else { + errors.push(TypeError::new( + TypeErrorKind::UndefinedSymbol { + name: item.to_string(), + expected: SymbolRegistry::Type, + found: None, + }, + SourceLocation::new(table.path().to_owned(), item.segment()), + )); + return; + }; + match tree.get(OsStr::new(ident.value.as_str())) { + Some(child_tree) => tree = child_tree, + None => { + errors.push(TypeError::new( + TypeErrorKind::UndefinedSymbol { + name: item.to_string(), + expected: SymbolRegistry::Type, + found: None, + }, + SourceLocation::new(table.path().to_owned(), item.segment()), + )); + return; + } + } + } + match last { + InclusionPathItem::Symbol(ident) => { + let mut found = false; + for export in tree.exports.iter() { + if export.name != ident.value { + continue; + } + found = true; + table.insert_remote(ident.value.to_string(), ident.segment(), export); + } + for child in tree.children.iter() { + if child.name != OsStr::new(&ident.value) { + continue; + } + found = true; + table.insert_local( + ident.value.to_string(), + checker.types.alloc(UserType::Module(item.path.clone())), + ident.segment(), + SymbolEntry::Type, + ); + } + if !found { + errors.push(TypeError::new( + TypeErrorKind::UndefinedSymbol { + name: ident.value.to_string(), + expected: SymbolRegistry::Type, + found: None, + }, + SourceLocation::new(table.path().to_owned(), ident.segment()), + )); + } + } + InclusionPathItem::Reef(span) => { + errors.push(TypeError::new( + TypeErrorKind::UndefinedSymbol { + name: last.to_string(), + expected: SymbolRegistry::Type, + found: None, + }, + SourceLocation::new(table.path().to_owned(), span.clone()), + )); + } + } + } + Import::AllIn(_, _) => {} + Import::Environment(_) => {} + Import::List(_) => {} + } +} + +#[cfg(test)] +mod tests { + use super::*; + use crate::hoist::hoist_files; + use crate::module::import_multi; + use crate::{Database, MemoryFilesystem}; + use std::collections::HashMap; + use std::ffi::OsString; + use std::path::PathBuf; + + fn type_check(source: &str) -> Vec { + let fs = MemoryFilesystem::new(HashMap::from([(PathBuf::from("main.msh"), source)])); + check(fs, "main.msh") + } + + fn check(fs: MemoryFilesystem, entrypoint: &str) -> Vec { + let mut database = Database::default(); + let mut reef = Reef::new(OsString::from("test")); + assert_eq!( + import_multi(&mut reef, &fs, entrypoint), + [], + "no import errors should be found" + ); + let hoist_result = hoist_files(&database.exports, &mut reef, &mut database.checker); + assert_eq!( + hoist_result.errors, + [], + "no hoisting errors should be found" + ); + super::type_check(&mut reef, &mut database, hoist_result.sorted) + } + + fn type_check_multi(sources: [(PathBuf, &str); N]) -> Vec { + let entrypoint = sources + .first() + .expect("at least one source") + .0 + .display() + .to_string(); + check( + MemoryFilesystem::from_iter(sources.into_iter()), + &entrypoint, + ) + } + + #[test] + fn valid_var_type_annotation() { + let errors = type_check("val x: Int = 1"); + assert_eq!(errors, []); + } + + #[test] + fn invalid_var_type_annotation() { + let errors = type_check("val x: Int = true"); + assert_eq!( + errors, + [TypeError::new( + TypeErrorKind::TypeMismatch { + expected: "Int".to_owned(), + expected_due_to: Some(SourceLocation::new(PathBuf::from("main.msh"), 7..10)), + actual: "Bool".to_owned(), + }, + 
SourceLocation::new(PathBuf::from("main.msh"), 13..17), + )] + ); + } + + #[test] + fn analyze_unresolved_back_import() { + let errors = type_check_multi([ + (PathBuf::from("a"), "use reef::b::b"), + (PathBuf::from("b"), "use reef::a::a\nfun b() = {}"), + ]); + assert_eq!( + errors, + vec![TypeError::new( + TypeErrorKind::UndefinedSymbol { + name: "a".to_string(), + expected: SymbolRegistry::Type, + found: None, + }, + SourceLocation::new(PathBuf::from("b"), 13..14) + )] + ); + } + + #[test] + fn composed_path() { + let errors = type_check_multi([ + (PathBuf::from("entry"), "use reef::foo::bar\nbar::here()"), + (PathBuf::from("foo/bar"), "fun here() = {}"), + (PathBuf::from("foo"), "use reef::foo::bar"), + ]); + assert_eq!(errors, []); + } + + #[test] + fn one_path_error() { + let errors = type_check_multi([ + (PathBuf::from("test"), "reef::bar::test::foo()"), + (PathBuf::from("bar"), "fun test() = {}"), + ]); + assert_eq!( + errors, + [TypeError::new( + TypeErrorKind::UndefinedSymbol { + name: "test".to_owned(), + expected: SymbolRegistry::Type, + found: None, + }, + SourceLocation::new(PathBuf::from("test"), 11..15), + )] + ); + } + + #[test] + fn pass_types_across_files() { + let errors = type_check_multi([ + (PathBuf::from("test"), "val x: Int = reef::bar::truthy()"), + (PathBuf::from("bar"), "fun truthy() -> Bool = true"), + ]); + assert_eq!( + errors, + [TypeError::new( + TypeErrorKind::TypeMismatch { + expected: "Int".to_owned(), + expected_due_to: Some(SourceLocation::new(PathBuf::from("test"), 7..10)), + actual: "Bool".to_owned(), + }, + SourceLocation::new(PathBuf::from("test"), 13..32), + )] + ); + } + + #[test] + fn parameter_type_mismatch() { + let errors = type_check_multi([ + (PathBuf::from("main"), "reef::lib::play(5, true)"), + (PathBuf::from("lib"), "fun play(x: Bool, y: Int) = {}"), + ]); + assert_eq!( + errors, + [ + TypeError::new( + TypeErrorKind::TypeMismatch { + expected: "Bool".to_owned(), + expected_due_to: Some(SourceLocation::new(PathBuf::from("lib"), 9..16)), + actual: "Int".to_owned(), + }, + SourceLocation::new(PathBuf::from("main"), 16..17), + ), + TypeError::new( + TypeErrorKind::TypeMismatch { + expected: "Int".to_owned(), + expected_due_to: Some(SourceLocation::new(PathBuf::from("lib"), 18..24)), + actual: "Bool".to_owned(), + }, + SourceLocation::new(PathBuf::from("main"), 19..23), + ) + ] + ); + } + + #[test] + #[ignore] + fn invalid_binary_op() { + let errors = type_check("'test' - 4"); + assert_eq!( + errors, + [TypeError::new( + TypeErrorKind::TypeMismatch { + expected: "Int".to_owned(), + expected_due_to: Some(SourceLocation::new(PathBuf::from("main.msh"), 9..10)), + actual: "String".to_owned(), + }, + SourceLocation::new(PathBuf::from("main.msh"), 0..10), + )] + ); + } + + #[test] + fn use_struct_attribute() { + let errors = type_check("struct Bar { test: Int }; fun test(b: Bar) -> Int = $b.test"); + assert_eq!(errors, []); + } + + #[test] + fn use_unknown_struct_attribute() { + let errors = type_check("struct Foo { bar: String }; fun test(b: Foo) -> String = $b.test"); + assert_eq!( + errors, + [TypeError::new( + TypeErrorKind::UnknownField { + name: "test".to_owned(), + type_name: "Foo".to_owned(), + available: vec!["bar".to_owned()] + }, + SourceLocation::new(PathBuf::from("main.msh"), 60..64), + )] + ); + } + + #[test] + fn generic_identity_function() { + let errors = type_check("fun identity[T](x: T) -> T = $x\nval x: Int = identity(5)"); + assert_eq!(errors, []); + } + + #[test] + fn generic_explicit_function() { + let errors = 
type_check("fun try[T](x: T); try::[String](5)"); + assert_eq!( + errors, + [TypeError::new( + TypeErrorKind::TypeMismatch { + expected: "String".to_owned(), + expected_due_to: Some(SourceLocation::new(PathBuf::from("main.msh"), 11..15)), + actual: "Int".to_owned(), + }, + SourceLocation::new(PathBuf::from("main.msh"), 32..33), + )] + ); + } + + #[test] + fn cannot_infer_generic() { + let errors = type_check("fun bar[T]() -> T; bar()"); + assert_eq!( + errors, + [TypeError::new( + TypeErrorKind::TypeAnnotationRequired { + types: vec!["T".to_owned()], + insert_at: 22, + }, + SourceLocation::new(PathBuf::from("main.msh"), 19..24), + )] + ); + } + + #[test] + fn simple_return() { + let errors = type_check("fun foo() -> Int = true"); + assert_eq!( + errors, + [TypeError::new( + TypeErrorKind::TypeMismatch { + expected: "Int".to_owned(), + expected_due_to: Some(SourceLocation::new(PathBuf::from("main.msh"), 13..16)), + actual: "Bool".to_owned(), + }, + SourceLocation::new(PathBuf::from("main.msh"), 19..23), + )] + ); + } + + #[test] + fn block_return() { + let errors = type_check( + "fun foo() -> String = { + if true { return 5 } + return 'test' + }", + ); + assert_eq!( + errors, + [TypeError::new( + TypeErrorKind::TypeMismatch { + expected: "String".to_owned(), + expected_due_to: Some(SourceLocation::new(PathBuf::from("main.msh"), 13..19)), + actual: "Int".to_owned(), + }, + SourceLocation::new(PathBuf::from("main.msh"), 46..54), + )] + ); + } + + #[test] + fn return_outside_function() { + let errors = type_check("return"); + assert_eq!( + errors, + [TypeError::new( + TypeErrorKind::ReturnOutsideFunction, + SourceLocation::new(PathBuf::from("main.msh"), 0..6), + )] + ); + } + + #[test] + fn repeated_self_parameter() { + let errors = type_check( + "struct Foo {} + impl Foo { + fun foo(self, self) = {} + }", + ); + assert_eq!( + errors, + [TypeError::new( + TypeErrorKind::RepeatedParameterName { + name: "self".to_owned(), + previous: 53..57, + }, + SourceLocation::new(PathBuf::from("main.msh"), 59..63), + )] + ); + } + + #[test] + fn access_self() { + let errors = type_check( + "struct Foo { bar: Int } + impl Foo { + fun get_bar(self) -> Int = $self.bar + }", + ); + assert_eq!(errors, []); + } + + #[test] + fn impl_template() { + let errors = type_check( + "struct Vec[T] {} + impl[T] Vec[T] { + fun get(self) -> T; + }", + ); + assert_eq!(errors, []); + } + + #[test] + fn impl_not_generic_struct() { + let errors = type_check("struct Test {}\nimpl Test[Int] { fun play(); }"); + assert_eq!( + errors, + [TypeError::new( + TypeErrorKind::ArityMismatch { + expected: 0, + received: 1, + }, + SourceLocation::new(PathBuf::from("main.msh"), 25..28), + )] + ); + } + + #[test] + fn method_like_field() { + let errors = type_check( + "struct Bar {}\nimpl Bar { fun foo(self, count: Int); }\nfun take(bar: Bar) = $bar.foo", + ); + assert_eq!( + errors, + [TypeError::new( + TypeErrorKind::MethodLikeFieldAccess { + name: "foo".to_owned(), + parentheses: "(_)".to_owned(), + }, + SourceLocation::new(PathBuf::from("main.msh"), 80..83) + )] + ); + } + + #[test] + fn accessing_generic_type() { + let errors = + type_check("struct Bar[T] { count: T }\nfun take(bar: Bar[Int]) -> Int = $bar.count"); + assert_eq!(errors, []); + } + + #[test] + fn accessing_indirect_generic_type() { + let errors = type_check( + "struct Vec[T] {} + struct Foo[T] { vec: Vec[T] } + fun take(foo: Foo[String]) -> Vec[String] = $foo.vec", + ); + assert_eq!(errors, []); + } + + #[test] + fn incorrect_type_parameter_assign() { + let errors = 
type_check("fun id[T]() -> T; val j: Int = id[String]()"); + assert_eq!( + errors, + [TypeError::new( + TypeErrorKind::TypeMismatch { + expected: "Int".to_owned(), + expected_due_to: Some(SourceLocation::new(PathBuf::from("main.msh"), 25..28)), + actual: "String".to_owned(), + }, + SourceLocation::new(PathBuf::from("main.msh"), 31..43), + )] + ); + } + + #[test] + fn magic_indirect_type() { + let errors = type_check( + "struct List[T] {} + fun create[T]() -> List[T]; + val a: List[Int] = create() + val b: List[Int] = create[String]() + ", + ); + assert_eq!( + errors, + [TypeError::new( + TypeErrorKind::TypeMismatch { + expected: "List[Int]".to_owned(), + expected_due_to: Some(SourceLocation::new(PathBuf::from("main.msh"), 117..126)), + actual: "List[String]".to_owned(), + }, + SourceLocation::new(PathBuf::from("main.msh"), 129..145), + )] + ); + } + + #[test] + fn return_inference_last_resort() { + let errors = type_check( + "struct Box[T] {} + fun create[T](t: T) -> Box[T]; + val a: Box[Unit] = create('hi') + ", + ); + assert_eq!( + errors, + [TypeError::new( + TypeErrorKind::TypeMismatch { + expected: "Box[Unit]".to_owned(), + expected_due_to: Some(SourceLocation::new(PathBuf::from("main.msh"), 79..88)), + actual: "Box[String]".to_owned(), + }, + SourceLocation::new(PathBuf::from("main.msh"), 91..103), + )] + ); + } + + #[test] + fn inner_type_inference() { + // Maybe point to the T or the other arg? + let errors = type_check( + "struct Box[T] {} + fun box[T](content: T) -> Box[T]; + fun zip[T](a: Box[T], b: Box[T]) -> Box[T]; + zip(box('hi'), box(42)) + ", + ); + assert_eq!( + errors, + [TypeError::new( + TypeErrorKind::TypeMismatch { + expected: "Box[String]".to_owned(), + expected_due_to: Some(SourceLocation::new(PathBuf::from("main.msh"), 97..106)), + actual: "Box[Int]".to_owned(), + }, + SourceLocation::new(PathBuf::from("main.msh"), 146..153), + )] + ); + } + + #[test] + fn different_source_of_inference() { + let errors = type_check( + "struct Box[T] {} + fun test[A, B](a: A) -> Box[B]; + val b: Box[Bool] = test(1); + ", + ); + assert_eq!(errors, []); + } + + #[test] + fn check_variable_multi_files() { + let errors = type_check_multi([ + ( + PathBuf::from("main"), + "use reef::other::letter\nval n: Int = $letter", + ), + (PathBuf::from("other"), "val letter = 'a'"), + ]); + assert_eq!( + errors, + [TypeError::new( + TypeErrorKind::TypeMismatch { + expected: "Int".to_owned(), + expected_due_to: Some(SourceLocation::new(PathBuf::from("main"), 31..34)), + actual: "String".to_owned(), + }, + SourceLocation::new(PathBuf::from("main"), 37..44), + )] + ); + } +} diff --git a/analyzer/src/typing/assign.rs b/analyzer/src/typing/assign.rs new file mode 100644 index 00000000..4737f128 --- /dev/null +++ b/analyzer/src/typing/assign.rs @@ -0,0 +1,148 @@ +use crate::hir::{ExprKind, LocalAssignment, Module, TypedExpr}; +use crate::symbol::{SymbolRegistry, UndefinedSymbol}; +use crate::typing::variable::VariableTable; +use crate::typing::{ascribe_type, Context, TypeChecker, TypeError, TypeErrorKind, TypeHint}; +use crate::SourceLocation; +use ast::operation::{BinaryOperation, BinaryOperator}; +use ast::r#struct::FieldAccess; +use ast::r#use::InclusionPathItem; +use ast::range::Subscript; +use ast::variable::{Assign, AssignOperator}; +use ast::Expr; +use context::source::SourceSegmentHolder; + +pub(super) fn ascribe_assign( + assign: &Assign, + table: &mut VariableTable, + checker: &mut TypeChecker, + storage: &mut Module, + ctx: Context, + errors: &mut Vec, +) -> TypedExpr { + if let 
Expr::Subscript(sub) = assign.left.as_ref() {
+        return ascribe_assign_subscript(assign, sub, table, checker, storage, ctx, errors);
+    }
+    if let Expr::FieldAccess(field) = assign.left.as_ref() {
+        return ascribe_field_assign(assign, field, table, checker, storage, ctx, errors);
+    }
+
+    let left = match assign.left.as_ref() {
+        Expr::VarReference(var) => table.lookup_variable(var.name.name()),
+        Expr::Path(path) => {
+            if let [InclusionPathItem::Symbol(ident)] = path.path.as_slice() {
+                table.lookup_variable(ident.value.as_str())
+            } else {
+                Err(UndefinedSymbol::NotFound)
+            }
+        }
+        _ => Err(UndefinedSymbol::NotFound),
+    };
+
+    let rhs = ascribe_assign_rhs(
+        assign,
+        table,
+        checker,
+        storage,
+        ctx.with_hint(
+            left.as_ref()
+                .map_or(TypeHint::Used, |var| TypeHint::Required(var.ty)),
+        ),
+        errors,
+    );
+    match left {
+        Ok(var) => {
+            if checker.types.unify(rhs.ty, var.ty).is_err() {
+                errors.push(TypeError::new(
+                    TypeErrorKind::TypeMismatch {
+                        expected: checker.display(var.ty),
+                        expected_due_to: None,
+                        actual: checker.display(rhs.ty),
+                    },
+                    SourceLocation::new(table.path().to_owned(), assign.segment()),
+                ));
+            }
+            if !var.can_reassign {
+                errors.push(TypeError::new(
+                    TypeErrorKind::CannotReassign {
+                        name: assign.name().unwrap_or_default(),
+                    },
+                    SourceLocation::new(table.path().to_owned(), assign.segment()),
+                ));
+            }
+            TypedExpr {
+                kind: ExprKind::LocalAssign(LocalAssignment {
+                    identifier: var.id,
+                    rhs: Box::new(rhs),
+                }),
+                ty: var.ty,
+                span: assign.segment(),
+            }
+        }
+        Err(err) => {
+            errors.push(TypeError::new(
+                TypeErrorKind::UndefinedSymbol {
+                    name: assign.name().unwrap_or_default(),
+                    expected: SymbolRegistry::Variable,
+                    found: err.into(),
+                },
+                SourceLocation::new(table.path().to_owned(), assign.segment()),
+            ));
+            TypedExpr::error(assign.left.segment())
+        }
+    }
+}
+
+/// Creates the right-hand side of an assignment.
+///
+/// The [`Context`] hint should carry the expected type of the left-hand side, when it is known.
+fn ascribe_assign_rhs( + assign: &Assign, + table: &mut VariableTable, + checker: &mut TypeChecker, + storage: &mut Module, + ctx: Context, + errors: &mut Vec, +) -> TypedExpr { + match assign.operator { + AssignOperator::Assign => ascribe_type(&assign.value, table, checker, storage, ctx, errors), + operator => { + let binary = Expr::Binary(BinaryOperation { + left: assign.left.clone(), + op: BinaryOperator::try_from(operator).expect("Invalid assign operator"), + right: assign.value.clone(), + }); + ascribe_type( + &binary, + table, + checker, + storage, + ctx.with_hint(TypeHint::Used), + errors, + ) + } + } +} + +fn ascribe_assign_subscript( + assign: &Assign, + sub: &Subscript, + table: &mut VariableTable, + checker: &mut TypeChecker, + module: &mut Module, + ctx: Context, + errors: &mut Vec, +) -> TypedExpr { + todo!() +} + +fn ascribe_field_assign( + assign: &Assign, + field: &FieldAccess, + table: &mut VariableTable, + checker: &mut TypeChecker, + module: &mut Module, + ctx: Context, + errors: &mut Vec, +) -> TypedExpr { + todo!() +} diff --git a/analyzer/src/typing/function.rs b/analyzer/src/typing/function.rs new file mode 100644 index 00000000..d20e6684 --- /dev/null +++ b/analyzer/src/typing/function.rs @@ -0,0 +1,18 @@ +use crate::typing::{Parameter, TypeId}; +use std::path::PathBuf; + +#[derive(Debug, PartialEq, Eq, Clone)] +pub struct Function { + pub declared_at: PathBuf, + pub fqn: PathBuf, + pub generic_variables: Vec, + pub param_types: Vec, + pub return_type: TypeId, + pub kind: FunctionKind, +} + +#[derive(Debug, PartialEq, Eq, Clone, Copy)] +pub enum FunctionKind { + Function, + Constructor, +} diff --git a/analyzer/src/typing/lower.rs b/analyzer/src/typing/lower.rs new file mode 100644 index 00000000..832fcf20 --- /dev/null +++ b/analyzer/src/typing/lower.rs @@ -0,0 +1,102 @@ +use crate::hir::{ExprKind, MethodCall, Module, TypedExpr}; +use crate::typing::registry::STRING_SCHEMA; +use crate::typing::user::{UserType, STRING_TYPE}; +use crate::typing::variable::VariableTable; +use crate::typing::{ascribe_type, Context, TypeChecker, TypeError, TypeErrorKind, TypeHint}; +use crate::SourceLocation; +use ast::value::{LiteralValue, TemplateString}; +use context::source::SourceSegmentHolder; +use std::path::Path; + +pub(super) fn ascribe_template_string( + tpl: &TemplateString, + table: &mut VariableTable, + checker: &mut TypeChecker, + storage: &mut Module, + ctx: Context, + errors: &mut Vec, +) -> TypedExpr { + if tpl.parts.is_empty() { + return TypedExpr { + kind: ExprKind::Literal(LiteralValue::String(String::new())), + ty: STRING_TYPE, + span: tpl.segment(), + }; + } + + let mut it = tpl.parts.iter().map(|part| { + let typed_part = ascribe_type( + part, + table, + checker, + storage, + ctx.with_hint(TypeHint::Required(STRING_TYPE)), + errors, + ); + convert_into_string(typed_part, checker, table.path(), errors) + }); + let acc = it.next().unwrap(); + it.fold(acc, |acc, current| { + let span = current.span.clone(); + TypedExpr { + kind: ExprKind::MethodCall(MethodCall { + callee: Box::new(acc), + arguments: vec![current], + function_id: todo!("String concatenation"), + }), + ty: STRING_TYPE, + span, + } + }) +} + +pub(super) fn convert_into_string( + expr: TypedExpr, + checker: &mut TypeChecker, + path: &Path, + errors: &mut Vec, +) -> TypedExpr { + match &checker.types[expr.ty] { + UserType::Error => expr, + UserType::Parametrized { schema, .. 
} => { + if *schema == STRING_SCHEMA { + return expr; + } + let schema = &checker.registry[*schema]; + if let Some(method) = + schema.get_exact_method(&checker.registry, "to_string", &[], STRING_TYPE) + { + let span = expr.span.clone(); + TypedExpr { + kind: ExprKind::MethodCall(MethodCall { + callee: Box::new(expr), + arguments: Vec::new(), + function_id: method, + }), + ty: STRING_TYPE, + span, + } + } else { + errors.push(TypeError::new( + TypeErrorKind::UnknownMethod { + name: "to_string".to_owned(), + type_name: checker.display(expr.ty), + }, + SourceLocation::new(path.to_owned(), expr.span.clone()), + )); + expr + } + } + _ => { + errors.push(TypeError::new( + TypeErrorKind::TypeMismatch { + expected: checker.display(STRING_TYPE), + expected_due_to: None, + actual: checker.display(expr.ty), + }, + SourceLocation::new(path.to_owned(), expr.span.clone()), + )); + expr + } + } +} diff --git a/analyzer/src/typing/registry.rs b/analyzer/src/typing/registry.rs new file mode 100644 index 00000000..a9af3416 --- /dev/null +++ b/analyzer/src/typing/registry.rs @@ -0,0 +1,86 @@ +use crate::typing::function::Function; +use crate::typing::schema::Schema; +use crate::typing::user::GENERIC_TYPE; +use std::ops::{Index, IndexMut}; + +#[derive(Clone)] +pub struct Registry { + schemas: Vec, + functions: Vec, +} + +#[derive(Debug, PartialEq, Eq, Clone, Copy)] +pub struct SchemaId(usize); + +#[derive(Debug, PartialEq, Eq, Clone, Copy)] +pub struct FunctionId(usize); + +impl Default for Registry { + fn default() -> Self { + Self { + schemas: vec![ + Schema::new("Int".to_owned()), + Schema::new("Bool".to_owned()), + Schema::new("Exitcode".to_owned()), + Schema::new("Float".to_owned()), + Schema::new("String".to_owned()), + Schema::generic("Vec".to_owned(), vec![GENERIC_TYPE]), + Schema::new("Glob".to_owned()), + Schema::new("Pid".to_owned()), + ], + functions: Vec::new(), + } + } +} + +pub const INT_SCHEMA: SchemaId = SchemaId(0); +pub const BOOL_SCHEMA: SchemaId = SchemaId(1); +pub const EXITCODE_SCHEMA: SchemaId = SchemaId(2); +pub const FLOAT_SCHEMA: SchemaId = SchemaId(3); +pub const STRING_SCHEMA: SchemaId = SchemaId(4); +pub const VEC_SCHEMA: SchemaId = SchemaId(5); +pub const GLOB_SCHEMA: SchemaId = SchemaId(6); +pub const PID_SCHEMA: SchemaId = SchemaId(7); + +impl Registry { + /// Allocates a new [`SchemaId`] for the given [`Schema`]. + pub(crate) fn define_schema(&mut self, schema: Schema) -> SchemaId { + let id = self.schemas.len(); + self.schemas.push(schema); + SchemaId(id) + } + + /// Allocates a new [`FunctionId`] for the given [`Function`]. + pub(crate) fn define_function(&mut self, function: Function) -> FunctionId { + let id = self.functions.len(); + self.functions.push(function); + FunctionId(id) + } +} + +macro_rules! 
impl_index { + ($id:ty, $output:ty, $field:ident) => { + impl Index<$id> for Registry { + type Output = $output; + + fn index(&self, index: $id) -> &Self::Output { + &self.$field[index.0] + } + } + + impl IndexMut<$id> for Registry { + fn index_mut(&mut self, index: $id) -> &mut Self::Output { + &mut self.$field[index.0] + } + } + + impl $id { + pub fn get(self) -> usize { + self.0 + } + } + }; +} + +impl_index!(SchemaId, Schema, schemas); +impl_index!(FunctionId, Function, functions); diff --git a/analyzer/src/typing/schema.rs b/analyzer/src/typing/schema.rs new file mode 100644 index 00000000..1e375491 --- /dev/null +++ b/analyzer/src/typing/schema.rs @@ -0,0 +1,64 @@ +use crate::typing::registry::{FunctionId, Registry}; +use crate::typing::{Parameter, TypeId}; +use std::collections::HashMap; + +/// A structure definition, describing a type with fields and methods. +#[derive(Debug, PartialEq, Eq, Clone)] +pub struct Schema { + /// The display name of the schema. + pub name: String, + + /// The [`crate::typing::user::UserType::GenericVariable`]s used. + pub generic_variables: Vec, + + /// The fields and their types. + pub fields: HashMap, + + /// The methods and their types. + pub methods: HashMap, +} + +impl Schema { + /// Creates a new schema. + pub fn new(name: String) -> Self { + Self { + name, + generic_variables: Vec::new(), + fields: HashMap::new(), + methods: HashMap::new(), + } + } + + /// Creates a new generic schema. + pub fn generic(name: String, generic_variables: Vec) -> Self { + Self { + name, + generic_variables, + fields: HashMap::new(), + methods: HashMap::new(), + } + } + + pub fn get_exact_method( + &self, + registry: &Registry, + name: &str, + params: &[TypeId], + return_ty: TypeId, + ) -> Option { + self.methods.get(name).and_then(|&id| { + let func = ®istry[id]; + if func + .param_types + .iter() + .map(|param| param.ty) + .eq(params.iter().copied()) + && func.return_type == return_ty + { + Some(id) + } else { + None + } + }) + } +} diff --git a/analyzer/src/typing/shell.rs b/analyzer/src/typing/shell.rs new file mode 100644 index 00000000..14c03e92 --- /dev/null +++ b/analyzer/src/typing/shell.rs @@ -0,0 +1,149 @@ +use crate::hir::{ExprKind, Module, Redir, Redirect, Subprocess, Substitute, TypedExpr}; +use crate::typing::lower::convert_into_string; +use crate::typing::user::{EXITCODE_TYPE, GLOB_TYPE, INT_TYPE, PID_TYPE, STRING_TYPE}; +use crate::typing::variable::VariableTable; +use crate::typing::{ascribe_type, Context, TypeChecker, TypeError, TypeErrorKind, TypeHint}; +use crate::SourceLocation; +use ast::call::{Call, Detached, Pipeline, RedirOp, Redirected}; +use ast::substitution::Substitution; +use context::source::SourceSegmentHolder; + +pub(super) fn ascribe_call( + call: &Call, + table: &mut VariableTable, + checker: &mut TypeChecker, + storage: &mut Module, + ctx: Context, + errors: &mut Vec, +) -> TypedExpr { + let args = call + .arguments + .iter() + .map(|expr| { + let expr = ascribe_type(expr, table, checker, storage, ctx, errors); + if expr.ty == GLOB_TYPE { + todo!("globbing") + } else { + convert_into_string(expr, checker, table.path(), errors) + } + }) + .collect::>(); + + TypedExpr { + kind: ExprKind::ProcessCall(args), + span: call.segment(), + ty: EXITCODE_TYPE, + } +} + +pub(super) fn ascribe_redirected( + redirected: &Redirected, + table: &mut VariableTable, + checker: &mut TypeChecker, + storage: &mut Module, + ctx: Context, + errors: &mut Vec, +) -> TypedExpr { + let expr = ascribe_type(&redirected.expr, table, checker, storage, ctx, 
errors); + let mut redirections = Vec::with_capacity(redirected.redirections.len()); + for redirection in &redirected.redirections { + let operand = ascribe_type(&redirection.operand, table, checker, storage, ctx, errors); + let operand = if matches!(redirection.operator, RedirOp::FdIn | RedirOp::FdOut) { + if operand.ty != INT_TYPE { + errors.push(TypeError::new( + TypeErrorKind::TypeMismatch { + expected: checker.display(INT_TYPE), + expected_due_to: None, + actual: checker.display(operand.ty), + }, + SourceLocation::new(table.path().to_owned(), operand.span.clone()), + )); + } + operand + } else { + convert_into_string(operand, checker, table.path(), errors) + }; + redirections.push(Redir { + fd: redirection.fd, + operator: redirection.operator, + operand: Box::new(operand), + }); + } + let ty = expr.ty; + TypedExpr { + kind: ExprKind::Redirect(Redirect { + expression: Box::new(expr), + redirections, + }), + ty, + span: redirected.segment(), + } +} + +pub(super) fn ascribe_detached( + detached: &Detached, + table: &mut VariableTable, + checker: &mut TypeChecker, + storage: &mut Module, + ctx: Context, + errors: &mut Vec, +) -> TypedExpr { + let expr = ascribe_type(&detached.underlying, table, checker, storage, ctx, errors); + TypedExpr { + kind: ExprKind::Subprocess(Subprocess { + inner: Box::new(expr), + awaited: false, + }), + ty: PID_TYPE, + span: detached.segment(), + } +} + +pub(super) fn ascribe_pipeline( + pipeline: &Pipeline, + table: &mut VariableTable, + checker: &mut TypeChecker, + storage: &mut Module, + ctx: Context, + errors: &mut Vec, +) -> TypedExpr { + let mut commands = Vec::with_capacity(pipeline.commands.len()); + for command in &pipeline.commands { + commands.push(ascribe_type(command, table, checker, storage, ctx, errors)); + } + TypedExpr { + kind: ExprKind::Pipeline(commands), + ty: EXITCODE_TYPE, + span: pipeline.segment(), + } +} + +pub(super) fn ascribe_substitution( + substitution: &Substitution, + table: &mut VariableTable, + checker: &mut TypeChecker, + storage: &mut Module, + ctx: Context, + errors: &mut Vec, +) -> TypedExpr { + let state = ctx.with_hint(TypeHint::Unused); + let commands = substitution + .underlying + .expressions + .iter() + .map(|command| ascribe_type(command, table, checker, storage, state, errors)) + .collect::>(); + TypedExpr { + kind: match substitution.kind { + ast::substitution::SubstitutionKind::Capture => ExprKind::Capture(commands), + ast::substitution::SubstitutionKind::Process { direction } => { + ExprKind::Substitute(match direction { + ast::substitution::Direction::Input => Substitute::In(commands), + ast::substitution::Direction::Output => Substitute::Out(commands), + }) + } + }, + ty: STRING_TYPE, + span: substitution.segment(), + } +} diff --git a/analyzer/src/typing/user.rs b/analyzer/src/typing/user.rs new file mode 100644 index 00000000..6052567a --- /dev/null +++ b/analyzer/src/typing/user.rs @@ -0,0 +1,205 @@ +use crate::typing::registry::{self, FunctionId, SchemaId}; +use crate::typing::UnifyError; +use std::ops::Index; + +/// A user-defined type that can be referenced by a [`TypeId`]. 
+#[derive(Debug, PartialEq, Eq, Clone, Default)] +pub enum UserType { + #[default] + Unknown, + + Error, + + Nothing, + + Unit, + + Function(FunctionId), + + Parametrized { + schema: SchemaId, + params: Vec, + }, + + Module(Vec), + + GenericVariable(String), +} + +impl From for UserType { + fn from(func: FunctionId) -> Self { + Self::Function(func) + } +} + +impl From for UserType { + fn from(schema: SchemaId) -> Self { + Self::Parametrized { + schema, + params: Vec::new(), + } + } +} + +/// A collection of types that can be referenced by an [`TypeId`]. +pub struct TypeArena { + types: Vec, +} + +impl TypeArena { + /// Allocates a new [`TypeId`] for the given [`UserType`]. + pub(crate) fn alloc(&mut self, ty: UserType) -> TypeId { + let id = self.types.len(); + self.types.push(ty); + TypeId(id) + } + + pub(crate) fn unify(&mut self, rhs: TypeId, assign_to: TypeId) -> Result { + match (&self[assign_to], &self[rhs]) { + (UserType::Error, _) | (_, UserType::Error) => Ok(ERROR_TYPE), + (_, UserType::Nothing) => Ok(assign_to), + (UserType::Unknown, _) | (_, UserType::Unknown) => { + panic!("Unknown type should not be unified") + } + (lhs, rhs) if lhs == rhs => Ok(assign_to), + (_, _) => Err(UnifyError), + } + } + + /// Given a possible generic type, create a parameterized variant for the given context. + pub(crate) fn concretize( + &mut self, + ty: TypeId, + generics: &[TypeId], + params: &[TypeId], + ) -> TypeId { + assert_eq!(generics.len(), params.len(), "expected same length between generics {generics:?} and their concretized counterparts {params:?}"); + match &self[ty] { + UserType::Parametrized { + schema, + params: sub_params, + } => { + let concrete_params = sub_params + .iter() + .map(|ty| { + if let Some(concrete_ty) = generics.iter().position(|&pty| pty == *ty) { + params[concrete_ty] + } else { + *ty + } + }) + .collect::>(); + self.alloc(UserType::Parametrized { + schema: *schema, + params: concrete_params, + }) + } + _ => generics + .iter() + .position(|&pty| pty == ty) + .map_or(ty, |idx| params[idx]), + } + } +} + +/// An access key to one of the types in the [`TypeArena`]. +/// +/// An identifier should only be used with the [`TypeArena`] that it was created with. +/// Only the placeholder value [`TypeId::default`] and the predefined constants are guaranteed to be +/// valid across all [`TypeArena`]s. +#[derive(Debug, Clone, PartialEq, Eq, Copy, Default)] +pub struct TypeId(usize); + +pub const UNKNOWN_TYPE: TypeId = TypeId(0); +pub const ERROR_TYPE: TypeId = TypeId(1); +pub const NOTHING_TYPE: TypeId = TypeId(2); +pub const UNIT_TYPE: TypeId = TypeId(3); +pub const INT_TYPE: TypeId = TypeId(4); +pub const BOOL_TYPE: TypeId = TypeId(5); +pub const EXITCODE_TYPE: TypeId = TypeId(6); +pub const FLOAT_TYPE: TypeId = TypeId(7); +pub const STRING_TYPE: TypeId = TypeId(8); +pub const GENERIC_TYPE: TypeId = TypeId(9); +pub const VECTOR_TYPE: TypeId = TypeId(10); +pub const GLOB_TYPE: TypeId = TypeId(11); +pub const PID_TYPE: TypeId = TypeId(13); + +/// Gets the [`TypeId`] for a built-in type by its name. 
+pub(crate) fn lookup_builtin_type(name: &str) -> Option { + match name { + "Nothing" => Some(NOTHING_TYPE), + "Unit" => Some(UNIT_TYPE), + "Int" => Some(INT_TYPE), + "Bool" => Some(BOOL_TYPE), + "Exitcode" => Some(EXITCODE_TYPE), + "Float" => Some(FLOAT_TYPE), + "String" => Some(STRING_TYPE), + "Vec" => Some(VECTOR_TYPE), + "Glob" => Some(GLOB_TYPE), + "Pid" => Some(PID_TYPE), + _ => None, + } +} + +impl TypeId { + pub fn is_ok(self) -> bool { + self != ERROR_TYPE + } + + pub fn is_err(self) -> bool { + self == ERROR_TYPE + } + + pub fn is_obj(self) -> bool { + !matches!( + self, + NOTHING_TYPE + | UNIT_TYPE + | INT_TYPE + | BOOL_TYPE + | EXITCODE_TYPE + | FLOAT_TYPE + | PID_TYPE + | ERROR_TYPE + ) + } + + pub fn define_if_absent(&mut self, ty: Self) { + if *self == UNKNOWN_TYPE { + self.0 = ty.0; + } + } +} + +impl Default for TypeArena { + fn default() -> Self { + Self { + types: vec![ + UserType::Unknown, + UserType::Error, + UserType::Nothing, + UserType::Unit, + UserType::from(registry::INT_SCHEMA), + UserType::from(registry::BOOL_SCHEMA), + UserType::from(registry::EXITCODE_SCHEMA), + UserType::from(registry::FLOAT_SCHEMA), + UserType::from(registry::STRING_SCHEMA), + UserType::GenericVariable("T".to_owned()), + UserType::Parametrized { + schema: registry::VEC_SCHEMA, + params: vec![GENERIC_TYPE], + }, + UserType::from(registry::GLOB_SCHEMA), + UserType::from(registry::PID_SCHEMA), + ], + } + } +} + +impl Index for TypeArena { + type Output = UserType; + + fn index(&self, index: TypeId) -> &Self::Output { + &self.types[index.0] + } +} diff --git a/analyzer/src/typing/variable.rs b/analyzer/src/typing/variable.rs new file mode 100644 index 00000000..c9591e6e --- /dev/null +++ b/analyzer/src/typing/variable.rs @@ -0,0 +1,242 @@ +use crate::hir::NamedExports; +use crate::module::Export; +use crate::symbol::{Symbol, SymbolRegistry, SymbolTable, UndefinedSymbol}; +use crate::typing::user::TypeId; +use context::source::Span; +use std::collections::HashMap; +use std::ops::Index; +use std::path::Path; +use std::rc::Rc; + +/// A variable identifier. +#[derive(Debug, PartialEq, Eq, Clone)] +pub enum Var { + /// The variable is scoped to a [`LocalEnvironment`]. + Local(LocalId), + + /// The variable is a global variable and lives as long as the module. + Global(Rc), +} + +/// A key for a [`LocalEnvironment`]. +#[derive(Debug, PartialEq, Eq, Clone, Copy, Hash)] +pub struct LocalId(pub usize); + +/// A table that tracks the symbols and variables properties. +/// +/// While the [`SymbolTable`] binds the symbols to their names, this table also tracks +/// variable-specific properties, i.e. if they are globals, locals or captures, and if they can be +/// reassigned. +pub(crate) struct VariableTable<'a> { + /// The inner symbol table that this table is based on. + inner: &'a mut SymbolTable, + + /// The environments that are currently in scope. + environments: Vec, + + /// The global variables that this table created. + globals: HashMap, + + /// The mapping from symbol indices in the symbol table to local variables. + /// + /// If not present, the variable is assumed to be a global. + symbols_to_locals: HashMap, +} + +/// A reduced [`SymbolRegistry`] that identify symbols, but not variables. +#[derive(Debug, PartialEq, Eq, Clone, Copy)] +pub enum SymbolEntry { + /// A named function. + Function, + + /// A named type, either a struct or a module. 
+ Type, +} + +impl From for SymbolRegistry { + fn from(entry: SymbolEntry) -> Self { + match entry { + SymbolEntry::Function => SymbolRegistry::Function, + SymbolEntry::Type => SymbolRegistry::Type, + } + } +} + +impl<'a> VariableTable<'a> { + /// Create a new variable table for the given symbol table. + pub(super) fn new(inner: &'a mut SymbolTable) -> Self { + Self { + inner, + environments: Vec::new(), + globals: HashMap::new(), + symbols_to_locals: HashMap::new(), + } + } + + /// Insert a new local variable into the table. + pub(super) fn insert_variable( + &mut self, + name: String, + ty: TypeId, + declared_at: Span, + can_reassign: bool, + ) -> Var { + let id = if self.inner.current_depth == 0 { + let name_id = Rc::new(name.clone()); + self.globals.insert( + name.clone(), + GlobalInfo { + id: name_id.clone(), + can_reassign, + ty, + }, + ); + Var::Global(name_id) + } else { + let env_id = self.environments.len() - 1; + let env = self + .environments + .last_mut() + .expect("At least one environment should exist"); + let id = LocalId(env.locals.len()); + self.symbols_to_locals + .insert(self.inner.len(), (env_id, id)); + env.locals.push(LocalInfo { + id, + can_reassign, + ty, + }); + Var::Local(id) + }; + self.inner + .insert_local(name, ty, declared_at, SymbolRegistry::Variable); + id + } + + pub(super) fn insert_local( + &mut self, + name: String, + ty: TypeId, + declared_at: Span, + entry: SymbolEntry, + ) { + self.inner.insert_local(name, ty, declared_at, entry.into()); + } + + pub(super) fn insert_remote(&mut self, name: String, imported_at: Span, export: &Export) { + self.inner.insert_remote(name, imported_at, export); + } + + pub(super) fn get(&self, name: &str, registry: SymbolRegistry) -> Option<&Symbol> { + self.inner.get(name, registry) + } + + pub(super) fn lookup( + &self, + name: &str, + registry: SymbolRegistry, + ) -> Result<&Symbol, UndefinedSymbol> { + self.inner.lookup(name, registry) + } + + pub(super) fn lookup_variable(&self, name: &str) -> Result { + match self.inner.lookup_position(name, SymbolRegistry::Variable) { + Ok((idx, symbol)) => Ok(match self.symbols_to_locals.get(&idx) { + Some((env, local)) => VariableInfo::from(self.environments[*env][*local].clone()), + None => { + if let Some(info) = self.globals.get(name) { + // Locally declared global variable + VariableInfo::from(info.clone()) // TODO optimize + } else { + // External global variable + VariableInfo { + id: Var::Global(Rc::new(name.to_owned())), + can_reassign: false, + ty: symbol.ty, + } + } + } + }), + Err(err) => Err(err), + } + } + + pub(super) fn enter_scope(&mut self) { + self.inner.enter_scope(); + } + + pub(super) fn exit_scope(&mut self) { + self.inner.exit_scope(); + } + + pub(super) fn push_environment(&mut self) { + self.environments.push(LocalEnvironment::default()); + } + + pub(super) fn pop_environment(&mut self) -> LocalEnvironment { + self.environments + .pop() + .expect("At least one environment should exist") + } + + pub(super) fn path(&self) -> &Path { + &self.inner.path + } + + pub(super) fn take_exports(&mut self) -> NamedExports { + self.globals + .drain() + .map(|(_, info)| (info.id, info.ty)) + .collect() + } +} + +#[derive(Default)] +pub struct LocalEnvironment { + /// All variables that are owned by this environment, independently of their scope. + pub locals: Vec, + + /// Tells which locals in this environment are captures. + /// + /// Upvalues mark variables that belong to a parent environment. 
+ pub upvalues: Vec, +} + +#[derive(Clone)] +pub struct AssignableInfo { + pub id: I, + pub can_reassign: bool, + pub ty: TypeId, +} + +pub type LocalInfo = AssignableInfo; +pub type GlobalInfo = AssignableInfo>; +pub type VariableInfo = AssignableInfo; + +impl Index for LocalEnvironment { + type Output = LocalInfo; + + fn index(&self, index: LocalId) -> &Self::Output { + &self.locals[index.0] + } +} + +impl From for VariableInfo { + fn from(local: LocalInfo) -> Self { + Self { + id: Var::Local(local.id), + can_reassign: local.can_reassign, + ty: local.ty, + } + } +} + +impl From for VariableInfo { + fn from(global: GlobalInfo) -> Self { + Self { + id: Var::Global(global.id), + can_reassign: global.can_reassign, + ty: global.ty, + } + } +} diff --git a/analyzer/tests/collect_debug.rs b/analyzer/tests/collect_debug.rs index 2446331f..daaf0870 100644 --- a/analyzer/tests/collect_debug.rs +++ b/analyzer/tests/collect_debug.rs @@ -1,211 +1,211 @@ -use std::collections::HashSet; - -use analyzer::analyze; -use analyzer::engine::Engine; -use pretty_assertions::assert_eq; - -use analyzer::environment::symbols::{Symbol, SymbolLocation, SymbolRegistry}; -use analyzer::importer::StaticImporter; -use analyzer::imports::Imports; -use analyzer::name::Name; -use analyzer::reef::{Externals, Reef, ReefId}; -use analyzer::relations::{ - LocalId, Relation, RelationId, RelationState, Relations, ResolvedSymbol, SourceId, SymbolRef, -}; -use analyzer::steps::collect::SymbolCollector; -use analyzer::steps::resolve::SymbolResolver; -use context::str_find::{find_between, find_in}; -use parser::parse_trusted; - -#[test] -fn collect_sample() { - let source = include_str!("debug_sample.msh"); - let root_name = Name::new("debug_sample"); - let lib_name = Name::new("lib"); - - let mut engine = Engine::default(); - let mut relations = Relations::default(); - let mut imports = Imports::default(); - - let mut to_visit = vec![root_name.clone()]; - let mut visited = HashSet::new(); - let mut importer = StaticImporter::new( - [ - (root_name.clone(), source), - (lib_name.clone(), "val LOG_FILE = 'debug.log'; val n = 1"), - ], - parse_trusted, - ); - - let mut externals = Externals::default(); - let lib_reef = Reef::new( - "lib".to_owned(), - analyze(lib_name, &mut importer, &externals), - ); - externals.register(lib_reef); - - let diagnostics = SymbolCollector::collect_symbols( - &mut engine, - &mut relations, - &mut imports, - &externals, - &mut to_visit, - &mut visited, - &mut importer, - ); - assert_eq!(diagnostics, vec![]); - - let diagnostics = SymbolResolver::resolve_symbols( - &engine, - &mut relations, - &mut imports, - &externals, - &mut to_visit, - &visited, - ); - assert_eq!(diagnostics, vec![]); - - let root_env = engine - .get_environment(SourceId(0)) - .expect("Unable to get root environment"); - assert_eq!( - root_env.get_raw_symbol(find_between(source, "fun factorial(", "return $a\n}")), - Some(SymbolRef::Local(LocalId(0))) - ); - assert_eq!( - root_env.get_raw_symbol(find_between(source, "fun debug(", "wait\n}")), - Some(SymbolRef::Local(LocalId(1))) - ); - - let factorial_env = engine - .get_environment(SourceId(1)) - .expect("Unable to get factorial environment"); - assert_eq!(factorial_env.fqn, root_name.child("factorial")); - let variables = factorial_env.symbols.all(); - assert_eq!( - variables, - &vec![ - Symbol::scoped("n".to_owned(), 0), - Symbol::scoped("a".to_owned(), 1), - Symbol::scoped("i".to_owned(), -2), - ] - ); - let exported = factorial_env.symbols.exported_symbols().collect::>(); - 
assert_eq!(exported, vec![]); - - let n_parameter = factorial_env - .symbols - .find_reachable("n", SymbolRegistry::Objects) - .map(SymbolRef::Local) - .expect("Unable to get n symbol"); - - assert_eq!( - factorial_env.get_raw_symbol(find_in(source, "$n")), - Some(n_parameter) - ); - let references = { - let mut references = factorial_env.find_references(n_parameter); - references.sort_by_key(|range| range.start); - references - }; - assert_eq!( - references, - vec![find_in(source, "n: Int"), find_in(source, "$n")] - ); - - let debug_env = engine - .get_environment(SourceId(2)) - .expect("Unable to get debug() environment"); - assert_eq!(debug_env.fqn, root_name.child("debug")); - let usages = debug_env.symbols.external_symbols().collect::>(); - assert_eq!( - usages, - vec![( - &SymbolLocation::unspecified(Name::new("LOG_FILE")), - RelationId(2) - )] - ); - assert_eq!( - relations[RelationId(2)], - Relation { - origin: SourceId(2), - state: RelationState::Resolved(ResolvedSymbol { - reef: ReefId(1), - source: SourceId(0), - object_id: LocalId(0), - }), - registry: SymbolRegistry::Objects - } - ); - - let callback_env = engine - .get_environment(SourceId(4)) - .expect("Unable to get callback environment"); - assert_eq!(callback_env.fqn, root_name.child("main").child("callback")); - - let mut globals = callback_env.symbols.external_symbols().collect::>(); - globals.sort_by_key(|(loc, _)| &loc.name); - assert_eq!( - globals, - vec![ - ( - &SymbolLocation::unspecified(Name::new("count")), - RelationId(3) - ), - ( - &SymbolLocation::unspecified(Name::new("factorial")), - RelationId(4) - ), - (&SymbolLocation::unspecified(Name::new("n")), RelationId(5)), - ] - ); - - let reef_id = ReefId(2); - - assert_eq!( - relations[RelationId(3)], - Relation { - origin: SourceId(4), - state: RelationState::Resolved(ResolvedSymbol { - reef: reef_id, - source: SourceId(3), - object_id: LocalId(0), - }), - registry: SymbolRegistry::Objects - } - ); - assert_eq!( - relations[RelationId(4)], - Relation { - origin: SourceId(4), - state: RelationState::Resolved(ResolvedSymbol { - reef: reef_id, - source: SourceId(0), - object_id: LocalId(0), - }), - registry: SymbolRegistry::Objects - } - ); - assert_eq!( - relations[RelationId(5)], - Relation { - origin: SourceId(4), - state: RelationState::Resolved(ResolvedSymbol { - reef: reef_id, - source: SourceId(0), - object_id: LocalId(3), - }), - registry: SymbolRegistry::Objects - } - ); - - let lambda_env = engine - .get_environment(SourceId(5)) - .expect("Unable to get lambda environment"); - - let variables = lambda_env.symbols.external_symbols().collect::>(); - assert_eq!( - variables, - vec![(&SymbolLocation::unspecified(Name::new("n")), RelationId(6))] - ); -} +// use std::collections::HashSet; +// +// use analyzer::analyze; +// use analyzer::engine::Engine; +// use pretty_assertions::assert_eq; +// +// use analyzer::environment::symbols::{Symbol, SymbolLocation, SymbolRegistry}; +// use analyzer::importer::StaticImporter; +// use analyzer::imports::Imports; +// use analyzer::name::Name; +// use analyzer::reef::{Externals, Reef, ReefId}; +// use analyzer::relations::{ +// LocalId, Relation, RelationId, RelationState, Relations, ResolvedSymbol, SourceId, SymbolRef, +// }; +// use analyzer::steps::collect::SymbolCollector; +// use analyzer::steps::resolve::SymbolResolver; +// use context::str_find::{find_between, find_in}; +// use parser::parse_trusted; +// +// #[test] +// fn collect_sample() { +// let source = include_str!("debug_sample.msh"); +// let root_name = 
Name::new("debug_sample"); +// let lib_name = Name::new("lib"); +// +// let mut engine = Engine::default(); +// let mut relations = Relations::default(); +// let mut imports = Imports::default(); +// +// let mut to_visit = vec![root_name.clone()]; +// let mut visited = HashSet::new(); +// let mut importer = StaticImporter::new( +// [ +// (root_name.clone(), source), +// (lib_name.clone(), "val LOG_FILE = 'debug.log'; val n = 1"), +// ], +// parse_trusted, +// ); +// +// let mut externals = Externals::default(); +// let lib_reef = Reef::new( +// "lib".to_owned(), +// analyze(lib_name, &mut importer, &externals), +// ); +// externals.register(lib_reef); +// +// let diagnostics = SymbolCollector::collect_symbols( +// &mut engine, +// &mut relations, +// &mut imports, +// &externals, +// &mut to_visit, +// &mut visited, +// &mut importer, +// ); +// assert_eq!(diagnostics, vec![]); +// +// let diagnostics = SymbolResolver::resolve_symbols( +// &engine, +// &mut relations, +// &mut imports, +// &externals, +// &mut to_visit, +// &visited, +// ); +// assert_eq!(diagnostics, vec![]); +// +// let root_env = engine +// .get_environment(SourceId(0)) +// .expect("Unable to get root environment"); +// assert_eq!( +// root_env.get_raw_symbol(find_between(source, "fun factorial(", "return $a\n}")), +// Some(SymbolRef::Local(LocalId(0))) +// ); +// assert_eq!( +// root_env.get_raw_symbol(find_between(source, "fun debug(", "wait\n}")), +// Some(SymbolRef::Local(LocalId(1))) +// ); +// +// let factorial_env = engine +// .get_environment(SourceId(1)) +// .expect("Unable to get factorial environment"); +// assert_eq!(factorial_env.fqn, root_name.child("factorial")); +// let variables = factorial_env.symbols.all(); +// assert_eq!( +// variables, +// &vec![ +// Symbol::scoped("n".to_owned(), 0), +// Symbol::scoped("a".to_owned(), 1), +// Symbol::scoped("i".to_owned(), -2), +// ] +// ); +// let exported = factorial_env.symbols.exported_symbols().collect::>(); +// assert_eq!(exported, vec![]); +// +// let n_parameter = factorial_env +// .symbols +// .find_reachable("n", SymbolRegistry::Objects) +// .map(SymbolRef::Local) +// .expect("Unable to get n symbol"); +// +// assert_eq!( +// factorial_env.get_raw_symbol(find_in(source, "$n")), +// Some(n_parameter) +// ); +// let references = { +// let mut references = factorial_env.find_references(n_parameter); +// references.sort_by_key(|range| range.start); +// references +// }; +// assert_eq!( +// references, +// vec![find_in(source, "n: Int"), find_in(source, "$n")] +// ); +// +// let debug_env = engine +// .get_environment(SourceId(2)) +// .expect("Unable to get debug() environment"); +// assert_eq!(debug_env.fqn, root_name.child("debug")); +// let usages = debug_env.symbols.external_symbols().collect::>(); +// assert_eq!( +// usages, +// vec![( +// &SymbolLocation::unspecified(Name::new("LOG_FILE")), +// RelationId(2) +// )] +// ); +// assert_eq!( +// relations[RelationId(2)], +// Relation { +// origin: SourceId(2), +// state: RelationState::Resolved(ResolvedSymbol { +// reef: ReefId(1), +// source: SourceId(0), +// object_id: LocalId(0), +// }), +// registry: SymbolRegistry::Objects +// } +// ); +// +// let callback_env = engine +// .get_environment(SourceId(4)) +// .expect("Unable to get callback environment"); +// assert_eq!(callback_env.fqn, root_name.child("main").child("callback")); +// +// let mut globals = callback_env.symbols.external_symbols().collect::>(); +// globals.sort_by_key(|(loc, _)| &loc.name); +// assert_eq!( +// globals, +// vec![ +// ( +// 
&SymbolLocation::unspecified(Name::new("count")), +// RelationId(3) +// ), +// ( +// &SymbolLocation::unspecified(Name::new("factorial")), +// RelationId(4) +// ), +// (&SymbolLocation::unspecified(Name::new("n")), RelationId(5)), +// ] +// ); +// +// let reef_id = ReefId(2); +// +// assert_eq!( +// relations[RelationId(3)], +// Relation { +// origin: SourceId(4), +// state: RelationState::Resolved(ResolvedSymbol { +// reef: reef_id, +// source: SourceId(3), +// object_id: LocalId(0), +// }), +// registry: SymbolRegistry::Objects +// } +// ); +// assert_eq!( +// relations[RelationId(4)], +// Relation { +// origin: SourceId(4), +// state: RelationState::Resolved(ResolvedSymbol { +// reef: reef_id, +// source: SourceId(0), +// object_id: LocalId(0), +// }), +// registry: SymbolRegistry::Objects +// } +// ); +// assert_eq!( +// relations[RelationId(5)], +// Relation { +// origin: SourceId(4), +// state: RelationState::Resolved(ResolvedSymbol { +// reef: reef_id, +// source: SourceId(0), +// object_id: LocalId(3), +// }), +// registry: SymbolRegistry::Objects +// } +// ); +// +// let lambda_env = engine +// .get_environment(SourceId(5)) +// .expect("Unable to get lambda environment"); +// +// let variables = lambda_env.symbols.external_symbols().collect::>(); +// assert_eq!( +// variables, +// vec![(&SymbolLocation::unspecified(Name::new("n")), RelationId(6))] +// ); +// } diff --git a/ast/src/function.rs b/ast/src/function.rs index 452bd617..2184293d 100644 --- a/ast/src/function.rs +++ b/ast/src/function.rs @@ -29,6 +29,16 @@ pub enum FunctionParameter { Slf(SourceSegment), } +impl FunctionParameter { + pub fn name(&self) -> &str { + match self { + Self::Named(v) => &v.name.value, + Self::Variadic(_, _) => "@", + Self::Slf(_) => "self", + } + } +} + impl SourceSegmentHolder for FunctionParameter { fn segment(&self) -> SourceSegment { match self { diff --git a/ast/src/use.rs b/ast/src/use.rs index db19c63d..dc4acdfd 100644 --- a/ast/src/use.rs +++ b/ast/src/use.rs @@ -11,7 +11,7 @@ pub struct Use { pub import: Import, } -#[derive(Debug, Clone, PartialEq)] +#[derive(Debug, Clone, PartialEq, Eq)] pub enum InclusionPathItem { Symbol(Identifier), Reef(SourceSegment), diff --git a/ast/src/variable.rs b/ast/src/variable.rs index 63d9c273..f940c46b 100644 --- a/ast/src/variable.rs +++ b/ast/src/variable.rs @@ -164,7 +164,7 @@ pub enum Tilde { WorkingDir, } -#[derive(Debug, Clone, PartialEq)] +#[derive(Debug, Clone, PartialEq, Eq)] pub struct Identifier { pub value: CompactString, pub start: usize, diff --git a/cli/src/cli.rs b/cli/src/cli.rs index d1289a2e..6115a225 100644 --- a/cli/src/cli.rs +++ b/cli/src/cli.rs @@ -1,24 +1,15 @@ -use std::collections::HashMap; -use std::io::stderr; -use std::path::PathBuf; - +use crate::disassemble::display_bytecode; +use crate::pipeline::RealFilesystem; +use crate::report::{error_to_diagnostic, MultiFile}; +use analyzer::{Database, PipelineError, Reef}; use clap::Parser; use clap_complete::Shell; - -use analyzer::diagnostic::Diagnostic; -use analyzer::name::Name; -use analyzer::reef::Externals; -use analyzer::relations::SourceId; -use analyzer::Analyzer; -use compiler::externals::CompilerExternals; -use compiler::{compile_reef, CompilerOptions, SourceLineProvider}; -use context::source::ContentId; +use cli::pipeline::PipelineStatus; +use compiler::{compile_reef, CompilerOptions}; +use miette::Report; +use std::path::PathBuf; use vm::{VmError, VM}; -use crate::disassemble::display_bytecode; -use crate::pipeline::{FileImportError, PipelineStatus, SourceHolder, 
SourcesCache}; -use crate::report::{display_diagnostic, display_parse_error}; - /// The Moshell scripting language. #[derive(Parser)] #[command(author, version, about, long_about = None)] @@ -52,146 +43,42 @@ pub struct Cli { pub(crate) program_arguments: Vec, } -pub struct CachedSourceLocationLineProvider { - lines: HashMap>, -} - -impl CachedSourceLocationLineProvider { - fn compute(contents: &[ContentId], sources: &impl SourceHolder) -> Self { - let lines = contents - .iter() - .map(|&content_id| { - let source = sources.get_source(content_id).expect("unknown content id"); - - let source_start_addr = source.source.as_ptr() as usize; - - let source_lines_starts: Vec<_> = source - .source - .lines() - .map(|line| line.as_ptr() as usize - source_start_addr) - .collect(); - - (content_id, source_lines_starts) - }) - .collect(); - - Self { lines } - } -} - -impl SourceLineProvider for CachedSourceLocationLineProvider { - fn get_line(&self, content: ContentId, pos: usize) -> Option { - self.lines.get(&content).map(|lines| { - lines - .binary_search(&pos) - .map(|line| line + 1) - .unwrap_or_else(|line| line) - }) - } -} - #[must_use = "The pipeline status should be checked"] -#[allow(clippy::too_many_arguments)] pub fn use_pipeline( - entry_point: &Name, - starting_page: SourceId, - analyzer: &Analyzer<'_>, - externals: &Externals, - compiler_externals: &mut CompilerExternals, + database: &Database, + reef: &Reef, + fs: &RealFilesystem, vm: &mut VM, - diagnostics: Vec, - errors: Vec, - sources: &SourcesCache, + errors: Vec, config: &Cli, ) -> PipelineStatus { - if errors.is_empty() && analyzer.resolution.engine.is_empty() { - eprintln!("No module found for entry point {entry_point}"); - return PipelineStatus::IoError; - } - - let reef_id = externals.current; - - let mut import_status = PipelineStatus::Success; + let mut status = PipelineStatus::Success; for error in errors { - match error { - FileImportError::IO { inner, path } => { - eprintln!("Couldn't read {}: {inner}", path.display()); - import_status = PipelineStatus::IoError; - } - FileImportError::Parse(report) => { - for error in report.errors { - let source = sources - .get(reef_id) - .and_then(|importer| importer.get_source(report.source)) - .unwrap(); - display_parse_error(source, error, &mut stderr()) - .expect("IO error when reporting diagnostics"); - } - - // Prefer the IO error over a generic failure - if import_status != PipelineStatus::IoError { - import_status = PipelineStatus::AnalysisError; - } - } - } - } - if import_status != PipelineStatus::Success { - return import_status; + status = status.compose(match &error { + PipelineError::Import { .. } => PipelineStatus::IoError, + PipelineError::Parse { .. 
} | PipelineError::Type(_) => PipelineStatus::AnalysisError, + }); + let mut multi_file = MultiFile::default(); + let diagnostic = error_to_diagnostic(error, &mut multi_file, fs); + let report = Report::from(diagnostic).with_source_code(multi_file); + eprintln!("{report:?}"); } - - let engine = &analyzer.resolution.engine; - if config.ast { - for ast in engine - .environments() - .filter(|(_, env)| env.parent.is_none()) - .filter_map(|(id, _)| engine.get_expression(id)) - { - println!("{ast:#?}") - } - } - - let mut stderr = stderr(); - let had_errors = !diagnostics.is_empty(); - for diagnostic in diagnostics { - display_diagnostic( - externals, - engine, - externals.current, - sources, - diagnostic, - &mut stderr, - ) - .expect("IO errors when reporting diagnostic"); + if status != PipelineStatus::Success { + return status; } - if had_errors { - return PipelineStatus::AnalysisError; - } let mut bytes = Vec::new(); - - let importer = sources.get(reef_id).expect("unknown reef"); - let contents = importer.list_content_ids(); - let lines = CachedSourceLocationLineProvider::compute(&contents, importer); - - let compiled_reef = compile_reef( - &analyzer.engine, - &analyzer.resolution.relations, - &analyzer.typing, - &analyzer.resolution.engine, - externals, - compiler_externals, - externals.current, - starting_page, + compile_reef( + database, + reef, &mut bytes, CompilerOptions { - line_provider: Some(&lines), + line_provider: None, last_page_storage_var: None, }, ) .expect("write failed"); - compiler_externals.set(reef_id, compiled_reef); - if config.disassemble { display_bytecode(&bytes); } @@ -206,5 +93,5 @@ pub fn use_pipeline( Err(VmError::Internal) => panic!("VM internal error"), } } - PipelineStatus::Success + status } diff --git a/cli/src/disassemble.rs b/cli/src/disassemble.rs index 201e5f16..a0af6dfa 100644 --- a/cli/src/disassemble.rs +++ b/cli/src/disassemble.rs @@ -169,7 +169,6 @@ fn display_code( (digits(dynamic_symbols.len() as u64) - digits(dynsym_idx as u64)) + 10; print!(" {:padding$} // {str}", "") } - Opcode::Exec => print!("", read!(cursor, u8)), Opcode::Open => print!("", read!(cursor, i32)), Opcode::IfJump | Opcode::IfNotJump | Opcode::Jump | Opcode::Fork => { print!("", read!(cursor, u32)) diff --git a/cli/src/main.rs b/cli/src/main.rs index ab4d9963..8ee03a5d 100644 --- a/cli/src/main.rs +++ b/cli/src/main.rs @@ -1,19 +1,14 @@ use crate::cli::{use_pipeline, Cli}; -use crate::pipeline::{ErrorReporter, PipelineStatus, SourcesCache}; -use crate::repl::{code, repl}; -use crate::std::build_std; +use crate::pipeline::RealFilesystem; +use crate::repl::repl; use crate::terminal::signal_hook; -use ::std::ffi::OsStr; -use ::std::io; -use ::std::path::Path; -use analyzer::name::Name; -use analyzer::reef::Externals; -use analyzer::relations::SourceId; -use analyzer::Analyzer; +use ::cli::pipeline::PipelineStatus; +use analyzer::{analyze_multi, Database, Reef}; use clap::{CommandFactory, Parser}; -use compiler::externals::CompilerExternals; -use miette::{Context, IntoDiagnostic, MietteHandlerOpts}; use nix::sys::signal; +use std::ffi::OsString; +use std::io; +use std::path::PathBuf; use vm::VM; mod cli; @@ -22,10 +17,9 @@ mod disassemble; mod pipeline; mod repl; mod report; -mod std; mod terminal; -fn main() -> Result { +fn main() -> miette::Result { if cfg!(unix) && !cfg!(miri) { // Override Rust's default `SIGPIPE` signal handler that ignores the signal. 
// `println!` will no longer panic since the process will be killed before @@ -37,7 +31,7 @@ fn main() -> Result { signal_hook(signal::Signal::SIGPIPE, signal::SigHandler::SigDfl); } - let cli = Cli::parse(); + let mut cli = Cli::parse(); if let Some(generator) = cli.completions { let mut cmd = Cli::command(); @@ -51,101 +45,32 @@ fn main() -> Result { return Ok(PipelineStatus::Success); } - miette::set_hook(Box::new(|_| { - Box::new(MietteHandlerOpts::new().tab_width(2).build()) - })) - .expect("miette options setup"); - - let mut externals = Externals::default(); - let mut compiler_externals = CompilerExternals::default(); - let mut sources = SourcesCache::default(); let mut vm = VM::new( cli.source .iter() .flat_map(|p| p.to_str()) .map(ToOwned::to_owned) - .chain(cli.program_arguments.clone()) + .chain(std::mem::take(&mut cli.program_arguments)) .collect(), ); - - let current_dir = ::std::env::current_dir() - .into_diagnostic() - .context("Could not locate working directory")?; - - build_std( - &mut externals, - &mut compiler_externals, - &mut vm, - &mut sources, - &cli, - ); - - if let Some(source) = &cli.source { - return run(source, &cli, sources, externals, compiler_externals, vm); - } - if let Some(source) = cli.code.clone() { - return code( - source, - current_dir, - &cli, - sources, - externals, - compiler_externals, - vm, - ); + let fs = RealFilesystem { + root: PathBuf::new(), + }; + let mut database = Database::default(); + if let Some(source) = cli.source.take() { + return Ok(run(source, &mut database, &fs, &mut vm, &cli)); } - - repl( - current_dir, - &cli, - sources, - externals, - compiler_externals, - vm, - ) + repl(&cli, &mut database, &fs, &mut vm) } fn run( - source: &Path, - cli: &Cli, - mut sources: SourcesCache, - externals: Externals, - mut compiler_externals: CompilerExternals, - mut vm: VM, -) -> Result { - let name = Name::new( - source - .file_name() - .and_then(OsStr::to_str) - .expect("Incompatible filename"), - ); - - let folder_path = { - let mut path = source.to_path_buf(); - path.pop(); - path - }; - - sources.register(folder_path); - let importer = sources.last_mut(); - importer.add_redirection(name.clone(), source.to_path_buf()); - - let mut analyzer = Analyzer::new(); - analyzer.process(name.clone(), importer, &externals); - - let diagnostics = analyzer.take_diagnostics(); - let errors = importer.take_errors(); - - Ok(use_pipeline( - &name, - SourceId(0), - &analyzer, - &externals, - &mut compiler_externals, - &mut vm, - diagnostics, - errors, - &sources, - cli, - )) + source: PathBuf, + database: &mut Database, + fs: &RealFilesystem, + vm: &mut VM, + config: &Cli, +) -> PipelineStatus { + let mut reef = Reef::new(OsString::from("foo")); + let errors = analyze_multi(database, &mut reef, fs, &source.display().to_string()); + use_pipeline(database, &reef, fs, vm, errors, config) } diff --git a/cli/src/pipeline.rs b/cli/src/pipeline.rs index e8f03754..0ca5b3d6 100644 --- a/cli/src/pipeline.rs +++ b/cli/src/pipeline.rs @@ -1,18 +1,7 @@ -use std::collections::HashMap; -use std::fs::read_to_string; -use std::io; -use std::path::{PathBuf, MAIN_SEPARATOR_STR}; +use analyzer::Filesystem; +use std::path::{Path, PathBuf}; use std::process::{ExitCode, Termination}; -use analyzer::importer::{ASTImporter, ImportResult, Imported}; -use analyzer::name::Name; -use analyzer::reef::ReefId; -use ast::group::Block; -use ast::Expr; -use context::source::{ContentId, OwnedSource, Source, SourceSegmentHolder}; -use parser::err::ParseError; -use parser::parse; - /// Represents 
the state of the pipeline. #[repr(u8)] #[derive(PartialEq, Debug, Clone, Copy, Eq)] @@ -47,187 +36,14 @@ impl Termination for PipelineStatus { } } -/// A collection of parse errors that are bound to a unique source. -#[derive(Debug)] -pub struct SourceAwareParseErrors { - /// The source identifier from which the errors were generated. - pub source: ContentId, - - /// The generated errors. - pub errors: Vec, -} - -/// A failure that occurred while importing a source with a [`FileImporter`]. -#[derive(Debug)] -pub enum FileImportError { - /// An IO error occurred while reading the source. - IO { inner: io::Error, path: PathBuf }, - - /// Some parse errors occurred after reading the source. - Parse(SourceAwareParseErrors), -} - -/// An importer that imports sources from the file system. -pub struct FileImporter { - /// The root directory from which the files are read. - root: PathBuf, - - /// The imported sources, as an importer is the owner of the sources. - sources: Vec, - - /// Paths exceptions to look for when importing a source. - redirections: HashMap, - - /// The errors that occurred while importing the sources. - /// - /// They contains the specific errors that were masked when using the - /// [`ASTImporter`] trait. - errors: Vec, -} - -#[derive(Default)] -pub struct SourcesCache { - importers: Vec, -} - -impl SourcesCache { - /// Gets the importer for the given reef. - pub fn get(&self, reef: ReefId) -> Option<&FileImporter> { - self.importers.get(reef.0 - 1) - } - - /// Registers a new importer for the given reef. - pub fn register(&mut self, root: PathBuf) { - self.importers.push(FileImporter::new(root)); - } - - /// Gets the last importer. - pub fn last_mut(&mut self) -> &mut FileImporter { - self.importers.last_mut().unwrap() - } -} - -impl FileImporter { - /// Creates a new file importer that will import sources from the given - /// root directory. - pub fn new(root: PathBuf) -> Self { - Self { - sources: vec![], - root, - redirections: HashMap::new(), - errors: Vec::new(), - } - } - - /// Inserts a new source into the importer. - pub fn insert(&mut self, mut source: OwnedSource) -> ImportResult { - let id = self.sources.len(); - // Remove the shebang if it exists - if source.source.strip_prefix("#!").is_some() { - // Remove first line - source.source.drain( - ..source - .source - .find('\n') - .map(|n| n + 1) - .unwrap_or(source.source.len()), - ); - } - self.sources.push(source); - let source = self - .sources - .last() - .expect("the source was just inserted") - .as_source(); - - let report = parse(source.source); - if report.is_ok() { - let expressions = report.expr; - ImportResult::Success(Imported { - content: ContentId(id), - expr: Expr::Block(Block { - expressions, - segment: source.segment(), - }), - }) - } else { - self.errors - .push(FileImportError::Parse(SourceAwareParseErrors { - source: ContentId(id), - errors: report.errors, - })); - ImportResult::Failure - } - } - - /// Adds a special name to path mapping to the importer. - pub fn add_redirection(&mut self, name: Name, path: PathBuf) { - self.redirections.insert(name, path); - } - - /// Gets the search path for a given name, by applying any existing redirection. 
- fn get_search_path(&self, name: &Name) -> PathBuf { - if let Some(path) = self.redirections.get(name) { - path.clone() - } else { - let mut path = self.root.clone(); - path.push(name.parts().to_owned().join(MAIN_SEPARATOR_STR)); - path.with_extension("msh") - } - } -} - -impl ASTImporter for FileImporter { - fn import(&mut self, name: &Name) -> ImportResult { - let path = self.get_search_path(name); - match read_to_string(&path) { - Ok(content) => self.insert(OwnedSource::new( - content, - path.strip_prefix(&self.root) - .expect("not relative") - .display() - .to_string(), - )), - Err(err) => { - if err.kind() == io::ErrorKind::NotFound { - ImportResult::NotFound - } else { - self.errors.push(FileImportError::IO { inner: err, path }); - ImportResult::Failure - } - } - } - } -} - -pub trait SourceHolder { - /// Gets a source from the importer. - fn get_source(&self, id: ContentId) -> Option; - - /// Lists all the contents ids that are available in the importer. - fn list_content_ids(&self) -> Vec; -} - -/// A trait to access errors and to get sources from an importer. -pub trait ErrorReporter { - /// Takes the errors from the importer. - /// - /// This leaves the importer in a state where it has no errors. - fn take_errors(&mut self) -> Vec; -} - -impl SourceHolder for FileImporter { - fn get_source(&self, id: ContentId) -> Option { - self.sources.get(id.0).map(|s| s.as_source()) - } - - fn list_content_ids(&self) -> Vec { - (0..self.sources.len()).map(ContentId).collect() - } +pub(super) struct RealFilesystem { + pub(super) root: PathBuf, } -impl ErrorReporter for FileImporter { - fn take_errors(&mut self) -> Vec { - std::mem::take(&mut self.errors) +impl Filesystem for RealFilesystem { + fn read(&self, path: &Path) -> std::io::Result { + let mut path = self.root.join(path); + path.set_extension("msh"); + std::fs::read_to_string(path) } } diff --git a/cli/src/repl.rs b/cli/src/repl.rs index 15971ad3..bba0a70e 100644 --- a/cli/src/repl.rs +++ b/cli/src/repl.rs @@ -1,3 +1,5 @@ +use analyzer::{Database, Reef}; +use cli::pipeline::PipelineStatus; use miette::{Context, IntoDiagnostic}; use nu_ansi_term::Color; use reedline::{ @@ -6,36 +8,26 @@ use reedline::{ Reedline, ReedlineEvent, ReedlineMenu, Signal, ValidationResult, Validator, }; use std::borrow::Cow; +use std::ffi::OsString; use std::io::{self, BufRead, IsTerminal, StdinLock}; -use std::path::PathBuf; -use analyzer::importer::ImportResult; -use analyzer::name::Name; -use analyzer::reef::Externals; -use analyzer::relations::SourceId; -use analyzer::{Analyzer, Inject}; use cli::project_dir; -use compiler::externals::CompilerExternals; -use context::source::OwnedSource; use lexer::is_unterminated; use vm::VM; -use crate::cli::{use_pipeline, Cli}; +use crate::cli::Cli; use crate::complete::MoshellCompleter; -use crate::pipeline::{ErrorReporter, PipelineStatus, SourcesCache}; +use crate::pipeline::RealFilesystem; use crate::terminal::acquire_terminal; /// Indefinitely prompts a new expression from stdin and executes it. 
pub(crate) fn repl( - dir: PathBuf, config: &Cli, - mut sources: SourcesCache, - externals: Externals, - mut compiler_externals: CompilerExternals, - mut vm: VM, + database: &mut Database, + fs: &RealFilesystem, + vm: &mut VM, ) -> miette::Result { - let mut analyzer = Analyzer::new(); - sources.register(dir); + let mut reef = Reef::new(OsString::from("stdin")); let mut editor = if io::stdin().is_terminal() && cfg!(not(miri)) { #[cfg(unix)] @@ -49,26 +41,25 @@ pub(crate) fn repl( // Keep track of the previous attributed source, so that we can inject // the next one into the same context. - let mut starting_source: Option = None; - let name = Name::new("stdin"); + //let mut starting_source: Option = None; loop { let line = editor.read_line(&Prompt); match line { Ok(Signal::Success(source)) => { - let source = OwnedSource::new(source, "stdin".to_owned()); - status = status.compose(consume( - &name, - &mut analyzer, - &externals, - &mut compiler_externals, - &mut vm, - &mut sources, - config, - &mut starting_source, - source, - )); + // let source = OwnedSource::new(source, "stdin".to_owned()); + // status = status.compose(consume( + // &name, + // &mut analyzer, + // &externals, + // &mut compiler_externals, + // &mut vm, + // &mut sources, + // config, + // &mut starting_source, + // source, + // )); } Ok(Signal::CtrlC) => eprintln!("^C"), Ok(Signal::CtrlD) => break Ok(status), @@ -80,107 +71,6 @@ pub(crate) fn repl( } } -/// Analyse and consume a source string. -pub(crate) fn code( - code: String, - dir: PathBuf, - config: &Cli, - mut sources: SourcesCache, - externals: Externals, - mut compiler_externals: CompilerExternals, - mut vm: VM, -) -> miette::Result { - let name = "direct"; - let source = OwnedSource::new(code, name.to_owned()); - let mut analyzer = Analyzer::new(); - sources.register(dir); - Ok(consume( - &Name::new(name), - &mut analyzer, - &externals, - &mut compiler_externals, - &mut vm, - &mut sources, - config, - &mut None, - source, - )) -} - -/// Processes a source and returns the pipeline status. -#[allow(clippy::too_many_arguments)] -fn consume( - name: &Name, - analyzer: &mut Analyzer<'_>, - externals: &Externals, - compiler_externals: &mut CompilerExternals, - vm: &mut VM, - sources: &mut SourcesCache, - config: &Cli, - starting_source: &mut Option, - source: OwnedSource, -) -> PipelineStatus { - let importer = sources.last_mut(); - if let ImportResult::Success(imported) = importer.insert(source) { - let mut analysis = analyzer.inject( - Inject { - name: name.clone(), - imported, - attached: *starting_source, - }, - importer, - externals, - ); - - // Reuse the same diagnotics by moving them, requiring to keep track - // if there was any error since they will be consumed before being - // able to cancel the analysis (the errors need the context that is - // dropped when the analysis is reverted). - let diagnostics = analysis.take_diagnostics(); - let is_ready = diagnostics.is_empty(); - - let errors = importer.take_errors(); - let status = use_pipeline( - name, - analysis.attributed_id(), - analysis.analyzer(), - externals, - compiler_externals, - vm, - diagnostics, - errors, - sources, - config, - ); - - // Remember the successfully injected source, or revert the analysis. - if is_ready { - *starting_source = Some(analysis.attributed_id()); - } else { - analysis.revert(); - } - status - } else { - // Probably hit some parse errors, so we skip any further analysis and - // directly display the errors. 
There should be no actual diagnostics - // in the pipeline, but we consume them anyway to reuse the same - // end-of-pipeline logic. - let diagnostics = analyzer.take_diagnostics(); - use_pipeline( - name, - SourceId(0), // this value has no importance - analyzer, - externals, - compiler_externals, - vm, - diagnostics, - importer.take_errors(), - sources, - config, - ) - } -} - /// The REPL editor. enum Editor<'a> { /// An interactive line editor. diff --git a/cli/src/report.rs b/cli/src/report.rs index 5245e5a2..9ceaefba 100644 --- a/cli/src/report.rs +++ b/cli/src/report.rs @@ -1,139 +1,160 @@ -use std::io; -use std::io::Write; +use std::path::PathBuf; -use analyzer::diagnostic::Diagnostic; -use analyzer::engine::Engine; -use miette::{LabeledSpan, MietteDiagnostic, Report, Severity, SourceSpan}; +use analyzer::symbol::SymbolDesc; +use analyzer::typing::{TypeError, TypeErrorKind}; +use analyzer::{Filesystem, PipelineError, SourceLocation}; +use miette::{LabeledSpan, MietteDiagnostic, Severity, SourceOffset, SourceSpan}; -use analyzer::reef::{Externals, ReefId}; -use context::source::{ContentId, Source, SourceSegment}; -use parser::err::{ParseError, ParseErrorKind}; +use context::source::Span; -use crate::pipeline::{SourceHolder, SourcesCache}; - -fn offset_empty_span(span: SourceSegment) -> SourceSpan { - if span.start == span.end { - (span.start - 1..span.end).into() - } else { - span.into() +pub fn error_to_diagnostic( + value: PipelineError, + multi_file: &mut MultiFile, + fs: &dyn Filesystem, +) -> MietteDiagnostic { + match value { + PipelineError::Import { path, error, cause } => { + let mut diagnostic = + MietteDiagnostic::new(format!("unable to import {}: {error}", path.display())); + if let Some(SourceLocation { path, span }) = cause { + let span = multi_file.insert(path, span, fs); + diagnostic = diagnostic.with_label(LabeledSpan::new_with_span(None, span)) + } + diagnostic + } + PipelineError::Parse { path, error } => { + let span = multi_file.insert(path, error.position, fs); + MietteDiagnostic::new(error.message) + .with_severity(Severity::Error) + .and_label(LabeledSpan::new_with_span(Some("Here".to_string()), span)) + } + PipelineError::Type(error) => type_error_to_diagnostic(error, multi_file, fs), } } -pub fn display_parse_error( - source: Source, - error: ParseError, - writer: &mut W, -) -> io::Result<()> { - let span = offset_empty_span(error.position); - let mut diag = MietteDiagnostic::new(error.message) - .with_severity(Severity::Error) - .and_label(LabeledSpan::new( - Some("Here".to_string()), - span.offset(), - span.len(), - )); - - match error.kind { - ParseErrorKind::Expected(e) => diag = diag.with_help(format!("expected: {e}")), - ParseErrorKind::UnexpectedInContext(e) => diag = diag.with_help(e), - ParseErrorKind::Unpaired(e) => { - let unpaired_span = offset_empty_span(e); - diag = diag.and_label(LabeledSpan::new( - Some("Start".to_string()), - unpaired_span.offset(), - unpaired_span.len(), - )); +fn type_error_to_diagnostic( + TypeError { kind, at }: TypeError, + multi_file: &mut MultiFile, + fs: &dyn Filesystem, +) -> MietteDiagnostic { + let at_span = multi_file.insert(at.path.clone(), at.span, fs); + let mut diagnostic = MietteDiagnostic::new(kind.to_string()) + .with_label(LabeledSpan::new_with_span(None, at_span)); + match kind { + TypeErrorKind::DuplicateSymbol { previous, .. 
} => { + let previous_span = multi_file.insert(at.path, previous, fs); + diagnostic.and_label(LabeledSpan::new_with_span( + Some("previous declaration here".to_owned()), + previous_span, + )) + } + TypeErrorKind::UndefinedSymbol { + name, + expected, + found: Some(SymbolDesc { registry, span }), + } => { + let symbol_span = multi_file.insert(at.path, span, fs); + diagnostic.message = format!("expected {expected}, found {registry} `{name}`"); + diagnostic.and_label(LabeledSpan::new_with_span( + Some(format!("{registry} defined here")), + symbol_span, + )) } - _ => {} + TypeErrorKind::TypeMismatch { + expected_due_to: Some(expected_due_to), + .. + } => { + let expected_span = multi_file.insert(expected_due_to.path, expected_due_to.span, fs); + diagnostic.and_label(LabeledSpan::new_with_span( + Some("expected here".to_owned()), + expected_span, + )) + } + TypeErrorKind::UnknownField { available, .. } => diagnostic.with_help(format!( + "Available fields: {}", + available.into_iter().collect::>().join(", ") + )), + TypeErrorKind::TypeAnnotationRequired { types, insert_at } => { + let span = multi_file.insert(at.path, insert_at..insert_at, fs); + diagnostic.with_label(LabeledSpan::new_with_span( + Some(format!("::[{}]", types.join(", "))), + span, + )) + } + TypeErrorKind::RepeatedParameterName { name, previous } => { + let previous_span = multi_file.insert(at.path, previous, fs); + diagnostic.and_label(LabeledSpan::new_with_span( + Some(format!("previous declaration of `{name}`")), + previous_span, + )) + } + TypeErrorKind::MethodLikeFieldAccess { name, parentheses } => diagnostic.with_help( + format!("use parentheses to call the method: .{name}{parentheses}",), + ), + _ => diagnostic, } - write_diagnostic(diag, Some(source), writer) } -pub fn display_diagnostic( - externals: &Externals, - current_engine: &Engine, - engine_reef: ReefId, - sources: &SourcesCache, - diagnostic: Diagnostic, - writer: &mut W, -) -> io::Result<()> { - let mut diag = MietteDiagnostic::new(diagnostic.global_message); +#[derive(Default)] +pub struct MultiFile { + sources: Vec, +} - let id = diagnostic.identifier; - diag = if id.critical() { - diag.with_severity(Severity::Error) - .with_code(format!("error[E{:04}]", id.code())) - } else { - diag.with_severity(Severity::Warning) - .with_code(format!("warn[W{:04}]", id.code())) - }; +struct VirtualFile { + name: PathBuf, + source: String, +} - if let Some((head, tail)) = diagnostic.helps.split_first() { - if tail.is_empty() { - diag = diag.with_help(head) +impl MultiFile { + pub fn insert(&mut self, path: PathBuf, span: Span, fs: &dyn Filesystem) -> SourceSpan { + let mut start = 0usize; + for source in &self.sources { + if source.name == path { + return SourceSpan::new(SourceOffset::from(start + span.start), span.len()); + } else { + start += source.source.len(); + } } - let helps = tail.iter().fold(format!("\n- {head}"), |acc, help| { - format!("{acc}\n- {help}") + let source = fs.read(&path).unwrap(); + self.sources.push(VirtualFile { + name: path, + source: source.to_string(), }); - diag = diag.with_help(helps) - } - - struct AttachedSource<'a> { - reef: ReefId, - id: ContentId, - content: Source<'a>, + SourceSpan::new(SourceOffset::from(start + span.start), span.len()) } - - let mut displayed_source: Option = None; - for obs in diagnostic.observations { - let loc = obs.location; - let engine = if engine_reef == loc.reef { - current_engine - } else { - &externals.get_reef(loc.reef).unwrap().engine - }; - - let content_id = engine - .get_original_content(loc.source) - 
.expect("Unknown source"); - - if displayed_source - .as_ref() - .map_or(true, |s| s.id == content_id && s.reef == loc.reef) - { - let source = sources - .get(loc.reef) - .and_then(|importer| importer.get_source(content_id)) - .expect("Unknown source"); - let span = loc.segment.clone(); - diag = diag.and_label(LabeledSpan::new(obs.message, span.start, span.len())); - displayed_source = Some(AttachedSource { - reef: loc.reef, - id: content_id, - content: source, - }); - } - } - - write_diagnostic(diag, displayed_source.map(|s| s.content), writer) } -fn write_diagnostic( - diagnostic: MietteDiagnostic, - source: Option, - writer: &mut W, -) -> io::Result<()> { - let report = Report::from(diagnostic); - if let Some(source) = source { - unsafe { - //SAFETY: the CLI source is transmuted to a static lifetime, because `report.with_source_code` - // needs a source with a static lifetime. - // The report and the source are then used to display the formatted diagnostic and are immediately dropped after. - let source = std::mem::transmute::>(source); - let report = report.with_source_code(source); - writeln!(writer, "\n{report:?}") +impl miette::SourceCode for MultiFile { + fn read_span<'b>( + &'b self, + span: &SourceSpan, + context_lines_before: usize, + context_lines_after: usize, + ) -> Result + 'b>, miette::MietteError> { + let mut start = 0usize; + for file in &self.sources { + if start + file.source.len() <= span.offset() { + start += file.source.len(); + continue; + } + let local_span = SourceSpan::new(SourceOffset::from(span.offset() - start), span.len()); + let contents = + file.source + .read_span(&local_span, context_lines_before, context_lines_after)?; + let local_span = contents.span(); + let span = SourceSpan::new( + SourceOffset::from(local_span.offset() + start), + local_span.len(), + ); + return Ok(Box::new(miette::MietteSpanContents::new_named( + file.name.to_string_lossy().to_string(), + contents.data(), + span, + contents.line(), + contents.column(), + contents.line_count(), + ))); } - } else { - writeln!(writer, "\n{report:?}") + Err(miette::MietteError::OutOfBounds) } } diff --git a/cli/src/std.rs b/cli/src/std.rs deleted file mode 100644 index d8c17757..00000000 --- a/cli/src/std.rs +++ /dev/null @@ -1,83 +0,0 @@ -use std::path::{Path, PathBuf}; - -use analyzer::analyze; -use analyzer::name::Name; -use analyzer::reef::{Externals, Reef}; -use analyzer::relations::SourceId; -use cli::project_dir; -use compiler::externals::CompilerExternals; -use vm::VM; - -use crate::cli::{use_pipeline, Cli}; -use crate::pipeline::{ErrorReporter, PipelineStatus, SourcesCache}; - -pub fn build_std( - externals: &mut Externals, - compiler_externals: &mut CompilerExternals, - vm: &mut VM, - sources: &mut SourcesCache, - config: &Cli, -) { - let std_file = find_std(); - sources.register(std_file); - let importer = sources.last_mut(); - - let name = Name::new("std"); - let mut analyzer = analyze(name.clone(), importer, externals); - let diagnostics = analyzer.take_diagnostics(); - - let status = use_pipeline( - &name, - SourceId(0), - &analyzer, - externals, - compiler_externals, - vm, - diagnostics, - importer.take_errors(), - sources, - config, - ); - - match status { - PipelineStatus::Success => { - externals.register(Reef::new("std".to_string(), analyzer)); - } - PipelineStatus::IoError => panic!( - "Unable to find the standard library, check the MOSHELL_STD environment variable" - ), - _ => panic!("std build did not succeed"), - } -} - -fn find_std() -> PathBuf { - if let Ok(path) = 
std::env::var("MOSHELL_STD") { - return PathBuf::from(path); - } - - let mut dir = std::env::current_dir().expect("Could not get current directory"); - dir.push("lib"); - dir.push("std.msh"); - if dir.exists() { - dir.pop(); - return dir; - } - - if let Some(proj_dirs) = project_dir() { - let lib = proj_dirs.data_dir().join("lib"); - if lib.exists() { - return lib; - } - } - - #[cfg(unix)] - { - for path in ["/usr/local/share/moshell/lib", "/usr/share/moshell/lib"] { - let path = Path::new(path); - if path.exists() { - return path.to_path_buf(); - } - } - } - panic!("Could not determine a valid std emplacement. Please provide a valid stdlib path under a MOSHELL_STD= env variable.") -} diff --git a/compiler/src/bytecode.rs b/compiler/src/bytecode.rs index c32d3c14..028af7b3 100644 --- a/compiler/src/bytecode.rs +++ b/compiler/src/bytecode.rs @@ -1,14 +1,12 @@ use std::mem::size_of; +use analyzer::typing::user::{self, TypeId}; +use analyzer::typing::variable::{LocalId, Var}; use num_enum::TryFromPrimitive; use crate::locals::LocalsLayout; use crate::r#type::ValueStackSize; use crate::structure::StructureLayout; -use analyzer::relations::{LocalId, ResolvedSymbol}; -use analyzer::types; -use analyzer::types::hir::Var; -use analyzer::types::ty::TypeRef; #[derive(Debug, Clone)] pub struct Placeholder { @@ -138,10 +136,10 @@ impl<'a> Instructions<'a> { self.emit_code(pop_opcode); } - pub fn emit_box_if_primitive(&mut self, ty: TypeRef) { + pub fn emit_box_if_primitive(&mut self, ty: TypeId) { match ty { - types::EXITCODE | types::BOOL => self.emit_code(Opcode::BoxByte), - types::INT | types::FLOAT => self.emit_code(Opcode::BoxQWord), + user::BOOL_TYPE => self.emit_code(Opcode::BoxByte), + user::INT_TYPE | user::FLOAT_TYPE => self.emit_code(Opcode::BoxQWord), _ => { /* Objects are already on the heap */ } } } @@ -161,7 +159,7 @@ impl<'a> Instructions<'a> { /// emits instructions to assign given local identifier with last operand stack value /// assuming the value size is the given `size` argument pub fn emit_set_local(&mut self, var: LocalId, size: ValueStackSize, layout: &LocalsLayout) { - let index = layout.get_index(var).unwrap(); + let index = layout.get_index(var); let opcode = match size { ValueStackSize::Byte => Opcode::SetLocalByte, ValueStackSize::QWord => Opcode::SetLocalQWord, @@ -173,7 +171,7 @@ impl<'a> Instructions<'a> { pub fn emit_set_capture( &mut self, - capture: ResolvedSymbol, + capture: LocalId, size: ValueStackSize, layout: &LocalsLayout, ) { @@ -204,7 +202,7 @@ impl<'a> Instructions<'a> { /// emits instructions to push to operand stack given local identifier /// assuming the local's size is the given `size` argument pub fn emit_get_local(&mut self, local: LocalId, size: ValueStackSize, layout: &LocalsLayout) { - let index = layout.get_index(local).unwrap(); + let index = layout.get_index(local); let opcode = match size { ValueStackSize::Byte => Opcode::GetLocalByte, ValueStackSize::QWord => Opcode::GetLocalQWord, @@ -216,7 +214,7 @@ impl<'a> Instructions<'a> { pub fn emit_get_capture( &mut self, - capture: ResolvedSymbol, + capture: LocalId, size: ValueStackSize, layout: &LocalsLayout, ) { @@ -238,9 +236,7 @@ impl<'a> Instructions<'a> { /// pushes a reference to the given symbol on the stack's locals pub fn emit_push_stack_ref(&mut self, var: Var, layout: &LocalsLayout) { - self.emit_code(Opcode::PushLocalRef); - let index = layout.get_var_index(var).unwrap(); - self.bytecode.emit_u32(index); + todo!("emit_push_stack_ref") } /// Emits the instructions to push the value of 
the given external symbol on top of the stack. diff --git a/compiler/src/context.rs b/compiler/src/context.rs index 2ac282f7..bbdcf0f1 100644 --- a/compiler/src/context.rs +++ b/compiler/src/context.rs @@ -1,82 +1,11 @@ -use analyzer::engine::Engine; -use analyzer::environment::Environment; -use analyzer::reef::{Externals, ReefId}; -use analyzer::relations::SourceId; -use analyzer::types::engine::{FunctionId, StructureId, TypedEngine}; -use analyzer::types::ty::{FunctionDesc, Type, TypeRef}; -use analyzer::types::Typing; - -use crate::externals::CompilerExternals; use crate::structure::StructureLayout; -use crate::Captures; - -pub struct EmitterContext<'a, 'e> { - pub(crate) current_reef: ReefId, - pub(crate) typing: &'a Typing, - pub(crate) engine: &'a Engine<'e>, - pub(crate) typed_engine: &'a TypedEngine, - pub(crate) externals: &'a Externals<'e>, - pub(crate) compiler_externals: &'a CompilerExternals, - - /// The currently emitted environment. - /// - /// It may be used to get the name of the current environment or to get the - /// current environment's variables. - pub(crate) environment: &'a Environment, +use analyzer::typing::registry::Registry; +use analyzer::typing::user::TypeArena; - /// The captures variables. - pub(crate) captures: &'a Captures, - - /// The current chunk id. - pub(crate) chunk_id: SourceId, +pub(crate) struct EmitterContext<'a> { + pub(crate) types: &'a TypeArena, + pub(crate) registry: &'a Registry, /// Computed layouts of the current reef pub(crate) layouts: &'a Vec, } - -impl<'a, 'e> EmitterContext<'a, 'e> { - pub fn get_function(&self, reef: ReefId, id: FunctionId) -> Option<&FunctionDesc> { - if reef == self.current_reef { - self.typed_engine.get_function(id) - } else { - self.externals - .get_reef(reef) - .unwrap() - .typed_engine - .get_function(id) - } - } - - pub fn get_layout(&self, reef: ReefId, structure_id: StructureId) -> &StructureLayout { - if reef == self.current_reef { - &self.layouts[structure_id.0] - } else { - &self - .compiler_externals - .get_compiled_reef(reef) - .unwrap() - .layouts[structure_id.0] - } - } - - pub fn get_type(&self, tpe: TypeRef) -> &Type { - if tpe.reef == self.current_reef { - self.typing.get_type(tpe.type_id).unwrap() - } else { - self.externals - .get_reef(tpe.reef) - .unwrap() - .typing - .get_type(tpe.type_id) - .unwrap() - } - } - - pub fn get_engine(&self, reef: ReefId) -> Option<&'a Engine<'e>> { - if self.current_reef == reef { - Some(self.engine) - } else { - self.externals.get_reef(reef).map(|r| &r.engine) - } - } -} diff --git a/compiler/src/emit.rs b/compiler/src/emit.rs index 4f2d0199..f8cc3991 100644 --- a/compiler/src/emit.rs +++ b/compiler/src/emit.rs @@ -1,12 +1,11 @@ -use analyzer::relations::LocalId; -use analyzer::types::hir::{Declaration, ExprKind, TypedExpr, Var}; -use analyzer::types::ty::TypeRef; +use analyzer::hir::{Declaration, ExprKind, TypedExpr}; +use analyzer::typing::user::TypeId; +use analyzer::typing::variable::{LocalId, Var}; use ast::value::LiteralValue; use crate::bytecode::{Instructions, Opcode, Placeholder}; use crate::constant_pool::ConstantPool; use crate::context::EmitterContext; -use crate::emit::identifier::{expose_variable, Identifier}; use crate::emit::invoke::{ emit_capture, emit_function_invocation, emit_pipeline, emit_process_call, emit_redirect, emit_subprocess, emit_substitution, @@ -17,7 +16,6 @@ use crate::emit::structure::{emit_field_access, emit_field_assign}; use crate::locals::LocalsLayout; use crate::r#type::ValueStackSize; -mod identifier; mod invoke; mod 
iterable; mod jump; @@ -78,9 +76,9 @@ fn emit_literal(literal: &LiteralValue, instructions: &mut Instructions, cp: &mu } fn emit_ref( - var: Var, + var: &Var, ctx: &EmitterContext, - ref_type: TypeRef, + ref_type: TypeId, instructions: &mut Instructions, cp: &mut ConstantPool, locals: &LocalsLayout, @@ -89,14 +87,13 @@ fn emit_ref( if size == ValueStackSize::Zero { return; } - match expose_variable(ctx, var, cp) { - Identifier::Local(id) => { - instructions.emit_get_local(id, size, locals); + match var { + Var::Local(id) => { + instructions.emit_get_local(*id, size, locals); + // TODO see if captured: instructions.emit_get_capture(id, size, locals); } - Identifier::Capture(id) => { - instructions.emit_get_capture(id, size, locals); - } - Identifier::External(id) => { + Var::Global(id) => { + let id = cp.get_external(id).unwrap(); instructions.emit_get_external(id, size); } } @@ -110,28 +107,10 @@ fn emit_declaration( locals: &mut LocalsLayout, state: &mut EmissionState, ) { - let variable = ctx - .environment - .symbols - .get(declaration.identifier) - .expect("The declared variable should be in the current environment."); - if let Some(value) = &declaration.value { - locals.set_value_space(declaration.identifier, value.ty); - - if variable.is_exported() - && ctx.environment.is_script - && ValueStackSize::from(value.ty) != ValueStackSize::Zero - { - let offset = locals - .get_index(declaration.identifier) - .expect("Variable just have been declared"); - cp.insert_exported(&variable.name, offset, value.ty.is_obj()); - } - emit_assignment( value, - Var::Local(declaration.identifier), + &declaration.identifier, instructions, ctx, cp, @@ -161,7 +140,7 @@ fn emit_block( fn emit_assignment( value: &TypedExpr, - var: Var, + var: &Var, instructions: &mut Instructions, ctx: &EmitterContext, cp: &mut ConstantPool, @@ -178,15 +157,14 @@ fn emit_assignment( return; } - match expose_variable(ctx, var, cp) { - Identifier::Local(id) => { - instructions.emit_set_local(id, returned_value_type, locals); - } - Identifier::Capture(id) => { - instructions.emit_set_capture(id, returned_value_type, locals); + match var { + Var::Local(id) => { + instructions.emit_set_local(*id, returned_value_type, locals); + // TODO: instructions.emit_set_capture(id, returned_value_type, locals); } - Identifier::External(id) => { - instructions.emit_set_external(id, returned_value_type); + Var::Global(id) => { + let offset = cp.get_external(id).unwrap(); + instructions.emit_set_external(offset, returned_value_type) } } } @@ -217,7 +195,7 @@ pub fn emit( locals: &mut LocalsLayout, state: &mut EmissionState, ) { - instructions.push_position(expr.segment.start); + instructions.push_position(expr.span.start); match &expr.kind { ExprKind::Declare(d) => { emit_declaration(d, instructions, ctx, cp, locals, state); @@ -233,7 +211,7 @@ pub fn emit( ExprKind::Return(val) => emit_return(val, instructions, ctx, cp, locals, state), ExprKind::LocalAssign(ass) => emit_assignment( &ass.rhs, - ass.identifier, + &ass.identifier, instructions, ctx, cp, @@ -248,7 +226,7 @@ pub fn emit( } ExprKind::Reference(symbol) => { if state.use_values { - emit_ref(*symbol, ctx, expr.ty, instructions, cp, locals); + emit_ref(symbol, ctx, expr.ty, instructions, cp, locals); } } ExprKind::Literal(literal) => { @@ -262,16 +240,9 @@ pub fn emit( ExprKind::ProcessCall(args) => { emit_process_call(args, &[], instructions, ctx, cp, locals, state) } - ExprKind::MethodCall(method) => emit_natives( - method.function_id, - method, - expr.ty, - instructions, - ctx, - cp, - 
locals, - state, - ), + ExprKind::MethodCall(method) => { + emit_natives(method, expr.ty, instructions, ctx, cp, locals, state) + } ExprKind::Redirect(redirect) => { emit_redirect(redirect, instructions, ctx, cp, locals, state) } @@ -288,7 +259,6 @@ pub fn emit( emit_substitution(substitution, instructions, ctx, cp, locals, state); } ExprKind::Noop => {} - ExprKind::Convert(_) => unimplemented!(), } - instructions.push_position(expr.segment.start) + instructions.push_position(expr.span.start) } diff --git a/compiler/src/emit/identifier.rs b/compiler/src/emit/identifier.rs deleted file mode 100644 index 62152f78..00000000 --- a/compiler/src/emit/identifier.rs +++ /dev/null @@ -1,62 +0,0 @@ -use crate::constant_pool::ConstantPool; -use crate::emit::EmitterContext; -use analyzer::relations::{LocalId, ResolvedSymbol}; -use analyzer::types::hir::Var; - -/// An identifier for a variable in the bytecode. -pub(super) enum Identifier { - /// A local variable stored in the classical stack. - Local(LocalId), - - /// A local variable that escape the current scope due to a closure. - Capture(ResolvedSymbol), - - /// An external variable, that is stored separately in a memory page. - External(u32), -} - -/// Converts a symbol to an identifier for the runtime. -/// -/// The runtime needs to treat certain variables as exported to store them separately. -/// It mays refers to a local variable or an external symbol, so it needs to be converted -/// to an identifier before being emitted. -pub(super) fn expose_variable(ctx: &EmitterContext, var: Var, cp: &mut ConstantPool) -> Identifier { - match var { - Var::Local(id) => { - let variable = ctx - .environment - .symbols - .get(id) - .expect("The declared variable should be in the current environment."); - if variable.is_exported() && ctx.environment.is_script { - let name = &variable.name; - let symbol_id = cp - .get_external(name) - .expect("External symbol not previously emitted"); - Identifier::External(symbol_id) - } else { - Identifier::Local(id) - } - } - Var::External(resolved) => { - // Distinguish captures and static variables. 
- let environment = ctx - .get_engine(resolved.reef) - .unwrap() - .get_environment(resolved.source) - .expect("Resolved relation targets an unknown environment"); - let variable = environment - .symbols - .get(resolved.object_id) - .expect("Resolved relation targets an unknown variable"); - let is_exported_dynsym = variable.is_exported() && environment.is_script; - if is_exported_dynsym { - let import = &environment.fqn; - let name = &variable.name; - Identifier::External(cp.insert_dynsym(&import.to_string(), name)) - } else { - Identifier::Capture(resolved) - } - } - } -} diff --git a/compiler/src/emit/invoke.rs b/compiler/src/emit/invoke.rs index c8ed9bc6..903bbcad 100644 --- a/compiler/src/emit/invoke.rs +++ b/compiler/src/emit/invoke.rs @@ -1,11 +1,9 @@ +use analyzer::hir::{ExprKind, FunctionCall, Redir, Redirect, Subprocess, Substitute, TypedExpr}; +use analyzer::typing::function::FunctionKind; +use analyzer::typing::registry::SchemaId; +use analyzer::typing::user::{TypeId, UserType, INT_TYPE, STRING_TYPE, VECTOR_TYPE}; use libc::{O_APPEND, O_CREAT, O_RDONLY, O_RDWR, O_TRUNC, O_WRONLY}; -use analyzer::relations::ResolvedSymbol; -use analyzer::types::hir::{ - ExprKind, FunctionCall, Redir, Redirect, Subprocess, Substitute, TypedExpr, Var, -}; -use analyzer::types::ty::{FunctionKind, Type, TypeRef}; -use analyzer::types::{GENERIC_VECTOR, INT, STRING}; use ast::call::{RedirFd, RedirOp}; use crate::bytecode::{Instructions, Opcode}; @@ -128,13 +126,13 @@ pub fn emit_process_call( instructions.patch_jump(jump_to_parent); for local in state.opened_files.drain(..) { - instructions.emit_get_local(local, INT.into(), locals); + instructions.emit_get_local(local, INT_TYPE.into(), locals); instructions.emit_code(Opcode::Close); } // Remove the arguments from the stack, as they were only needed for the child process instructions.emit_code(Opcode::Swap); - instructions.emit_pop(GENERIC_VECTOR.into()); + instructions.emit_pop(VECTOR_TYPE.into()); instructions.emit_code(Opcode::Wait); @@ -172,7 +170,7 @@ fn emit_arguments( for arg in arguments { instructions.emit_code(Opcode::Dup); emit(arg, instructions, ctx, cp, locals, state); - if arg.ty == STRING { + if arg.ty == STRING_TYPE { instructions.emit_invoke(cp.insert_string(VEC_PUSH)); } else { instructions.emit_invoke(cp.insert_string(VEC_EXTEND)); @@ -183,7 +181,7 @@ fn emit_arguments( pub fn emit_function_invocation( function_call: &FunctionCall, - call_return_type: TypeRef, + call_return_type: TypeId, instructions: &mut Instructions, ctx: &EmitterContext, cp: &mut ConstantPool, @@ -192,15 +190,13 @@ pub fn emit_function_invocation( ) { let last_used = state.use_values(true); - let function = ctx - .get_function(function_call.reef, function_call.function_id) - .unwrap(); + let function = &ctx.registry[function_call.function_id]; - for (arg, parameter) in function_call.arguments.iter().zip(&function.parameters) { + for (arg, parameter) in function_call.arguments.iter().zip(&function.param_types) { emit(arg, instructions, ctx, cp, locals, state); // The parameter is an object but the argument isn't: may be an argument passed to a generic parameter if parameter.ty.is_obj() && !arg.ty.is_obj() { - instructions.emit_box_if_primitive(arg.ty) + instructions.emit_box_if_primitive(arg.ty); } } @@ -210,30 +206,19 @@ pub fn emit_function_invocation( // current constructors implementation only supports a default, non user-defined constructor // which is a structure that contains the given parameters. 
- let constructed_structure_id = match ctx.get_type(function.return_type) { - Type::Instantiated(constructed_structure_type, _) => { - let Type::Structure(_, constructed_structure_id) = - ctx.get_type(*constructed_structure_type) - else { - panic!("constructor does not returns a structure type instance") - }; - *constructed_structure_id - } - Type::Structure(_, structure_id) => *structure_id, + let constructed_structure_id: SchemaId = match ctx.types[function.return_type] { + UserType::Parametrized { + schema: constructed_structure_type, + params: _, + } => constructed_structure_type, _ => panic!("constructor does not returns a structure type or structure type instance"), }; - let struct_reef = function.return_type.reef; - // get constructor's structure fully-qualified name - let struct_engine = ctx.get_engine(struct_reef).unwrap(); - let struct_env_id = function_call - .source_id - .expect("cannot call intrinsics functions as regular calls"); - let struct_env = struct_engine.get_environment(struct_env_id).unwrap(); - let struct_fqn = struct_env.fqn.to_string(); + let schema = &ctx.registry[constructed_structure_id]; + let struct_fqn = &schema.name; - let layout = ctx.get_layout(struct_reef, constructed_structure_id); + let layout = &ctx.layouts[constructed_structure_id.get()]; // initialize a new structure instructions.emit_new(cp.insert_string(struct_fqn)); @@ -241,38 +226,7 @@ pub fn emit_function_invocation( // thus we can init it from all the pushed constructor's parameters in the operands instructions.emit_copy_operands(layout.total_size); } else { - let (env, captures) = { - let fun_source = function_call - .source_id - .expect("cannot invoke functions with no environment"); - let captures: &[ResolvedSymbol] = if function_call.reef != ctx.current_reef { - &[] - } else { - ctx.captures[fun_source.0] - .as_ref() - .expect("undefined captures when the function is emitted") - }; - let env = ctx - .get_engine(function_call.reef) - .unwrap() - .get_environment(fun_source) - .unwrap(); - (env, captures) - }; - - for capture in captures { - if capture.source == ctx.chunk_id { - // if its a local value hosted by the caller frame, create a reference - // to the value - instructions.emit_push_stack_ref(Var::Local(capture.object_id), locals); - } else { - // if its a captured variable, get the reference's value from locals - instructions.emit_push_stack_ref(Var::External(*capture), locals); - instructions.emit_code(Opcode::GetRefQWord); - } - } - - let signature_idx = cp.insert_string(&env.fqn); + let signature_idx = cp.insert_string(function.fqn.display()); instructions.emit_invoke(signature_idx); } @@ -316,7 +270,7 @@ pub fn emit_redirect( emit(&redirect.expression, instructions, ctx, cp, locals, state); for local in state.opened_files.drain(..) 
{ - instructions.emit_get_local(local, INT.into(), locals); + instructions.emit_get_local(local, INT_TYPE.into(), locals); instructions.emit_code(Opcode::Close); } for redir in &redirect.redirections { @@ -610,10 +564,10 @@ pub fn emit_substitution( instructions.emit_code(Opcode::Swap); } instructions.emit_code(Opcode::Close); - let local = locals.push_value_space(INT); - instructions.emit_set_local(local, INT.into(), locals); + let local = locals.push_value_space(INT_TYPE); + instructions.emit_set_local(local, INT_TYPE.into(), locals); if state.use_values { - instructions.emit_get_local(local, INT.into(), locals); + instructions.emit_get_local(local, INT_TYPE.into(), locals); instructions.emit_invoke(cp.insert_string("std::process::get_fd_path")); } // Save the fd to close it later (when the callee has finished) diff --git a/compiler/src/emit/iterable.rs b/compiler/src/emit/iterable.rs index f817e7d4..24ccdfe4 100644 --- a/compiler/src/emit/iterable.rs +++ b/compiler/src/emit/iterable.rs @@ -4,14 +4,10 @@ use crate::context::EmitterContext; use crate::emit::native::{STRING_INDEX, STRING_LEN, VEC_INDEX, VEC_LEN}; use crate::emit::{emit, EmissionState}; use crate::locals::LocalsLayout; -use crate::r#type::ValueStackSize; -use analyzer::reef::ReefId; -use analyzer::relations::LocalId; -use analyzer::types::builtin::STRING_STRUCT; -use analyzer::types::engine::StructureId; -use analyzer::types::hir::{ForKind, ForLoop, RangeFor, TypedExpr}; -use analyzer::types::ty::Type; -use analyzer::types::{GENERIC_VECTOR, INT, STRING}; +use analyzer::hir::{ForKind, ForLoop, RangeFor, TypedExpr}; +use analyzer::typing::registry::{STRING_SCHEMA, VEC_SCHEMA}; +use analyzer::typing::user::{UserType, INT_TYPE, STRING_TYPE}; +use analyzer::typing::variable::LocalId; pub(super) fn emit_for_loop( it: &ForLoop, @@ -24,9 +20,9 @@ pub(super) fn emit_for_loop( match it.kind.as_ref() { ForKind::Range(range) => { let type_ref = range.iterable.ty; - let iterable_type = ctx.get_type(type_ref); + let iterable_type = &ctx.types[type_ref]; match iterable_type { - Type::Instantiated(vec, params) if *vec == GENERIC_VECTOR => { + UserType::Parametrized { schema, params } if *schema == VEC_SCHEMA => { let param = params[0]; emit_for_iterable( range, @@ -54,7 +50,7 @@ pub(super) fn emit_for_loop( state, ); } - Type::Structure(_, string) if *string == STRING_STRUCT => { + UserType::Parametrized { schema, params: _ } if *schema == STRING_SCHEMA => { emit_for_iterable( range, &it.body, @@ -69,7 +65,7 @@ pub(super) fn emit_for_loop( }, Opcode::IntLessThan, |_, instructions, cp, locals| { - instructions.emit_get_local(range.receiver, STRING.into(), locals); + instructions.emit_get_local(range.receiver, STRING_TYPE.into(), locals); instructions.emit_invoke(cp.insert_string(STRING_LEN)); }, instructions, @@ -79,40 +75,40 @@ pub(super) fn emit_for_loop( state, ); } - Type::Structure(_, structure_id) => { - // Int range - let layout = ctx.get_layout(ReefId(1), *structure_id); - emit_for_iterable( - range, - &it.body, - |iterator_id, instructions, _, locals| { - // Emit start - instructions.emit_get_local(iterator_id, type_ref.into(), locals); - instructions.emit_get_field(LocalId(0), layout); - }, - |instructions, _| { - instructions.emit_code(Opcode::Swap); - instructions.emit_pop(ValueStackSize::QWord); - }, - |instructions, _| { - instructions.emit_get_field(LocalId(1), layout); - }, - if *structure_id == StructureId(0) { - Opcode::IntLessThan - } else { - Opcode::IntLessOrEqual - }, - |iterator_id, instructions, _, locals| { - 
instructions.emit_get_local(iterator_id, type_ref.into(), locals); - instructions.emit_get_field(LocalId(2), layout); - }, - instructions, - ctx, - cp, - locals, - state, - ); - } + // Type::Structure(_, structure_id) => { + // // Int range + // let layout = ctx.get_layout(ReefId(1), *structure_id); + // emit_for_iterable( + // range, + // &it.body, + // |iterator_id, instructions, _, locals| { + // // Emit start + // instructions.emit_get_local(iterator_id, type_ref.into(), locals); + // instructions.emit_get_field(LocalId(0), layout); + // }, + // |instructions, _| { + // instructions.emit_code(Opcode::Swap); + // instructions.emit_pop(ValueStackSize::QWord); + // }, + // |instructions, _| { + // instructions.emit_get_field(LocalId(1), layout); + // }, + // if *structure_id == StructureId(0) { + // Opcode::IntLessThan + // } else { + // Opcode::IntLessOrEqual + // }, + // |iterator_id, instructions, _, locals| { + // instructions.emit_get_local(iterator_id, type_ref.into(), locals); + // instructions.emit_get_field(LocalId(2), layout); + // }, + // instructions, + // ctx, + // cp, + // locals, + // state, + // ); + // } _ => panic!("Unexpected iterable {iterable_type:?} type"), } } @@ -179,13 +175,13 @@ pub(super) fn emit_for_iterable< state.use_values(last_used); instructions.emit_set_local(iterator_id, iterable.ty.into(), locals); - let index_id = locals.push_value_space(INT); + let index_id = locals.push_value_space(INT_TYPE); initial_value(iterator_id, instructions, cp, locals); - instructions.emit_set_local(index_id, INT.into(), locals); + instructions.emit_set_local(index_id, INT_TYPE.into(), locals); let loop_start = instructions.current_ip(); let mut loop_state = EmissionState::in_loop(); - instructions.emit_get_local(index_id, INT.into(), locals); + instructions.emit_get_local(index_id, INT_TYPE.into(), locals); instructions.emit_get_local(iterator_id, iterable.ty.into(), locals); len(instructions, cp); instructions.emit_code(comparator); @@ -193,9 +189,8 @@ pub(super) fn emit_for_iterable< loop_state.enclosing_loop_end_placeholders.push(jump_to_end); // Indexes the iterable and stores the result in the receiver. 
- locals.set_value_space(*receiver, *receiver_type); instructions.emit_get_local(iterator_id, iterable.ty.into(), locals); - instructions.emit_get_local(index_id, INT.into(), locals); + instructions.emit_get_local(index_id, INT_TYPE.into(), locals); indexer(instructions, cp); instructions.emit_set_local(*receiver, (*receiver_type).into(), locals); @@ -203,10 +198,10 @@ pub(super) fn emit_for_iterable< for jump_to_increment in loop_state.enclosing_loop_start_placeholders { instructions.patch_jump(jump_to_increment); } - instructions.emit_get_local(index_id, INT.into(), locals); + instructions.emit_get_local(index_id, INT_TYPE.into(), locals); increment(iterator_id, instructions, cp, locals); instructions.emit_code(Opcode::IntAdd); - instructions.emit_set_local(index_id, INT.into(), locals); + instructions.emit_set_local(index_id, INT_TYPE.into(), locals); instructions.jump_back_to(loop_start); for jump_to_end in loop_state.enclosing_loop_end_placeholders { diff --git a/compiler/src/emit/jump.rs b/compiler/src/emit/jump.rs index 75768976..9080669f 100644 --- a/compiler/src/emit/jump.rs +++ b/compiler/src/emit/jump.rs @@ -1,9 +1,8 @@ -use analyzer::types::hir::{Conditional, Loop}; - use crate::bytecode::{Instructions, Opcode}; use crate::constant_pool::ConstantPool; use crate::emit::{emit, EmissionState, EmitterContext}; use crate::locals::LocalsLayout; +use analyzer::hir::{Conditional, Loop}; pub fn emit_conditional( conditional: &Conditional, diff --git a/compiler/src/emit/native.rs b/compiler/src/emit/native.rs index c0db4410..ce867843 100644 --- a/compiler/src/emit/native.rs +++ b/compiler/src/emit/native.rs @@ -1,12 +1,9 @@ -use analyzer::types::engine::FunctionId; -use analyzer::types::hir::MethodCall; -use analyzer::types::ty::TypeRef; - -use crate::bytecode::{Instructions, Opcode}; +use crate::bytecode::Instructions; use crate::constant_pool::ConstantPool; -use crate::emit::{emit, EmissionState, EmitterContext}; +use crate::emit::{EmissionState, EmitterContext}; use crate::locals::LocalsLayout; -use crate::r#type::ValueStackSize; +use analyzer::hir::MethodCall; +use analyzer::typing::user::TypeId; const STRING_EQ: &str = "lang::String::eq"; const STRING_CONCAT: &str = "lang::String::concat"; @@ -26,308 +23,18 @@ const STRING_BYTES: &str = "lang::String::bytes"; const GLOB_EXPAND: &str = "lang::glob::expand"; /// Emits a primitive sequence of instructions. -#[allow(clippy::get_first, clippy::too_many_arguments)] pub(crate) fn emit_natives( - native: FunctionId, MethodCall { callee, arguments: args, .. 
}: &MethodCall, - receiver_ty: TypeRef, + receiver_ty: TypeId, instructions: &mut Instructions, ctx: &EmitterContext, cp: &mut ConstantPool, locals: &mut LocalsLayout, state: &mut EmissionState, ) { - let last_used = state.use_values(true); - emit(callee, instructions, ctx, cp, locals, state); - - match native.0 { - 0 => { - // ExitCode -> Bool - instructions.emit_bool_inversion(); - } - 1..=9 => { - // Arithmetic expression - emit( - args.get(0).expect("A binary expression takes two operands"), - instructions, - ctx, - cp, - locals, - state, - ); - instructions.emit_code(match native.0 { - 1 => Opcode::IntAdd, - 2 => Opcode::FloatAdd, - 3 => Opcode::IntSub, - 4 => Opcode::FloatSub, - 5 => Opcode::IntMul, - 6 => Opcode::FloatMul, - 7 => Opcode::IntDiv, - 8 => Opcode::FloatDiv, - 9 => Opcode::IntMod, - _ => unreachable!("Not a numeric binary expression"), - }); - } - 10 => { - // !bool - instructions.emit_bool_inversion(); - } - 11..=26 => { - // Comparison expression - emit( - args.get(0).expect("A comparison takes two operands"), - instructions, - ctx, - cp, - locals, - state, - ); - - match native.0 { - 11 => { - // bool == bool - instructions.emit_code(Opcode::BXor); - instructions.emit_bool_inversion(); - } - 12 => { - // bool != bool - instructions.emit_code(Opcode::BXor); - } - 13 => { - // String == String - instructions.emit_invoke(cp.insert_string(STRING_EQ)); - } - 14 => { - // String != String - instructions.emit_invoke(cp.insert_string(STRING_EQ)); - instructions.emit_bool_inversion(); - } - 15 => instructions.emit_code(Opcode::IntEqual), - 16 => { - // Int != Int - instructions.emit_code(Opcode::IntEqual); - instructions.emit_bool_inversion(); - } - 17 => instructions.emit_code(Opcode::IntLessThan), - 18 => instructions.emit_code(Opcode::IntLessOrEqual), - 19 => instructions.emit_code(Opcode::IntGreaterThan), - 20 => instructions.emit_code(Opcode::IntGreaterOrEqual), - 21 => instructions.emit_code(Opcode::FloatEqual), - 22 => { - // Float != Float - instructions.emit_code(Opcode::FloatEqual); - instructions.emit_bool_inversion(); - } - 23 => instructions.emit_code(Opcode::FloatLessThan), - 24 => instructions.emit_code(Opcode::FloatLessOrEqual), - 25 => instructions.emit_code(Opcode::FloatGreaterThan), - 26 => instructions.emit_code(Opcode::FloatGreaterOrEqual), - _ => unreachable!("Not a comparison expression"), - } - } - 27 => { - // Bool -> String - // Emit the opcodes for: - // if (bool) { - // "true" - // } else { - // "false" - // } - let true_string = cp.insert_string("true"); - let false_string = cp.insert_string("false"); - let jump_to_else = instructions.emit_jump(Opcode::IfNotJump); - instructions.emit_push_constant_ref(true_string); - let jump_to_end = instructions.emit_jump(Opcode::Jump); - instructions.patch_jump(jump_to_else); - instructions.emit_push_constant_ref(false_string); - instructions.patch_jump(jump_to_end); - } - 28 => { - // ExitCode -> String - instructions.emit_code(Opcode::ConvertByteToInt); - instructions.emit_invoke(cp.insert_string(INT_TO_STRING)); - } - 29 | 54 => { - // Int -> String - instructions.emit_invoke(cp.insert_string(INT_TO_STRING)); - } - 30 => { - // Float -> String - instructions.emit_invoke(cp.insert_string(FLOAT_TO_STRING)); - } - 32 => { - // String.len() -> Int - instructions.emit_invoke(cp.insert_string(STRING_LEN)); - } - 33 => { - // String + String -> String - emit( - args.get(0) - .expect("Cannot concatenate a string without a second string"), - instructions, - ctx, - cp, - locals, - state, - ); - 
instructions.emit_invoke(cp.insert_string(STRING_CONCAT)); - } - 34 => { - // vector[Int] -> T - emit( - args.get(0).expect("Cannot index a vector without an index"), - instructions, - ctx, - cp, - locals, - state, - ); - instructions.emit_invoke(cp.insert_string(VEC_INDEX)); - } - 35 => { - // vector.push(T) - let first = args - .first() - .expect("Cannot push to a vector without a value"); - emit(first, instructions, ctx, cp, locals, state); - if state.use_values { - instructions.emit_box_if_primitive(first.ty); - } - instructions.emit_invoke(cp.insert_string(VEC_PUSH)); - } - 36 => { - // vector.pop() T - instructions.emit_invoke(cp.insert_string(VEC_POP)); - } - 37 => { - // vector.len() - instructions.emit_invoke(cp.insert_string(VEC_LEN)); - } - 38 => { - // string.split(delim) - emit( - args.get(0) - .expect("Cannot split a string without a delimiter"), - instructions, - ctx, - cp, - locals, - state, - ); - instructions.emit_invoke(cp.insert_string(STRING_SPLIT)); - } - 39 => { - // string.bytes() - instructions.emit_invoke(cp.insert_string(STRING_BYTES)); - } - 40 | 42 => { - // Bool && Bool -> Bool - instructions.emit_code(Opcode::DupByte); - let end_jump = instructions.emit_jump(if native.0 == 42 { - Opcode::IfJump - } else { - Opcode::IfNotJump - }); - instructions.emit_pop(ValueStackSize::Byte); - emit( - args.get(0) - .expect("Cannot AND a boolean without a second boolean"), - instructions, - ctx, - cp, - locals, - state, - ); - instructions.patch_jump(end_jump); - } - 41 | 43 => { - // Bool || Bool -> Bool - instructions.emit_code(Opcode::DupByte); - let else_jump = instructions.emit_jump(if native.0 == 43 { - Opcode::IfJump - } else { - Opcode::IfNotJump - }); - let end_jump = instructions.emit_jump(Opcode::Jump); - instructions.patch_jump(else_jump); - instructions.emit_pop(ValueStackSize::Byte); - emit( - args.get(0) - .expect("Cannot OR a boolean without a second boolean"), - instructions, - ctx, - cp, - locals, - state, - ); - instructions.patch_jump(end_jump); - } - 44 => { - // -Int -> Int - instructions.emit_code(Opcode::IntNeg); - } - 45 => { - // -Float -> Float - instructions.emit_code(Opcode::FloatNeg); - } - 46 | 47 => { - // Option[T].is_some() -> bool - instructions.emit_push_int(0); - instructions.emit_code(Opcode::IntEqual); - if native.0 == 47 { - instructions.emit_bool_inversion(); - } - } - 48 => { - // Option[T].unwrap() -> T - instructions.emit_code(Opcode::Dup); - instructions.emit_push_int(0); - instructions.emit_code(Opcode::IntEqual); - let end_jump = instructions.emit_jump(Opcode::IfNotJump); - instructions.emit_push_constant_ref(cp.insert_string("Cannot unwrap `None`.")); - instructions.emit_invoke(cp.insert_string("std::panic")); - instructions.patch_jump(end_jump); - } - 49 => { - // Vec[T][int] = T - for arg in args { - emit(arg, instructions, ctx, cp, locals, state); - } - instructions.emit_box_if_primitive(args[1].ty); - instructions.emit_invoke(cp.insert_string(VEC_INDEX_EQ)); - } - 50 => { - // Int -> Exitcode - instructions.emit_code(Opcode::ConvertIntToByte); - } - 51 => { - // Exitcode -> Int - instructions.emit_code(Opcode::ConvertByteToInt); - } - 52 => { - // Vec[A]::pop_head() -> Option[A] - instructions.emit_invoke(cp.insert_string(VEC_POP_HEAD)); - } - 53 => { - // Glob::spread() -> Vec[String] - instructions.emit_invoke(cp.insert_string(GLOB_EXPAND)); - } - id => todo!("Native function with id {id}"), - }; - - let has_generic_return = matches!(native.0, 34 | 36 | 48); - if has_generic_return && !receiver_ty.is_obj() { - 
instructions.emit_code(Opcode::Unbox);
-    }
-
-    // restore last state of use_values
-    state.use_values(last_used);
-
-    if !state.use_values {
-        instructions.emit_pop(ValueStackSize::from(receiver_ty));
-    }
+    todo!("Emit native function calls")
 }
diff --git a/compiler/src/emit/structure.rs b/compiler/src/emit/structure.rs
index 27448861..eefecc60 100644
--- a/compiler/src/emit/structure.rs
+++ b/compiler/src/emit/structure.rs
@@ -1,9 +1,8 @@
-use analyzer::types::hir::{FieldAccess, FieldAssign};
-
 use crate::bytecode::Instructions;
 use crate::constant_pool::ConstantPool;
 use crate::emit::{emit, EmissionState, EmitterContext};
 use crate::locals::LocalsLayout;
+use analyzer::hir::{FieldAccess, FieldAssign};
 
 pub fn emit_field_access(
     access: &FieldAccess,
@@ -16,7 +15,7 @@ pub fn emit_field_access(
     let uses = state.use_values(true);
     emit(&access.object, instructions, ctx, cp, locals, state);
     state.use_values(uses);
-    let layout = ctx.get_layout(access.structure_reef, access.structure);
+    let layout = &ctx.layouts[access.structure.get()];
     instructions.emit_get_field(access.field, layout);
 }
 
@@ -33,7 +32,7 @@ pub fn emit_field_assign(
     emit(&assign.new_value, instructions, ctx, cp, locals, state);
     state.use_values(value_used);
 
-    let layout = ctx.get_layout(assign.structure_reef, assign.structure);
+    let layout = &ctx.layouts[assign.structure.get()];
     instructions.emit_set_field(assign.field, layout);
 }
diff --git a/compiler/src/externals.rs b/compiler/src/externals.rs
deleted file mode 100644
index d5ed2359..00000000
--- a/compiler/src/externals.rs
+++ /dev/null
@@ -1,35 +0,0 @@
-use analyzer::reef::{ReefId, LANG_REEF};
-
-use crate::structure::StructureLayout;
-
-#[derive(Debug, Clone, PartialEq, Default)]
-pub struct CompiledReef {
-    pub layouts: Vec<StructureLayout>,
-}
-
-/// Contains information about external reefs.
-/// This structure is an extension of the analyzer's externals with
-/// additional information required for compilation.
-/// ReefIds start at 1: the lang reef is not compiled, and its structures are intrinsics to the compiler,
-/// thus it will never try to access a compiled reef for the lang reef.
-#[derive(Default)] -pub struct CompilerExternals { - compiled_reefs: Vec, -} - -impl CompilerExternals { - pub fn get_compiled_reef(&self, id: ReefId) -> Option<&CompiledReef> { - debug_assert!( - id != LANG_REEF, - "lang reef does not have compilation information" - ); - self.compiled_reefs.get(id.0 - 1) - } - - pub fn set(&mut self, id: ReefId, reef: CompiledReef) { - if self.compiled_reefs.len() < id.0 { - self.compiled_reefs.resize(id.0, CompiledReef::default()) - } - self.compiled_reefs[id.0 - 1] = reef - } -} diff --git a/compiler/src/lib.rs b/compiler/src/lib.rs index d8a5c428..0cd97cb0 100644 --- a/compiler/src/lib.rs +++ b/compiler/src/lib.rs @@ -1,22 +1,14 @@ -use std::collections::HashSet; use std::io; use std::io::Write; use ::context::source::ContentId; -use analyzer::engine::Engine; -use analyzer::environment::symbols::SymbolInfo; -use analyzer::reef::{Externals, ReefId}; -use analyzer::relations::{Relations, ResolvedSymbol, SourceId}; -use analyzer::types::engine::{Chunk, ChunkKind, StructureId, TypedEngine}; -use analyzer::types::hir::ExprKind; -use analyzer::types::ty::Type; -use analyzer::types::Typing; +use analyzer::hir::{Chunk, EncodableContent, NamedExports}; +use analyzer::{Database, Reef}; use crate::bytecode::{Bytecode, InstructionPos, Instructions}; use crate::constant_pool::ConstantPool; use crate::context::EmitterContext; use crate::emit::{emit, EmissionState}; -use crate::externals::{CompiledReef, CompilerExternals}; use crate::locals::LocalsLayout; use crate::r#type::{get_type_stack_size, ValueStackSize}; use crate::structure::StructureLayout; @@ -25,13 +17,10 @@ pub mod bytecode; mod constant_pool; mod context; mod emit; -pub mod externals; mod locals; mod structure; mod r#type; -pub(crate) type Captures = Vec>>; - pub trait SourceLineProvider { /// returns the line, starting from one, attributed to the given byte position of given content. 
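    ///
    /// A minimal sketch of the line computation an implementation might perform
    /// (illustrative only, not part of this trait): count the newlines that occur
    /// before `byte_pos` and report a 1-based line number.
    ///
    /// ```ignore
    /// fn line_of(source: &str, byte_pos: usize) -> Option<usize> {
    ///     if byte_pos > source.len() {
    ///         return None;
    ///     }
    ///     // 1-based: a position before the first newline is on line 1.
    ///     Some(source.as_bytes()[..byte_pos].iter().filter(|&&b| b == b'\n').count() + 1)
    /// }
    /// ```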
fn get_line(&self, content: ContentId, byte_pos: usize) -> Option; @@ -45,159 +34,70 @@ pub struct CompilerOptions<'a> { const MAPPINGS_ATTRIBUTE: u8 = 1; -fn compile_layouts(typed_engine: &TypedEngine) -> Vec { - let mut layouts = Vec::new(); - for structure in typed_engine.iter_structures() { - layouts.push(StructureLayout::from(structure)) - } - - layouts -} - -#[allow(clippy::too_many_arguments)] pub fn compile_reef( - typed_engine: &TypedEngine, - relations: &Relations, - typing: &Typing, - link_engine: &Engine, - externals: &Externals, - compiler_externals: &CompilerExternals, - reef_id: ReefId, - starting_page: SourceId, + database: &Database, + reef: &Reef, writer: &mut impl Write, options: CompilerOptions, -) -> Result { - let layouts = compile_layouts(typed_engine); - let captures = resolve_captures(link_engine, relations, reef_id); - +) -> Result<(), io::Error> { let mut bytecode = Bytecode::default(); let mut cp = ConstantPool::default(); - - let mut it = typed_engine.group_by_content(link_engine, starting_page); - while let Some(content) = it.next() { - // emitting page's main function (usually a script's root code) - let (chunk_id, main_env, main_chunk) = content.main_chunk(&it); + let layouts = Vec::::new(); + + for EncodableContent { + main, + functions, + exports, + } in reef.group_by_content() + { let ctx = EmitterContext { - current_reef: reef_id, - engine: link_engine, - typing, - typed_engine, - externals, - compiler_externals, - environment: main_env, - captures: &captures, - chunk_id, + types: &database.checker.types, + registry: &database.checker.registry, layouts: &layouts, }; - let page_size = - compile_function_chunk(main_chunk, chunk_id, &ctx, &mut bytecode, &mut cp, &options) - .unwrap(); + let mut page_size = 0u32; + for (name, ty) in exports { + let size = ValueStackSize::from(*ty) as u8; + let offset = page_size; + page_size += size as u32; + cp.insert_exported(name, offset, ty.is_obj()); + } + + compile_function_chunk(main, exports, &ctx, &mut bytecode, &mut cp, &options); + write_exported(&mut cp, page_size, &mut bytecode)?; - // compile structures - let structures: Vec<_> = iter_structs(typing).collect(); - - bytecode.emit_u32(structures.len() as u32); - - for (structure_env_id, structure_id) in structures { - let structure_env = link_engine.get_environment(structure_env_id).unwrap(); - let structure = typed_engine.get_structure(structure_id).unwrap(); - bytecode.emit_constant_ref(cp.insert_string(structure_env.fqn.to_string())); - let fields = structure.get_fields(); - - // set structure bytes length and objects indexes - let mut structure_bytes_count = 0; - let mut structure_object_indexes_len = 0; - let structure_bytes_count_ph = bytecode.emit_u32_placeholder(); - let structure_object_indexes_len_ph = bytecode.emit_u32_placeholder(); - for field in fields { - if field.ty.is_obj() { - structure_object_indexes_len += 1; - bytecode.emit_u32(structure_bytes_count); - } - structure_bytes_count += u8::from(ValueStackSize::from(field.ty)) as u32; - } - bytecode.patch_u32_placeholder(structure_bytes_count_ph, structure_bytes_count); - bytecode.patch_u32_placeholder( - structure_object_indexes_len_ph, - structure_object_indexes_len, - ); - } + bytecode.emit_u32(layouts.len() as u32); + bytecode.emit_u32(functions.len() as u32); - // compile functions (filter out unimplemented functions) - let chunk_functions: Vec<_> = content.defined_functions(&it).collect(); - - bytecode.emit_u32(chunk_functions.len() as u32); - - for (chunk_id, env, chunk) in chunk_functions 
{ - let ctx = EmitterContext { - current_reef: reef_id, - engine: link_engine, - typing, - typed_engine, - externals, - compiler_externals, - environment: env, - captures: &captures, - chunk_id, - layouts: &layouts, - }; - - compile_function_chunk( - chunk, - chunk_id, - &ctx, - &mut bytecode, - &mut cp, - &CompilerOptions { - last_page_storage_var: None, - ..options - }, - ); + for function in functions { + compile_function_chunk(function, exports, &ctx, &mut bytecode, &mut cp, &options); } } write(writer, &bytecode, &cp)?; - - Ok(CompiledReef { layouts }) -} - -fn iter_structs(typing: &Typing) -> impl Iterator + '_ { - typing.iter().filter_map(|(_, tpe)| match tpe { - &Type::Structure(Some(env), structure_id) => Some((env, structure_id)), - _ => None, - }) + Ok(()) } fn compile_function_chunk( chunk: &Chunk, - id: SourceId, + exports: &NamedExports, ctx: &EmitterContext, bytecode: &mut Bytecode, cp: &mut ConstantPool, options: &CompilerOptions, -) -> Option { +) { // emit the function's name - let signature_idx = cp.insert_string(ctx.environment.fqn.clone()); + let signature_idx = cp.insert_string(chunk.fqn.display()); bytecode.emit_constant_ref(signature_idx); // emits chunk's code attribute - let (page_size, segments) = compile_code(chunk, id, bytecode, ctx, cp, options); + let segments = compile_code(chunk, exports, bytecode, ctx, cp, options); let line_provider = options.line_provider; let attribute_count = line_provider.map_or(0, |_| 1); bytecode.emit_byte(attribute_count); - - if let Some(line_provider) = line_provider { - let content = ctx.engine.get_original_content(id); - - let Some(content_id) = content else { - return page_size; - }; - compile_line_mapping_attribute(segments, content_id, bytecode, line_provider); - } - page_size } fn compile_line_mapping_attribute( @@ -249,83 +149,6 @@ fn compile_line_mapping_attribute( } } -/// Resolves all captured variables of a given chunk identifier. -/// -/// This function will resolve all direct captures of the chunk and the captures of its inner chunks. -/// All resolved captures are set into the given `captures` vector. 
-fn resolve_captures(engine: &Engine, relations: &Relations, compiled_reef: ReefId) -> Captures { - let mut externals = HashSet::new(); - let mut captures = vec![None; engine.len()]; - - fn resolve( - chunk_id: SourceId, - compiled_reef: ReefId, - engine: &Engine, - relations: &Relations, - captures: &mut Vec>>, - externals: &mut HashSet, - ) { - let env = engine.get_environment(chunk_id).unwrap(); - - // recursively resolve all inner functions - for func_id in env.iter_direct_inner_environments() { - resolve( - func_id, - compiled_reef, - engine, - relations, - captures, - externals, - ); - // filter out external symbols that refers to the current chunk - externals.retain(|symbol| symbol.source != chunk_id); - } - - // add this function's external referenced variables - externals.extend( - env.symbols - .external_symbols() - .map(|(_, relation)| { - relations[relation] - .state - .expect_resolved("unresolved relation during compilation") - }) - .filter(|symbol| { - symbol.reef == compiled_reef && { - // filter out functions - let env = engine.get_environment(symbol.source).unwrap(); - let var = env.symbols.get(symbol.object_id).unwrap(); - var.ty == SymbolInfo::Variable && !(env.is_script && var.is_exported()) - } - }), - ); - - let mut chunk_captures: Vec = externals.iter().copied().collect(); - - chunk_captures.sort_by(|a, b| { - a.source - .0 - .cmp(&b.source.0) - .then_with(|| a.object_id.0.cmp(&b.object_id.0)) - }); - - captures[chunk_id.0] = Some(chunk_captures) - } - - // Resolve captures of all environments, starting from the roots of each module - for (engine_id, _) in engine.environments().filter(|(_, chunk)| chunk.is_script) { - resolve( - engine_id, - compiled_reef, - engine, - relations, - &mut captures, - &mut externals, - ); - } - captures -} - /// compiles chunk's code attribute /// the code attribute of a chunk is a special attribute that contains the bytecode instructions and /// locals specifications @@ -333,94 +156,45 @@ fn resolve_captures(engine: &Engine, relations: &Relations, compiled_reef: ReefI /// returns the page length (if chunk is a script) and the hir's segments associated with their first instruction. 
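///
/// For instance, the parameter byte count written in the header is simply the sum of the
/// stack sizes of the declared parameters (a hedged illustration; sizes come from
/// `ValueStackSize`, where booleans take one byte and ints, floats and references take eight):
///
/// ```ignore
/// // a function taking (Int, Bool, String):
/// let sizes: [u32; 3] = [8, 1, 8];
/// let parameters_bytes_count: u32 = sizes.iter().sum(); // 17
/// ```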
fn compile_code( chunk: &Chunk, - chunk_id: SourceId, + exports: &NamedExports, bytecode: &mut Bytecode, ctx: &EmitterContext, cp: &mut ConstantPool, options: &CompilerOptions, -) -> (Option, Vec) { +) -> Vec { let locals_byte_count = bytecode.emit_u32_placeholder(); - - let chunk_captures = ctx.captures[chunk_id.0] - .as_ref() - .expect("unresolved capture after resolution"); - - let function_id = match ctx.typing.get_type(chunk.function_type).unwrap() { - Type::Function(_, id) => *id, - _ => panic!("attempted to compile a non-function chunk."), + let (parameters, return_bytes_count) = if let Some(function_id) = chunk.function { + let function = &ctx.registry[function_id]; + let return_bytes_count = get_type_stack_size(function.return_type) as u8; + (function.param_types.as_slice(), return_bytes_count) + } else { + ([].as_slice(), 0u8) }; - - let function = ctx.get_function(ctx.current_reef, function_id).unwrap(); - - // compute the chunk's parameters bytes length - let parameters_bytes_count: u32 = { - let explicit_params_count: u32 = function - .parameters - .iter() - .map(|p| Into::::into(get_type_stack_size(p.ty)) as u32) - .sum::(); - let captures_params_count: u32 = - chunk_captures.len() as u32 * u8::from(ValueStackSize::QWord) as u32; - explicit_params_count + captures_params_count - }; - + let parameters_bytes_count = parameters + .iter() + .map(|p| get_type_stack_size(p.ty) as u8 as u32) + .sum(); bytecode.emit_u32(parameters_bytes_count); - // emit the function's return bytes count - let return_bytes_count: u8 = get_type_stack_size(function.return_type).into(); bytecode.emit_byte(return_bytes_count); - let use_value = return_bytes_count != 0 || options.last_page_storage_var.is_some(); - // emit instruction count placeholder let instruction_count = bytecode.emit_u32_placeholder(); - let mut instructions = Instructions::wrap(bytecode); - let var_count = ctx.environment.symbols.all().len() + chunk_captures.len(); - let mut locals = LocalsLayout::new(var_count); - - // set space for explicit parameters - for param in function.parameters.iter() { - locals.set_value_space(param.local_id, param.ty) - } - - // set space for implicit captures - for id in chunk_captures { - locals.init_external_ref_space(*id) - } + let mut locals = LocalsLayout::new(&chunk.locals); + let use_value = return_bytes_count != 0 || options.last_page_storage_var.is_some(); let mut state = EmissionState { use_values: use_value, ..EmissionState::default() }; - let chunk_is_script = ctx.environment.is_script; - - if let ChunkKind::DefinedFunction(code) = &chunk.kind { - let code = code - .as_ref() - .expect("defined function should have its body typed"); - emit(code, &mut instructions, ctx, cp, &mut locals, &mut state); - - if let Some(storage_exported_val) = &options.last_page_storage_var { - assert!( - chunk_is_script, - "only script chunks can store their last expression value in a storage export" - ); - let last_expr = if let ExprKind::Block(b) = &code.kind { - b.last().unwrap_or(code) - } else { - code - }; - - let page_offset = cp.exported.last().map_or(0, |exp| { - exp.page_offset + u8::from(ValueStackSize::QWord) as u32 - }); - cp.insert_exported(storage_exported_val, page_offset, last_expr.ty.is_obj()); - instructions.emit_set_external( - cp.get_external(storage_exported_val).unwrap(), - last_expr.ty.into(), - ) - } - } + emit( + &chunk.expr, + &mut instructions, + ctx, + cp, + &mut locals, + &mut state, + ); // patch instruction count placeholder let instruction_byte_count = instructions.current_ip(); @@ 
-436,15 +210,7 @@ fn compile_code( bytecode.emit_u32(offset) } - if !chunk_is_script { - return (None, segments); - } - - let mut page_length = locals_length; - if options.last_page_storage_var.is_some() { - page_length += u8::from(ValueStackSize::QWord) as u32 - } - (Some(page_length), segments) + segments } fn write( @@ -490,81 +256,3 @@ fn write_exported( pool.exported.clear(); Ok(()) } - -#[cfg(test)] -mod tests { - use pretty_assertions::assert_eq; - - use analyzer::importer::StaticImporter; - use analyzer::name::Name; - use analyzer::reef::{Externals, ReefId}; - use analyzer::relations::{LocalId, ResolvedSymbol, SourceId}; - use parser::parse_trusted; - - use crate::resolve_captures; - - #[test] - fn test_inner_functions_captures() { - let src = "\ - fun foo() = {\ - var i = 0 - var b = 1 - fun foo1(n: Int) = { - fun foo2() = { - echo $n $i - } - echo $b - } - fun bar() = { - fun bar1() = { - fun bar2() = { - $i - } - } - } - }\ - "; - let externals = Externals::default(); - let reef_id = ReefId(1); - let analyzer = analyzer::analyze( - Name::new("test"), - &mut StaticImporter::new([(Name::new("test"), src)], parse_trusted), - &externals, - ); - let captures = resolve_captures( - &analyzer.resolution.engine, - &analyzer.resolution.relations, - reef_id, - ); - - assert_eq!( - captures, - vec![ - Some(vec![]), //root - Some(vec![]), //foo - Some(vec![ - //foo1 - ResolvedSymbol::new(reef_id, SourceId(1), LocalId(0)), - ResolvedSymbol::new(reef_id, SourceId(1), LocalId(1)), - ]), - Some(vec![ - //foo2 - ResolvedSymbol::new(reef_id, SourceId(1), LocalId(0)), - ResolvedSymbol::new(reef_id, SourceId(2), LocalId(0)), - ]), - Some(vec![ - //bar - ResolvedSymbol::new(reef_id, SourceId(1), LocalId(0)), - ]), - Some(vec![ - //bar1 - ResolvedSymbol::new(reef_id, SourceId(1), LocalId(0)), - ]), - Some(vec![ - //bar2 - ResolvedSymbol::new(reef_id, SourceId(1), LocalId(0)), - ]), - ] - ) - } -} diff --git a/compiler/src/locals.rs b/compiler/src/locals.rs index b2bf505f..893d9fd9 100644 --- a/compiler/src/locals.rs +++ b/compiler/src/locals.rs @@ -1,95 +1,59 @@ -use std::collections::hash_map::Entry; -use std::collections::HashMap; - -use analyzer::relations::{LocalId, ResolvedSymbol}; -use analyzer::types::hir::Var; -use analyzer::types::ty::TypeRef; +use analyzer::typing::user::TypeId; +use analyzer::typing::variable::{LocalEnvironment, LocalId}; use crate::r#type::ValueStackSize; /// contains the different index per local value allocated in the locals area pub struct LocalsLayout { /// the start indexes of bound Locals - values_indexes: Vec>, - /// the start indexes of bound external values - external_refs_indexes: HashMap, + values_indexes: Vec<(u32, bool)>, + /// the length in bytes len: u32, } impl LocalsLayout { - pub fn new(var_count: usize) -> Self { - let var_indexes = vec![None; var_count]; - let external_ref_indexes = HashMap::default(); + pub fn new(locals: &LocalEnvironment) -> Self { + let mut next_offset = 0u32; Self { - values_indexes: var_indexes, - external_refs_indexes: external_ref_indexes, - len: 0, + values_indexes: locals + .locals + .iter() + .map(|local| { + let size = ValueStackSize::from(local.ty) as u8; + let index = (next_offset, local.ty.is_obj()); + next_offset += size as u32; + index + }) + .collect::>(), + len: next_offset, } } - /// Reserves the space in the locals depending on the stack size needed by the given type. - /// - /// Different initialization orders will result in different indexes. - /// - /// # Panics - /// Panics if the local id is out of bounds. 
- pub fn set_value_space(&mut self, id: LocalId, tpe: TypeRef) { - let size: u8 = ValueStackSize::from(tpe).into(); - self.values_indexes[id.0] = Some((self.len, tpe.is_obj())); - self.len += size as u32; - } - /// Creates a new local and reserves the space for it. - pub fn push_value_space(&mut self, tpe: TypeRef) -> LocalId { + pub fn push_value_space(&mut self, tpe: TypeId) -> LocalId { let id = LocalId(self.values_indexes.len()); - let size: u8 = ValueStackSize::from(tpe).into(); - self.values_indexes.push(Some((self.len, tpe.is_obj()))); + let size = ValueStackSize::from(tpe) as u8; + self.values_indexes.push((self.len, tpe.is_obj())); self.len += size as u32; id } - /// Reserves the space in local's of the external reference, if not already set. - /// - /// Different initialization orders will result in different indexes. - pub fn init_external_ref_space(&mut self, symbol: ResolvedSymbol) { - match self.external_refs_indexes.entry(symbol) { - Entry::Occupied(_) => {} - Entry::Vacant(v) => { - v.insert(self.len); - self.len += u8::from(ValueStackSize::QWord) as u32; - } - } - } - /// Get the starting byte index allocated for the given local. /// and a flag specifying if the local refers to an object reference. /// - /// Returns [`None`] if the local is size has not yet been initialized. - /// /// # Panics /// Panics if the local id is out of bounds. - pub fn get_index(&self, id: LocalId) -> Option { - self.values_indexes[id.0].map(|(pos, _)| pos) - } - - pub fn get_var_index(&self, var: Var) -> Option { - match var { - Var::Local(LocalId(id)) => self.values_indexes[id].map(|(pos, _)| pos), - Var::External(symbol) => self.external_refs_indexes.get(&symbol).copied(), - } + pub fn get_index(&self, id: LocalId) -> u32 { + self.values_indexes[id.0].0 } pub fn refs_offset(self) -> Vec { - self.values_indexes - .into_iter() - .filter_map(|val| val.and_then(|(pos, is_obj)| is_obj.then_some(pos))) - .chain(self.external_refs_indexes.into_values()) - .collect() + Vec::new() } - pub fn get_capture_index(&self, var: ResolvedSymbol) -> Option { - self.external_refs_indexes.get(&var).copied() + pub fn get_capture_index(&self, var: LocalId) -> Option { + None } pub fn byte_count(&self) -> u32 { diff --git a/compiler/src/structure.rs b/compiler/src/structure.rs index 240c9a41..b1a04c44 100644 --- a/compiler/src/structure.rs +++ b/compiler/src/structure.rs @@ -1,7 +1,6 @@ -use analyzer::relations::LocalId; -use analyzer::types::ty::StructureDesc; - use crate::r#type::ValueStackSize; +use analyzer::typing::schema::Schema; +use analyzer::typing::variable::LocalId; #[derive(Debug, Clone, PartialEq)] pub struct StructureLayout { @@ -10,19 +9,19 @@ pub struct StructureLayout { indexes: Vec<(u32, ValueStackSize)>, } -impl From<&StructureDesc> for StructureLayout { - fn from(structure: &StructureDesc) -> Self { +impl From<&Schema> for StructureLayout { + fn from(structure: &Schema) -> Self { let mut indexes = Vec::new(); let mut idx = 0; - for field in structure.get_fields() { + for field in structure.fields.values() { let field_size = ValueStackSize::from(field.ty); indexes.push((idx, field_size)); idx += u8::from(field_size) as u32; } Self { - field_offset: structure.type_parameters.len(), + field_offset: structure.generic_variables.len(), total_size: idx, indexes, } diff --git a/compiler/src/type.rs b/compiler/src/type.rs index 9105e1a1..e8fedf4a 100644 --- a/compiler/src/type.rs +++ b/compiler/src/type.rs @@ -1,37 +1,33 @@ -use analyzer::types::ty::TypeRef; -use analyzer::types::{BOOL, ERROR, EXITCODE, 
FLOAT, INT, NOTHING, UNIT}; +use analyzer::typing::user::{self, TypeId}; /// returns the size of a given type identifier -pub fn get_type_stack_size(tpe: TypeRef) -> ValueStackSize { +pub fn get_type_stack_size(tpe: TypeId) -> ValueStackSize { match tpe { - NOTHING | UNIT => ValueStackSize::Zero, - BOOL | EXITCODE => ValueStackSize::Byte, - INT | FLOAT => ValueStackSize::QWord, - ERROR => panic!("Received 'ERROR' type in compilation phase."), - _ => ValueStackSize::QWord, //other types are object types which are references (q-words) + user::NOTHING_TYPE | user::UNIT_TYPE => ValueStackSize::Zero, + user::BOOL_TYPE | user::EXITCODE_TYPE => ValueStackSize::Byte, + user::UNKNOWN_TYPE | user::ERROR_TYPE => { + panic!("Received '`{tpe:?}`' type in compilation phase.") + } + _ => ValueStackSize::QWord, // other types are object types which are references (q-words) } } /// Different sizes a value can have on the stack. #[derive(Copy, Clone, Eq, Debug, PartialEq)] pub enum ValueStackSize { - Zero, - Byte, - QWord, + Zero = 0, + Byte = 1, + QWord = 8, } impl From for u8 { fn from(val: ValueStackSize) -> Self { - match val { - ValueStackSize::Zero => 0, - ValueStackSize::Byte => 1, - ValueStackSize::QWord => 8, - } + val as u8 } } -impl From for ValueStackSize { - fn from(value: TypeRef) -> Self { +impl From for ValueStackSize { + fn from(value: TypeId) -> Self { get_type_stack_size(value) } } diff --git a/context/src/source.rs b/context/src/source.rs index 70f46efd..02fffda9 100644 --- a/context/src/source.rs +++ b/context/src/source.rs @@ -5,6 +5,7 @@ use miette::{MietteError, MietteSpanContents, SourceCode, SourceSpan, SpanConten /// A range of bytes in an unbound string source. pub type SourceSegment = std::ops::Range; +pub type Span = std::ops::Range; /// An identifier to a source code. 
#[derive(Debug, Copy, Clone, PartialEq, Eq, Hash)] diff --git a/parser/src/lib.rs b/parser/src/lib.rs index 730b7942..6c0ba5ac 100644 --- a/parser/src/lib.rs +++ b/parser/src/lib.rs @@ -4,8 +4,9 @@ use ast::group::Block; use ast::Expr; use context::source::SourceSegmentHolder; +use std::str::FromStr; -use crate::err::ParseReport; +use crate::err::{ParseError, ParseReport}; use crate::parser::Parser; mod aspects; @@ -15,6 +16,11 @@ mod moves; mod parser; pub mod source; +#[derive(Debug, Clone, PartialEq)] +pub struct Root { + pub expressions: Vec, +} + pub fn parse(src: &str) -> ParseReport { Parser::new(src).parse() } @@ -27,3 +33,18 @@ pub fn parse_trusted(src: &str) -> Expr { segment: src.segment(), }) } + +impl FromStr for Root { + type Err = Vec; + + fn from_str(s: &str) -> Result { + let report = parse(s); + if report.is_ok() { + Ok(Root { + expressions: report.expr, + }) + } else { + Err(report.errors) + } + } +} From f2cc6a8deea30babb1bf8a89b3a9f189dd725b22 Mon Sep 17 00:00:00 2001 From: syldium Date: Sun, 25 Aug 2024 19:00:40 +0200 Subject: [PATCH 02/11] [ci skip] Setup the REPL and the test runner --- Cargo.lock | 1 - analyzer/src/hir.rs | 6 +- analyzer/src/hoist.rs | 145 +++++++++++++++---- analyzer/src/lib.rs | 121 ++++++++++++++-- analyzer/src/module.rs | 72 +++++++--- analyzer/src/typing.rs | 179 ++++++++++++++++++------ analyzer/src/typing/assign.rs | 134 +++++++++++++++++- analyzer/src/typing/flow.rs | 64 +++++++++ analyzer/src/typing/function.rs | 27 +++- analyzer/src/typing/lower.rs | 53 ++++++- analyzer/src/typing/operator.rs | 104 ++++++++++++++ analyzer/src/typing/registry.rs | 17 ++- analyzer/src/typing/schema.rs | 8 +- analyzer/src/typing/shell.rs | 47 ++++++- analyzer/src/typing/user.rs | 44 +++++- cli/src/cli.rs | 17 ++- cli/src/lib.rs | 2 - cli/src/library.rs | 54 +++++++ cli/src/main.rs | 45 ++++-- cli/src/pipeline.rs | 46 ++++-- cli/src/repl.rs | 34 ++--- cli/src/report.rs | 13 +- compiler/src/emit.rs | 6 +- compiler/src/emit/invoke.rs | 10 +- compiler/src/emit/native.rs | 240 +++++++++++++++++++++++++++++++- compiler/src/lib.rs | 45 +++++- lib/std.msh | 57 ++++++++ lib/std/assert.msh | 2 +- vm/Cargo.toml | 1 - vm/src/definitions/pager.cpp | 16 +-- vm/src/definitions/pager.h | 8 +- vm/src/vm.cpp | 5 +- vm/tests/integration/runner.rs | 235 ++++++++++++++----------------- 33 files changed, 1520 insertions(+), 338 deletions(-) create mode 100644 analyzer/src/typing/flow.rs create mode 100644 analyzer/src/typing/operator.rs create mode 100644 cli/src/library.rs diff --git a/Cargo.lock b/Cargo.lock index 08029f20..fa4adfc9 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1367,7 +1367,6 @@ version = "0.1.0" dependencies = [ "analyzer", "ast", - "cli", "cmake", "compiler", "context", diff --git a/analyzer/src/hir.rs b/analyzer/src/hir.rs index 7b590cfd..f102353c 100644 --- a/analyzer/src/hir.rs +++ b/analyzer/src/hir.rs @@ -5,7 +5,6 @@ use crate::Reef; use ast::call::{RedirFd, RedirOp}; use ast::value::LiteralValue; use context::source::Span; -use std::collections::hash_map::Values; use std::collections::HashMap; use std::path::PathBuf; use std::rc::Rc; @@ -175,6 +174,7 @@ pub enum ExprKind { Capture(Vec), Substitute(Substitute), Subprocess(Subprocess), + Cast(Box), Continue, Break, @@ -246,7 +246,7 @@ impl Module { impl Reef { pub fn group_by_content(&self) -> ContentIterator { ContentIterator { - inner: self.hir.values(), + inner: self.hir.as_slice().iter(), } } } @@ -267,7 +267,7 @@ pub struct EncodableContent<'a> { } pub struct ContentIterator<'a> { - inner: Values<'a, 
PathBuf, Module>,
+    inner: std::slice::Iter<'a, Module>,
 }
 
 impl<'a> Iterator for ContentIterator<'a> {
diff --git a/analyzer/src/hoist.rs b/analyzer/src/hoist.rs
index 9487fd17..1450a12a 100644
--- a/analyzer/src/hoist.rs
+++ b/analyzer/src/hoist.rs
@@ -6,17 +6,16 @@ use crate::typing::user::{
     lookup_builtin_type, TypeId, UserType, ERROR_TYPE, STRING_TYPE, UNIT_TYPE,
 };
 use crate::typing::{Parameter, TypeChecker, TypeErrorKind};
-use crate::{Reef, SourceLocation, TypeError};
+use crate::{Reef, SourceLocation, TypeError, UnitKey};
 use ast::function::{FunctionDeclaration, FunctionParameter};
 use ast::r#struct::{StructDeclaration, StructImpl};
 use ast::r#type::Type;
-use ast::r#use::{Import, ImportedSymbol, InclusionPathItem};
+use ast::r#use::{Import, ImportList, ImportedSymbol, InclusionPathItem};
 use ast::variable::TypedVariable;
 use ast::Expr;
 use context::source::{SourceSegmentHolder, Span};
-use parser::Root;
-use std::collections::HashMap;
-use std::ffi::OsString;
+use std::collections::{HashMap, HashSet};
+use std::ffi::{OsStr, OsString};
 use std::path::{Path, PathBuf};
 
 /// Places functions and types at the top level of the symbol table.
@@ -34,15 +33,26 @@ use std::path::{Path, PathBuf};
 /// This step performs multiple passes where types are added to the symbol table, and then their
 /// fields are added to the type. A third pass is done to hoist functions, now that all parameters
 /// and return types may be known.
+///
+/// # Exploration order
+///
+/// The hoisting phase creates an identity for each module and outlines the dependencies between
+/// them. It then computes an order in which to analyse the modules, one at a time, so that there
+/// are no cyclic dependencies between variables.
 pub(super) fn hoist_files(
     foreign: &HashMap,
     reef: &mut Reef,
     checker: &mut TypeChecker,
+    mut keys: Vec<UnitKey>,
 ) -> HoistingResult {
     let mut errors = Vec::<TypeError>::new();
     let mut graph = HashMap::new();
-    for (path, root) in &reef.files {
-        let mut table = SymbolTable::new(path.clone());
+    for key @ UnitKey { path, ..
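        // (Illustrative note on the ordering described above, using made-up module names:
        // `graph` maps each unit to the modules it requires, e.g. if `a.msh` imports `b.msh`
        // and `b.msh` imports `c.msh`, the graph is {a: [b], b: [c], c: []} and the initial
        // frontier holds only `a`, the module nothing else depends on. Units are then peeled
        // off the frontier one at a time, Kahn-style, so any module still left in `graph`
        // once the frontier is exhausted is part of a dependency cycle.)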
} in &keys { + let root = reef.files.get(key).expect("unit should exist"); + let mut table = reef + .symbols + .remove(path) + .unwrap_or_else(|| SymbolTable::new(path.clone())); let mut exports = reef.exports.take_exports(path); // This is not problematic if the export is not found, but it shouldn't happen let modules = ModuleView::new(&reef.exports, foreign); let mut deps = Dependencies { @@ -63,17 +73,24 @@ pub(super) fn hoist_files( reef.symbols.insert(path.clone(), table); } - let mut sorted: Vec = Vec::new(); + let mut sorted: Vec = Vec::new(); let mut frontier: Vec = graph .keys() .filter(|module| graph.values().all(|deps| !deps.contains(module))) .cloned() .collect(); + let mut seen: HashSet = frontier.iter().cloned().collect(); // Deduplicate when the same file is imported multiple times while let Some(module) = frontier.pop() { - sorted.push(module.clone()); + let key = keys.swap_remove( + keys.iter() + .position(|key| key.path == module) + .expect("module should exist"), + ); + sorted.push(key); if let Some(requires) = graph.remove(&module) { for require in requires { - if !graph.values().any(|deps| deps.contains(&require)) { + if !seen.contains(&require) && !graph.values().any(|deps| deps.contains(&require)) { + seen.insert(require.clone()); frontier.push(require); } } @@ -99,7 +116,7 @@ pub(super) fn hoist_files( pub(crate) struct HoistingResult { pub(crate) errors: Vec, - pub(crate) sorted: Vec, + pub(crate) sorted: Vec, } struct Dependencies<'a> { @@ -108,12 +125,12 @@ struct Dependencies<'a> { } fn hoist_type_names( - root: &Root, + root: &[Expr], checker: &mut TypeChecker, table: &mut SymbolTable, exports: &mut [Export], ) { - for expr in &root.expressions { + for expr in root.iter() { if let Expr::StructDeclaration(StructDeclaration { name, segment: span, @@ -139,14 +156,14 @@ fn hoist_type_names( } fn hoist_functions( - root: &Root, + root: &[Expr], checker: &mut TypeChecker, table: &mut SymbolTable, exports: &mut [Export], deps: &mut Dependencies, errors: &mut Vec, ) { - for expr in &root.expressions { + for expr in root.iter() { match expr { Expr::FunctionDeclaration(fn_decl) => { hoist_fn_decl(fn_decl, None, checker, table, exports, errors); @@ -164,6 +181,9 @@ fn hoist_functions( alias, segment: span, }) => { + if matches!(path.first(), Some(InclusionPathItem::Symbol(_))) { + return; // Exclude inter-reefs dependencies + } let (last, rest) = path.split_last().expect("at least one item"); if let Some(module) = deps.modules.get_direct(rest) { for export in &module.exports { @@ -192,10 +212,55 @@ fn hoist_functions( } } } - Import::AllIn(path, _) => todo!(), + Import::AllIn(path, _) => { + if matches!(path.first(), Some(InclusionPathItem::Symbol(_))) { + return; // Exclude inter-reefs dependencies + } + if let Some(module) = deps.modules.get_direct(path) { + for export in &module.exports { + table.insert_remote(export.name.clone(), import.segment(), export); + } + } + } Import::Environment(_) => {} - Import::List(list) => { - todo!() + Import::List(ImportList { + root, + imports, + segment: span, + }) => { + if matches!(root.first(), Some(InclusionPathItem::Symbol(_))) { + return; // Exclude inter-reefs dependencies + } + let base = root + .iter() + .skip_while(|item| matches!(item, InclusionPathItem::Reef(_))) + .map(|item| item.name()) + .collect::(); + if let Some(mut module) = deps.modules.get_direct(root) { + for import in imports { + match import { + Import::Symbol(symbol) => { + let mut path = base.clone(); + for part in symbol.path.iter() { + if let 
InclusionPathItem::Symbol(ident) = part { + if let Some(tree) = + module.get(OsStr::new(ident.value.as_str())) + { + path.push(ident.value.as_str()); + module = tree; + } else { + deps.requires.push(path); + break; + } + } + } + } + Import::AllIn(_, _) => {} + Import::Environment(_) => {} + Import::List(_) => {} + } + } + } } } } @@ -214,6 +279,7 @@ fn hoist_fn_decl( name, type_parameters, parameters, + body, return_type, .. }: &FunctionDeclaration, @@ -271,15 +337,37 @@ fn hoist_fn_decl( } None => UNIT_TYPE, }; - let mut fqn = table.path.clone(); - fqn.push(name.value.to_string()); + table.exit_scope(); + let fqn = if let Some(current_ty) = current_ty { + let UserType::Parametrized { schema, .. } = checker.types[current_ty] else { + panic!( + "the current type should be a struct, got {:?}", + checker.types[current_ty] + ); + }; + let Schema { + name: ref type_name, + .. + } = checker.registry[schema]; + let mut fqn = PathBuf::from(type_name); + fqn.push(name.value.to_string()); + fqn + } else { + let mut fqn = table.path.clone(); + fqn.push(name.value.to_string()); + fqn + }; let function = checker.registry.define_function(Function { declared_at: table.path.clone(), fqn, generic_variables, param_types, return_type, - kind: FunctionKind::Function, + kind: if body.is_some() { + FunctionKind::Function + } else { + FunctionKind::Intrinsic + }, }); let function_type = checker.types.alloc(UserType::Function(function)); match current_ty { @@ -548,12 +636,15 @@ mod tests { fn hoist_files(fs: MemoryFilesystem, entrypoint: &str) -> Vec { let mut reef = Reef::new(OsString::from("test")); - assert_eq!( - import_multi(&mut reef, &fs, entrypoint), - [], - "no import errors should be found" - ); - super::hoist_files(&mut HashMap::new(), &mut reef, &mut TypeChecker::default()).errors + let import_result = import_multi(&mut reef, &fs, entrypoint); + assert_eq!(import_result.errors, [], "no import errors should be found"); + super::hoist_files( + &mut HashMap::new(), + &mut reef, + &mut TypeChecker::default(), + import_result.keys, + ) + .errors } fn hoist(source: &str) -> Vec { diff --git a/analyzer/src/lib.rs b/analyzer/src/lib.rs index 7731bed2..5ce0ce5d 100644 --- a/analyzer/src/lib.rs +++ b/analyzer/src/lib.rs @@ -8,7 +8,7 @@ //! The analysis is done in a pipeline: //! 1. *Importing*: the whole project is parsed and indexed in a list of exports and imports. //! 2. *Hoisting*: the types and symbols are discovered and placed in the global scope of each -//! module. +//! module. //! 3. *Type checking*: the types are checked for consistency and errors are reported. //! //! Each phase uses the results of the previous ones, but each phase take can work with partial @@ -22,15 +22,15 @@ pub mod symbol; pub mod typing; use crate::hoist::hoist_files; -use crate::module::{import_multi, ModuleTree}; +use crate::module::{append, import_multi, ModuleTree}; use crate::symbol::SymbolTable; use crate::typing::{type_check, TypeChecker, TypeError}; use context::source::Span; use parser::err::ParseError; use std::collections::HashMap; use std::ffi::OsString; -use std::io; use std::path::{Path, PathBuf}; +use std::{fmt, io}; /// A byte range in a file. #[derive(Debug, Clone, PartialEq, Eq, Hash)] @@ -109,7 +109,7 @@ pub struct Database { /// A yet-to-be-analyzed set of files. pub struct Reef { /// The parsed abstract syntax trees of the files. - files: HashMap, + files: FileMemory, /// The export tree representing each module. 
exports: ModuleTree,
@@ -118,19 +118,67 @@ pub struct Reef {
     symbols: HashMap,
 
     /// The high-level typed intermediate representation of the code.
-    hir: HashMap<PathBuf, Module>,
+    hir: Vec<Module>,
+}
+
+struct FileMemory {
+    files: HashMap<PathBuf, Root>,
+}
+
+impl FileMemory {
+    fn get(&self, key: &UnitKey) -> Option<&[ast::Expr]> {
+        self.files
+            .get(&key.path)
+            .map(|root| &root.expressions[key.offset..])
+    }
+}
+
+pub struct FileImporter {
+    root: PathBuf,
+}
+
+impl FileImporter {
+    pub fn new(root: PathBuf) -> Self {
+        Self { root }
+    }
+}
+
+impl Filesystem for FileImporter {
+    fn read(&self, path: &Path) -> io::Result<String> {
+        let mut path = self.root.join(path);
+        path.set_extension("msh");
+        std::fs::read_to_string(path)
+    }
+}
+
+pub(crate) struct UnitKey {
+    pub(crate) path: PathBuf,
+    offset: usize,
+}
+
+impl fmt::Debug for UnitKey {
+    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
+        write!(f, "{:?}+{}", self.path, self.offset)
+    }
 }
 
 impl Reef {
     /// Creates a new empty library with a given name.
     pub fn new(name: OsString) -> Self {
         Self {
-            files: HashMap::new(),
+            files: FileMemory {
+                files: HashMap::new(),
+            },
             exports: ModuleTree::new(name),
             symbols: HashMap::new(),
-            hir: HashMap::new(),
+            hir: Vec::new(),
         }
     }
+
+    pub fn clear_cache(&mut self) {
+        self.files.files.clear();
+        self.hir.clear();
+    }
 }
 
 impl Database {
@@ -140,6 +188,10 @@ impl Database {
 }
 
 /// Populates the database with a fail-fast strategy.
+///
+/// An exploration of the source tree will be started from the entrypoint, and the result will be
+/// passed to subsequent phases. If an error is encountered during any phase, this function will
+/// return early and will not continue to the next phase.
 pub fn analyze_multi(
     database: &mut Database,
     reef: &mut Reef,
@@ -147,15 +199,52 @@ pub fn analyze_multi(
     entrypoint: &str,
 ) -> Vec<PipelineError> {
     let mut errors = Vec::<PipelineError>::new();
+    let import_result = import_multi(reef, fs, entrypoint);
+    errors.extend(import_result.errors.into_iter().map(PipelineError::from));
+    if !errors.is_empty() {
+        return errors;
+    }
+    let hoist_result = hoist_files(
+        &database.exports,
+        reef,
+        &mut database.checker,
+        import_result.keys,
+    );
+    errors.extend(hoist_result.errors.into_iter().map(PipelineError::from));
+    if !errors.is_empty() {
+        return errors;
+    }
     errors.extend(
-        import_multi(reef, fs, entrypoint)
+        type_check(reef, database, hoist_result.sorted)
             .into_iter()
             .map(PipelineError::from),
     );
+    errors
+}
+
+/// Adds or extends a source file to the reef.
+///
+/// The analysis will be run only on the added content, while reusing the previous symbols and types
+/// that may have already been defined.
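+///
+/// A rough usage sketch for an interactive session (the setup below is illustrative,
+/// not a prescribed API; `Database`, `Reef` and `FileImporter` are the types defined above):
+///
+/// ```ignore
+/// let mut database = Database::default();
+/// let mut reef = Reef::new(std::ffi::OsString::from("repl"));
+/// let fs = FileImporter::new(std::path::PathBuf::from("."));
+/// // Each REPL line extends the same virtual source file; only the new content is analyzed.
+/// for line in ["var n = 5", "echo $n"] {
+///     let errors = append_source(&mut database, &mut reef, &fs, std::path::PathBuf::from("repl"), line);
+///     assert!(errors.is_empty());
+/// }
+/// ```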
+pub fn append_source( + database: &mut Database, + reef: &mut Reef, + fs: &dyn Filesystem, + path: PathBuf, + source: &str, +) -> Vec { + let mut errors = Vec::::new(); + let import_result = append(reef, fs, path, source); + errors.extend(import_result.errors.into_iter().map(PipelineError::from)); if !errors.is_empty() { return errors; } - let hoist_result = hoist_files(&database.exports, reef, &mut database.checker); + let hoist_result = hoist_files( + &database.exports, + reef, + &mut database.checker, + import_result.keys, + ); errors.extend(hoist_result.errors.into_iter().map(PipelineError::from)); if !errors.is_empty() { return errors; @@ -167,3 +256,17 @@ pub fn analyze_multi( ); errors } + +pub fn freeze_exports(database: &mut Database, mut reef: Reef) { + let name = reef.exports.name.clone(); + let tree = reef.exports.children.pop().expect("no root module"); + assert!( + reef.exports.children.is_empty(), + "root module shouldn't have multiple children" + ); + assert_eq!( + tree.name, name, + "root module name should match the reef name" + ); + database.exports.insert(name, tree); +} diff --git a/analyzer/src/module.rs b/analyzer/src/module.rs index b85bb0bd..61e3d08b 100644 --- a/analyzer/src/module.rs +++ b/analyzer/src/module.rs @@ -17,15 +17,16 @@ use crate::symbol::SymbolRegistry; use crate::typing::user::{TypeId, UNKNOWN_TYPE}; use crate::typing::{TypeError, TypeErrorKind}; -use crate::{Filesystem, PipelineError, Reef, SourceLocation}; +use crate::{Filesystem, PipelineError, Reef, SourceLocation, UnitKey}; use ast::call::ProgrammaticCall; use ast::function::FunctionDeclaration; -use ast::r#use::{Import as ImportExpr, ImportedSymbol, InclusionPathItem, Use}; +use ast::r#use::{Import as ImportExpr, ImportList, ImportedSymbol, InclusionPathItem, Use}; use ast::variable::VarDeclaration; use ast::Expr; use context::source::{SourceSegment, SourceSegmentHolder, Span}; use parser::err::ParseError; use parser::Root; +use std::collections::hash_map::Entry; use std::collections::{HashMap, HashSet}; use std::ffi::{OsStr, OsString}; use std::io; @@ -243,6 +244,11 @@ impl ModuleTree { } } +pub(crate) struct ImportResult { + pub(crate) errors: Vec, + pub(crate) keys: Vec, +} + #[derive(Clone, Copy)] pub(crate) struct ModuleView<'a> { pub(crate) current: &'a ModuleTree, @@ -275,15 +281,45 @@ impl<'a> ModuleView<'a> { } /// Access all related files starting from the entrypoint. 
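///
/// The exploration is a plain worklist traversal; a self-contained sketch of the idea
/// (names here are illustrative, only `std` types are used):
///
/// ```ignore
/// use std::collections::HashSet;
/// use std::path::PathBuf;
///
/// let mut imports = vec![PathBuf::from("entrypoint")];
/// let mut visited = HashSet::new();
/// while let Some(path) = imports.pop() {
///     if !visited.insert(path.clone()) {
///         continue; // already read and parsed
///     }
///     // read and parse `path`, record its exports, then queue the files it imports
/// }
/// ```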
-pub(super) fn import_multi( - reef: &mut Reef, - fs: &dyn Filesystem, - entrypoint: &str, -) -> Vec { - let mut imports = vec![Import { +pub(super) fn import_multi(reef: &mut Reef, fs: &dyn Filesystem, entrypoint: &str) -> ImportResult { + let imports = vec![Import { path: PathBuf::from(entrypoint), origin: None, }]; + explore(reef, fs, imports, Vec::new()) +} + +pub(crate) fn append( + reef: &mut Reef, + fs: &dyn Filesystem, + path: PathBuf, + source: &str, +) -> ImportResult { + let report = parser::parse(source); + let mut unit = UnitKey { + path: path.clone(), + offset: 0, + }; + match reef.files.files.entry(path) { + Entry::Occupied(mut existing) => { + unit.offset = existing.get().expressions.len(); + existing.get_mut().expressions.extend(report.expr); + } + Entry::Vacant(vacant) => { + vacant.insert(Root { + expressions: report.expr, + }); + } + } + explore(reef, fs, Vec::::new(), vec![unit]) +} + +fn explore( + reef: &mut Reef, + fs: &dyn Filesystem, + mut imports: Vec, + mut keys: Vec, +) -> ImportResult { let mut errors = Vec::::new(); let mut visited = HashSet::::new(); while let Some(Import { mut path, origin }) = imports.pop() { @@ -323,9 +359,13 @@ pub(super) fn import_multi( } list_imports(&root, &path, &mut imports); reef.exports.insert(&path, exports); - reef.files.insert(path, root); + keys.push(UnitKey { + path: path.clone(), + offset: 0, + }); + reef.files.files.insert(path, root); } - errors + ImportResult { errors, keys } } #[derive(Debug, PartialEq, Eq)] @@ -444,18 +484,16 @@ fn list_imports_expr(expr: &Expr, path: &Path, imports: &mut Vec) { fn add_import(import: &ImportExpr, origin: &Path, span: SourceSegment, imports: &mut Vec) { match import { - ImportExpr::Symbol(ImportedSymbol { path, .. }) | ImportExpr::AllIn(path, _) => { + ImportExpr::Symbol(ImportedSymbol { path, .. }) + | ImportExpr::AllIn(path, _) + | ImportExpr::List(ImportList { root: path, .. }) => { let [InclusionPathItem::Reef(_), rest @ ..] 
= path.as_slice() else { return; }; add_import_tree(rest, origin, span, imports); + // TODO: List items } ImportExpr::Environment(_) => {} - ImportExpr::List(items) => { - for item in &items.imports { - add_import(item, origin, span.clone(), imports); - } - } } } @@ -500,7 +538,7 @@ mod tests { .to_string(); let mut reef = Reef::new(OsString::from("test")); let fs = MemoryFilesystem::from_iter(sources); - super::import_multi(&mut reef, &fs, &entrypoint) + super::import_multi(&mut reef, &fs, &entrypoint).errors } #[test] diff --git a/analyzer/src/typing.rs b/analyzer/src/typing.rs index 5f55da29..6fdb1b51 100644 --- a/analyzer/src/typing.rs +++ b/analyzer/src/typing.rs @@ -1,6 +1,8 @@ mod assign; +mod flow; pub mod function; mod lower; +mod operator; pub mod registry; pub mod schema; mod shell; @@ -10,32 +12,37 @@ pub mod variable; use crate::hir::{Conditional, Declaration, ExprKind, FunctionCall, Module, TypedExpr}; use crate::module::ModuleView; use crate::symbol::{Symbol, SymbolDesc, SymbolRegistry, UndefinedSymbol}; -use crate::typing::assign::ascribe_assign; +use crate::typing::assign::{ + ascribe_assign, ascribe_identifier, ascribe_subscript, ascribe_var_reference, +}; +use crate::typing::flow::{ascribe_control, ascribe_while}; use crate::typing::function::Function; -use crate::typing::lower::ascribe_template_string; +use crate::typing::lower::{ascribe_template_string, coerce_condition}; +use crate::typing::operator::ascribe_binary; use crate::typing::registry::{FunctionId, Registry, SchemaId}; use crate::typing::schema::Schema; use crate::typing::shell::{ - ascribe_call, ascribe_detached, ascribe_pipeline, ascribe_redirected, ascribe_substitution, + ascribe_call, ascribe_detached, ascribe_file_pattern, ascribe_pipeline, ascribe_redirected, + ascribe_substitution, }; use crate::typing::user::{ lookup_builtin_type, TypeArena, TypeId, UserType, BOOL_TYPE, ERROR_TYPE, FLOAT_TYPE, INT_TYPE, NOTHING_TYPE, STRING_TYPE, UNIT_TYPE, UNKNOWN_TYPE, }; use crate::typing::variable::{SymbolEntry, VariableTable}; -use crate::{Database, PipelineError, Reef, SourceLocation}; +use crate::{Database, PipelineError, Reef, SourceLocation, UnitKey}; use ast::call::{MethodCall, ProgrammaticCall}; use ast::control_flow::If; use ast::function::FunctionDeclaration; use ast::group::Block; use ast::r#struct::{FieldAccess, StructImpl}; use ast::r#type::{ByName, ParametrizedType, Type}; -use ast::r#use::{Import, InclusionPathItem, Use}; +use ast::r#use::{Import, ImportList, InclusionPathItem, Use}; +use ast::range::Iterable; use ast::value::{Literal, LiteralValue}; -use ast::variable::{VarDeclaration, VarKind, VarReference}; +use ast::variable::{VarDeclaration, VarKind}; use ast::Expr; use context::source::{SourceSegmentHolder, Span}; -use parser::Root; use std::ffi::OsStr; use std::path::PathBuf; use thiserror::Error; @@ -46,17 +53,17 @@ pub(super) fn type_check( exports, ref mut checker, }: &mut Database, - sorted: Vec, + sorted: Vec, ) -> Vec { let mut errors = Vec::::new(); - for path in sorted { - let root = reef.files.get(&path).expect("file should be present"); + for key in sorted { + let root = reef.files.get(&key).expect("file should be present"); let mut table = VariableTable::new( reef.symbols - .get_mut(&path) + .get_mut(key.path.as_path()) .expect("table should be present"), ); - let mut current_module = Module::new(path.clone()); + let mut current_module = Module::new(key.path.clone()); ascribe_types( root, &mut table, @@ -68,7 +75,7 @@ pub(super) fn type_check( current_module.exports = 
table.take_exports(); let all_module_exports = reef .exports - .get_full_mut(&path) + .get_full_mut(&key.path) .expect("module should exist"); for (variable, ty) in ¤t_module.exports { if let Some(hoisted_export) = all_module_exports.exports.iter_mut().find(|export| { @@ -77,7 +84,7 @@ pub(super) fn type_check( hoisted_export.ty = *ty; } } - reef.hir.insert(path, current_module); + reef.hir.push(current_module); } errors } @@ -267,6 +274,9 @@ pub enum TypeErrorKind { #[error("return statement outside of function body")] ReturnOutsideFunction, + #[error("loop control statement outside of a loop")] + ControlOutsideLoop, + #[error("repeated parameter name `{name}`")] RepeatedParameterName { name: String, previous: Span }, @@ -323,6 +333,7 @@ struct Context<'a> { modules: ModuleView<'a>, hint: TypeHint, return_ty: Option<&'a Return>, + in_loop: bool, } #[derive(Clone)] @@ -342,10 +353,17 @@ impl<'a> Context<'a> { ..self } } + + fn in_loop(self) -> Self { + Self { + in_loop: true, + ..self + } + } } pub(super) fn ascribe_types( - root: &Root, + root: &[Expr], table: &mut VariableTable, checker: &mut TypeChecker, storage: &mut Module, @@ -354,11 +372,12 @@ pub(super) fn ascribe_types( ) { let mut expressions = Vec::new(); table.push_environment(); - for expr in &root.expressions { + for expr in root.iter() { let ctx = Context { modules, hint: TypeHint::Unused, return_ty: None, + in_loop: false, }; expressions.push(ascribe_type(expr, table, checker, storage, ctx, errors)); } @@ -399,6 +418,11 @@ fn ascribe_type( LiteralValue::Bool(_) => BOOL_TYPE, }, }, + Expr::Unary(unary) => { + let typed_expr = ascribe_type(&unary.expr, table, checker, storage, ctx, errors); + typed_expr // TODO + } + Expr::Binary(binary) => ascribe_binary(binary, table, checker, storage, ctx, errors), Expr::TemplateString(tpl) => { ascribe_template_string(tpl, table, checker, storage, ctx, errors) } @@ -449,12 +473,23 @@ fn ascribe_type( } } Expr::Assign(assign) => ascribe_assign(assign, table, checker, storage, ctx, errors), + Expr::Subscript(subscript) => { + ascribe_subscript(subscript, table, checker, storage, ctx, errors) + } + Expr::While(stmt) => ascribe_while(stmt, table, checker, storage, ctx, errors), + Expr::Break(span) => ascribe_control(ExprKind::Break, span.clone(), table, ctx, errors), + Expr::Continue(span) => { + ascribe_control(ExprKind::Continue, span.clone(), table, ctx, errors) + } Expr::FunctionDeclaration(fn_decl) => { ascribe_fn_decl(fn_decl, None, table, checker, storage, ctx, errors); TypedExpr::noop(fn_decl.segment()) } Expr::Call(call) => ascribe_call(call, table, checker, storage, ctx, errors), Expr::Substitution(sub) => ascribe_substitution(sub, table, checker, storage, ctx, errors), + Expr::Parenthesis(paren) => { + ascribe_type(&paren.expression, table, checker, storage, ctx, errors) + } Expr::ProgrammaticCall(ProgrammaticCall { path, arguments, @@ -696,27 +731,8 @@ fn ascribe_type( } TypedExpr::error(span.clone()) } - Expr::VarReference(VarReference { - name, - segment: span, - }) => match table.lookup_variable(name.name()) { - Ok(var) => TypedExpr { - kind: ExprKind::Reference(var.id.clone()), - span: span.clone(), - ty: var.ty, - }, - Err(err) => { - errors.push(TypeError::new( - TypeErrorKind::UndefinedSymbol { - name: name.name().to_owned(), - expected: SymbolRegistry::Variable, - found: err.into(), - }, - SourceLocation::new(table.path().to_owned(), span.clone()), - )); - TypedExpr::error(span.clone()) - } - }, + Expr::VarReference(ident) => ascribe_var_reference(ident, table, errors), + 
Expr::Path(ident) => ascribe_identifier(ident, table, errors), Expr::Block(Block { expressions, segment: span, @@ -743,6 +759,12 @@ fn ascribe_type( Expr::Pipeline(pipeline) => { ascribe_pipeline(pipeline, table, checker, storage, ctx, errors) } + Expr::Range(iterable) => match iterable { + Iterable::Range(range) => todo!("{range:?}"), + Iterable::Files(pattern) => { + ascribe_file_pattern(pattern, table, checker, storage, ctx, errors) + } + }, Expr::If(If { condition, success_branch, @@ -750,6 +772,7 @@ fn ascribe_type( segment: span, }) => { let typed_condition = ascribe_type(condition, table, checker, storage, ctx, errors); + let typed_condition = coerce_condition(typed_condition, table, checker, errors); if let Err(_) = checker.types.unify(typed_condition.ty, BOOL_TYPE) { errors.push(TypeError::new( TypeErrorKind::TypeMismatch { @@ -899,7 +922,11 @@ fn ascribe_type( &type_parameters, ); TypedExpr { - kind: ExprKind::Noop, + kind: ExprKind::MethodCall(crate::hir::MethodCall { + callee: Box::new(typed_source), + arguments: args, + function_id: *method, + }), span: span.clone(), ty: return_type, } @@ -1344,9 +1371,69 @@ fn ascribe_import( } } } - Import::AllIn(_, _) => {} + Import::AllIn(path, span) => { + let tree = modules.get_direct(path).expect("path should be defined"); + for export in tree.exports.iter() { + table.insert_remote(export.name.clone(), span.clone(), export); + } + } Import::Environment(_) => {} - Import::List(_) => {} + Import::List(ImportList { + root, + imports, + segment: span, + }) => { + // FIXME: a bit messy and should prefer tree.exports instead of tree.children + let mut tree = modules.get_direct(root).expect("root should be defined"); + for import in imports { + match import { + Import::Symbol(symbol) => { + let mut iter = symbol.path.iter(); + while let Some(path) = iter.next() { + let InclusionPathItem::Symbol(ident) = path else { + panic!("path should be a symbol"); + }; + match tree.get(OsStr::new(ident.value.as_str())) { + Some(child_tree) => tree = child_tree, + None => { + let mut found = false; + if iter.next().is_none() { + for export in tree.exports.iter() { + if export.name != ident.value { + continue; + } + found = true; + table.insert_remote( + ident.value.to_string(), + ident.segment(), + export, + ); + } + } + if !found { + errors.push(TypeError::new( + TypeErrorKind::UndefinedSymbol { + name: ident.value.to_string(), + expected: SymbolRegistry::Type, + found: None, + }, + SourceLocation::new( + table.path().to_owned(), + ident.segment(), + ), + )); + } + break; + } + } + } + } + Import::AllIn(_, _) => todo!(), + Import::Environment(_) => {} + Import::List(_) => todo!(), + } + } + } } } @@ -1368,12 +1455,14 @@ mod tests { fn check(fs: MemoryFilesystem, entrypoint: &str) -> Vec { let mut database = Database::default(); let mut reef = Reef::new(OsString::from("test")); - assert_eq!( - import_multi(&mut reef, &fs, entrypoint), - [], - "no import errors should be found" + let import_result = import_multi(&mut reef, &fs, entrypoint); + assert_eq!(import_result.errors, [], "no import errors should be found"); + let hoist_result = hoist_files( + &database.exports, + &mut reef, + &mut database.checker, + import_result.keys, ); - let hoist_result = hoist_files(&database.exports, &mut reef, &mut database.checker); assert_eq!( hoist_result.errors, [], diff --git a/analyzer/src/typing/assign.rs b/analyzer/src/typing/assign.rs index 4737f128..023b1a81 100644 --- a/analyzer/src/typing/assign.rs +++ b/analyzer/src/typing/assign.rs @@ -1,5 +1,7 @@ -use 
crate::hir::{ExprKind, LocalAssignment, Module, TypedExpr}; +use crate::hir::{ExprKind, LocalAssignment, MethodCall, Module, TypedExpr}; use crate::symbol::{SymbolRegistry, UndefinedSymbol}; +use crate::typing::function::Function; +use crate::typing::user::UserType; use crate::typing::variable::VariableTable; use crate::typing::{ascribe_type, Context, TypeChecker, TypeError, TypeErrorKind, TypeHint}; use crate::SourceLocation; @@ -7,7 +9,7 @@ use ast::operation::{BinaryOperation, BinaryOperator}; use ast::r#struct::FieldAccess; use ast::r#use::InclusionPathItem; use ast::range::Subscript; -use ast::variable::{Assign, AssignOperator}; +use ast::variable::{Assign, AssignOperator, Path, VarName, VarReference}; use ast::Expr; use context::source::SourceSegmentHolder; @@ -92,6 +94,90 @@ pub(super) fn ascribe_assign( } } +pub(super) fn ascribe_subscript( + subscript: &Subscript, + table: &mut VariableTable, + checker: &mut TypeChecker, + storage: &mut Module, + ctx: Context, + errors: &mut Vec, +) -> TypedExpr { + let target = ascribe_type( + &subscript.target, + table, + checker, + storage, + ctx.with_hint(TypeHint::Used), + errors, + ); + let index = ascribe_type( + &subscript.index, + table, + checker, + storage, + ctx.with_hint(TypeHint::Used), + errors, + ); + if target.is_err() || index.is_err() { + return TypedExpr::error(subscript.segment()); + } + let UserType::Parametrized { schema, params } = checker.types[target.ty].clone() else { + panic!("Expected a parametrized type"); + }; + let mut generics = checker.registry[schema].generic_variables.clone(); + let name = "[]"; + let Some(method_id) = checker.registry[schema].methods.get(name).copied() else { + errors.push(TypeError::new( + TypeErrorKind::UnknownMethod { + name: name.to_owned(), + type_name: checker.display(target.ty), + }, + SourceLocation::new(table.path().to_owned(), subscript.segment()), + )); + return TypedExpr::error(subscript.segment()); + }; + let Function { + ref generic_variables, + ref param_types, + return_type, + .. + } = checker.registry[method_id]; + generics.extend(generic_variables); + let return_type = checker.types.concretize(return_type, &generics, ¶ms); + let [_self_param, index_param] = param_types.as_slice() else { + errors.push(TypeError::new( + TypeErrorKind::ArityMismatch { + expected: param_types.len(), + received: 1, + }, + SourceLocation::new(table.path().to_owned(), subscript.segment()), + )); + return TypedExpr::error(subscript.segment()); + }; + match checker.types.unify(index_param.ty, index.ty) { + Ok(_) => TypedExpr { + kind: ExprKind::MethodCall(MethodCall { + callee: Box::new(target), + arguments: vec![index], + function_id: method_id, + }), + ty: return_type, + span: subscript.segment(), + }, + Err(_) => { + errors.push(TypeError::new( + TypeErrorKind::TypeMismatch { + expected: checker.display(index_param.ty), + expected_due_to: None, + actual: checker.display(index.ty), + }, + SourceLocation::new(table.path().to_owned(), index.span), + )); + TypedExpr::error(subscript.segment()) + } + } +} + /// Creates the right hand side of an assignment. /// /// The state should contain the [`ExpressionValue::Expected`] value of the left hand side. 
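//
// (Illustrative note on `ascribe_subscript` above, with simplified names: an index
// expression is lowered to a call of the receiver schema's `[]` method, e.g.
//
//     $numbers[1]
//
// becomes, in the HIR,
//
//     MethodCall { callee: numbers, arguments: [1], function_id: <the schema's "[]" method> }
//
// after unifying the index with the method's second parameter and concretizing the
// return type against the receiver's type parameters.)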
@@ -146,3 +232,47 @@ fn ascribe_field_assign( ) -> TypedExpr { todo!() } + +pub(super) fn ascribe_identifier( + ident: &Path, + table: &mut VariableTable, + errors: &mut Vec, +) -> TypedExpr { + assert_eq!(ident.path.len(), 1); + ascribe_var_reference( + &VarReference { + name: VarName::User(ident.path.last().unwrap().name().into()), + segment: ident.segment(), + }, + table, + errors, + ) +} + +pub(super) fn ascribe_var_reference( + VarReference { + name, + segment: span, + }: &VarReference, + table: &mut VariableTable, + errors: &mut Vec, +) -> TypedExpr { + match table.lookup_variable(name.name()) { + Ok(var) => TypedExpr { + kind: ExprKind::Reference(var.id.clone()), + span: span.clone(), + ty: var.ty, + }, + Err(err) => { + errors.push(TypeError::new( + TypeErrorKind::UndefinedSymbol { + name: name.name().to_owned(), + expected: SymbolRegistry::Variable, + found: err.into(), + }, + SourceLocation::new(table.path().to_owned(), span.clone()), + )); + TypedExpr::error(span.clone()) + } + } +} diff --git a/analyzer/src/typing/flow.rs b/analyzer/src/typing/flow.rs new file mode 100644 index 00000000..cdc9a56e --- /dev/null +++ b/analyzer/src/typing/flow.rs @@ -0,0 +1,64 @@ +use crate::hir::{ExprKind, Loop, Module, TypedExpr}; +use crate::typing::lower::coerce_condition; +use crate::typing::user::{BOOL_TYPE, NOTHING_TYPE, UNIT_TYPE}; +use crate::typing::variable::VariableTable; +use crate::typing::{ascribe_type, Context, TypeChecker, TypeError, TypeErrorKind, TypeHint}; +use crate::SourceLocation; +use ast::control_flow::While; +use context::source::{SourceSegment, SourceSegmentHolder}; + +pub(super) fn ascribe_while( + stmt: &While, + table: &mut VariableTable, + checker: &mut TypeChecker, + storage: &mut Module, + ctx: Context, + errors: &mut Vec, +) -> TypedExpr { + let condition = ascribe_type( + &stmt.condition, + table, + checker, + storage, + ctx.with_hint(TypeHint::Required(BOOL_TYPE)), + errors, + ); + let body = ascribe_type( + &stmt.body, + table, + checker, + storage, + ctx.with_hint(TypeHint::Unused).in_loop(), + errors, + ); + TypedExpr { + kind: ExprKind::ConditionalLoop(Loop { + condition: Some(Box::new(coerce_condition( + condition, table, checker, errors, + ))), + body: Box::new(body), + }), + ty: UNIT_TYPE, + span: stmt.segment(), + } +} + +pub(super) fn ascribe_control( + kind: ExprKind, + span: SourceSegment, + table: &mut VariableTable, + ctx: Context, + errors: &mut Vec, +) -> TypedExpr { + if !ctx.in_loop { + errors.push(TypeError::new( + TypeErrorKind::ControlOutsideLoop, + SourceLocation::new(table.path().to_owned(), span.clone()), + )); + } + TypedExpr { + kind, + ty: NOTHING_TYPE, + span, + } +} diff --git a/analyzer/src/typing/function.rs b/analyzer/src/typing/function.rs index d20e6684..01b4dabb 100644 --- a/analyzer/src/typing/function.rs +++ b/analyzer/src/typing/function.rs @@ -11,8 +11,33 @@ pub struct Function { pub kind: FunctionKind, } -#[derive(Debug, PartialEq, Eq, Clone, Copy)] +#[derive(Debug, Clone, Copy, PartialEq, Eq)] pub enum FunctionKind { Function, Constructor, + Intrinsic, +} + +impl Function { + pub fn native( + fqn: &'static str, + generic_variables: Vec, + param_types: Vec, + return_type: TypeId, + ) -> Self { + Self { + declared_at: PathBuf::new(), + fqn: PathBuf::from(fqn), + generic_variables, + param_types: param_types + .into_iter() + .map(|ty| Parameter { + ty, + span: Default::default(), + }) + .collect(), + return_type, + kind: FunctionKind::Intrinsic, + } + } } diff --git a/analyzer/src/typing/lower.rs 
b/analyzer/src/typing/lower.rs index 832fcf20..26ea740c 100644 --- a/analyzer/src/typing/lower.rs +++ b/analyzer/src/typing/lower.rs @@ -1,6 +1,6 @@ use crate::hir::{ExprKind, MethodCall, Module, TypedExpr}; use crate::typing::registry::STRING_SCHEMA; -use crate::typing::user::{UserType, STRING_TYPE}; +use crate::typing::user::{UserType, BOOL_TYPE, EXITCODE_TYPE, STRING_TYPE}; use crate::typing::variable::VariableTable; use crate::typing::{ascribe_type, Context, TypeChecker, TypeError, TypeErrorKind, TypeHint}; use crate::SourceLocation; @@ -24,6 +24,15 @@ pub(super) fn ascribe_template_string( }; } + let concat = checker.registry[STRING_SCHEMA] + .get_exact_method( + &checker.types, + &checker.registry, + "concat", + &[STRING_TYPE, STRING_TYPE], + STRING_TYPE, + ) + .expect("String schema does not have a `concat` method"); let mut it = tpl.parts.iter().map(|part| { let typed_part = ascribe_type( part, @@ -42,7 +51,7 @@ pub(super) fn ascribe_template_string( kind: ExprKind::MethodCall(MethodCall { callee: Box::new(acc), arguments: vec![current], - function_id: todo!("String concatenation"), + function_id: concat, }), ty: STRING_TYPE, span, @@ -63,9 +72,13 @@ pub(super) fn convert_into_string( return expr; } let schema = &checker.registry[*schema]; - if let Some(method) = - schema.get_exact_method(&checker.registry, "to_string", &[], STRING_TYPE) - { + if let Some(method) = schema.get_exact_method( + &checker.types, + &checker.registry, + "to_string", + &[expr.ty], + STRING_TYPE, + ) { let span = expr.span.clone(); TypedExpr { kind: ExprKind::MethodCall(MethodCall { @@ -100,3 +113,33 @@ pub(super) fn convert_into_string( } } } + +pub(super) fn coerce_condition( + mut expr: TypedExpr, + table: &mut VariableTable, + checker: &mut TypeChecker, + errors: &mut Vec, +) -> TypedExpr { + if expr.ty == EXITCODE_TYPE { + let span = expr.span.clone(); + expr = TypedExpr { + kind: ExprKind::Cast(Box::new(expr)), + ty: BOOL_TYPE, + span, + }; + } + match checker.types.unify(expr.ty, BOOL_TYPE) { + Ok(_) => expr, + Err(_) => { + errors.push(TypeError::new( + TypeErrorKind::TypeMismatch { + expected: checker.display(BOOL_TYPE), + expected_due_to: None, + actual: checker.display(expr.ty), + }, + SourceLocation::new(table.path().to_owned(), expr.span.clone()), + )); + expr + } + } +} diff --git a/analyzer/src/typing/operator.rs b/analyzer/src/typing/operator.rs new file mode 100644 index 00000000..3da1155f --- /dev/null +++ b/analyzer/src/typing/operator.rs @@ -0,0 +1,104 @@ +use crate::hir::{ExprKind, MethodCall, Module, TypedExpr}; +use crate::typing::function::Function; +use crate::typing::user::UserType; +use crate::typing::variable::VariableTable; +use crate::typing::{ascribe_type, Context, TypeChecker, TypeError, TypeErrorKind}; +use crate::SourceLocation; +use ast::operation::{BinaryOperation, BinaryOperator}; +use context::source::SourceSegmentHolder; + +pub(super) fn ascribe_binary( + binary: &BinaryOperation, + table: &mut VariableTable, + checker: &mut TypeChecker, + storage: &mut Module, + ctx: Context, + errors: &mut Vec, +) -> TypedExpr { + let left = ascribe_type(&binary.left, table, checker, storage, ctx, errors); + let right = ascribe_type(&binary.right, table, checker, storage, ctx, errors); + if left.is_err() || right.is_err() { + return TypedExpr::error(binary.segment()); + } + let UserType::Parametrized { schema, params: _ } = checker.types[left.ty] else { + panic!("Expected a parametrized type"); + }; + let name = name_binary_method(binary.op); + let Some(method_id) = 
checker.registry[schema].methods.get(name).copied() else { + errors.push(TypeError::new( + TypeErrorKind::UnknownMethod { + name: name.to_owned(), + type_name: checker.display(left.ty), + }, + SourceLocation::new(table.path().to_owned(), binary.segment()), + )); + return TypedExpr::error(binary.segment()); + }; + let Function { + ref param_types, + return_type, + .. + } = checker.registry[method_id]; + let [self_param, param] = param_types.as_slice() else { + errors.push(TypeError::new( + TypeErrorKind::ArityMismatch { + expected: param_types.len(), + received: 1, + }, + SourceLocation::new(table.path().to_owned(), binary.segment()), + )); + return TypedExpr::error(binary.segment()); + }; + if let Err(_) = checker.types.unify(self_param.ty, left.ty) { + errors.push(TypeError::new( + TypeErrorKind::TypeMismatch { + expected: checker.display(self_param.ty), + expected_due_to: None, + actual: checker.display(left.ty), + }, + SourceLocation::new(table.path().to_owned(), left.span), + )); + return TypedExpr::error(binary.segment()); + } + match checker.types.unify(left.ty, param.ty) { + Ok(_) => TypedExpr { + kind: ExprKind::MethodCall(MethodCall { + callee: Box::new(left), + arguments: vec![right], + function_id: method_id, + }), + ty: return_type, + span: binary.segment(), + }, + Err(_) => { + errors.push(TypeError::new( + TypeErrorKind::TypeMismatch { + expected: checker.display(param.ty), + expected_due_to: None, + actual: checker.display(right.ty), + }, + SourceLocation::new(table.path().to_owned(), right.span), + )); + TypedExpr::error(binary.segment()) + } + } +} + +fn name_binary_method(op: BinaryOperator) -> &'static str { + use BinaryOperator as Op; + match op { + Op::Plus => "add", + Op::Minus => "sub", + Op::Times => "mul", + Op::Divide => "div", + Op::Modulo => "mod", + Op::And => "and", + Op::Or => "or", + Op::EqualEqual => "eq", + Op::NotEqual => "ne", + Op::Less => "lt", + Op::LessEqual => "le", + Op::Greater => "gt", + Op::GreaterEqual => "ge", + } +} diff --git a/analyzer/src/typing/registry.rs b/analyzer/src/typing/registry.rs index a9af3416..6091bf33 100644 --- a/analyzer/src/typing/registry.rs +++ b/analyzer/src/typing/registry.rs @@ -1,6 +1,5 @@ use crate::typing::function::Function; use crate::typing::schema::Schema; -use crate::typing::user::GENERIC_TYPE; use std::ops::{Index, IndexMut}; #[derive(Clone)] @@ -17,6 +16,7 @@ pub struct FunctionId(usize); impl Default for Registry { fn default() -> Self { + use crate::typing::user::{GENERIC_TYPE, INT_TYPE, VECTOR_TYPE}; Self { schemas: vec![ Schema::new("Int".to_owned()), @@ -24,11 +24,21 @@ impl Default for Registry { Schema::new("Exitcode".to_owned()), Schema::new("Float".to_owned()), Schema::new("String".to_owned()), - Schema::generic("Vec".to_owned(), vec![GENERIC_TYPE]), + { + let mut vec = Schema::generic("Vec".to_owned(), vec![GENERIC_TYPE]); + vec.methods.insert("[]".to_owned(), FunctionId(0)); + vec + }, Schema::new("Glob".to_owned()), Schema::new("Pid".to_owned()), + Schema::generic("Option".to_owned(), vec![GENERIC_TYPE]), ], - functions: Vec::new(), + functions: vec![Function::native( + "Vec/[]", + vec![], + vec![VECTOR_TYPE, INT_TYPE], + GENERIC_TYPE, + )], } } } @@ -41,6 +51,7 @@ pub const STRING_SCHEMA: SchemaId = SchemaId(4); pub const VEC_SCHEMA: SchemaId = SchemaId(5); pub const GLOB_SCHEMA: SchemaId = SchemaId(6); pub const PID_SCHEMA: SchemaId = SchemaId(7); +pub const OPTION_SCHEMA: SchemaId = SchemaId(8); impl Registry { /// Allocates a new [`SchemaId`] for the given [`Schema`]. 
diff --git a/analyzer/src/typing/schema.rs b/analyzer/src/typing/schema.rs index 1e375491..55dee7ef 100644 --- a/analyzer/src/typing/schema.rs +++ b/analyzer/src/typing/schema.rs @@ -1,4 +1,5 @@ use crate::typing::registry::{FunctionId, Registry}; +use crate::typing::user::TypeArena; use crate::typing::{Parameter, TypeId}; use std::collections::HashMap; @@ -39,8 +40,10 @@ impl Schema { } } + /// Finds a method that have those exact name, parameters and return type. pub fn get_exact_method( &self, + types: &TypeArena, registry: &Registry, name: &str, params: &[TypeId], @@ -52,8 +55,9 @@ impl Schema { .param_types .iter() .map(|param| param.ty) - .eq(params.iter().copied()) - && func.return_type == return_ty + .zip(params.iter()) + .all(|(param_ty, ty)| types.are_same(param_ty, *ty)) + && types.are_same(func.return_type, return_ty) { Some(id) } else { diff --git a/analyzer/src/typing/shell.rs b/analyzer/src/typing/shell.rs index 14c03e92..2f6eaa92 100644 --- a/analyzer/src/typing/shell.rs +++ b/analyzer/src/typing/shell.rs @@ -1,10 +1,16 @@ -use crate::hir::{ExprKind, Module, Redir, Redirect, Subprocess, Substitute, TypedExpr}; +use crate::hir::{ + ExprKind, MethodCall, Module, Redir, Redirect, Subprocess, Substitute, TypedExpr, +}; use crate::typing::lower::convert_into_string; -use crate::typing::user::{EXITCODE_TYPE, GLOB_TYPE, INT_TYPE, PID_TYPE, STRING_TYPE}; +use crate::typing::registry::GLOB_SCHEMA; +use crate::typing::user::{ + EXITCODE_TYPE, GLOB_TYPE, INT_TYPE, PID_TYPE, STRING_TYPE, STRING_VECTOR_TYPE, +}; use crate::typing::variable::VariableTable; use crate::typing::{ascribe_type, Context, TypeChecker, TypeError, TypeErrorKind, TypeHint}; use crate::SourceLocation; use ast::call::{Call, Detached, Pipeline, RedirOp, Redirected}; +use ast::range::FilePattern; use ast::substitution::Substitution; use context::source::SourceSegmentHolder; @@ -22,7 +28,25 @@ pub(super) fn ascribe_call( .map(|expr| { let expr = ascribe_type(expr, table, checker, storage, ctx, errors); if expr.ty == GLOB_TYPE { - todo!("globbing") + let glob = checker.registry[GLOB_SCHEMA] + .get_exact_method( + &checker.types, + &checker.registry, + "expand", + &[], + STRING_VECTOR_TYPE, + ) + .expect("Glob schema does not have a `expand` method"); + let span = expr.span.clone(); + TypedExpr { + kind: ExprKind::MethodCall(MethodCall { + callee: Box::new(expr), + arguments: Vec::new(), + function_id: glob, + }), + ty: STRING_VECTOR_TYPE, + span, + } } else { convert_into_string(expr, checker, table.path(), errors) } @@ -147,3 +171,20 @@ pub(super) fn ascribe_substitution( span: substitution.segment(), } } + +pub(super) fn ascribe_file_pattern( + pattern: &FilePattern, + table: &mut VariableTable, + checker: &mut TypeChecker, + storage: &mut Module, + ctx: Context, + errors: &mut Vec, +) -> TypedExpr { + let mut expr = ascribe_type(&pattern.pattern, table, checker, storage, ctx, errors); + if expr.ty == STRING_TYPE { + expr.ty = GLOB_TYPE; + } else if expr.is_ok() { + panic!("pattern should be of type String"); + } + expr +} diff --git a/analyzer/src/typing/user.rs b/analyzer/src/typing/user.rs index 6052567a..2c1fb5d6 100644 --- a/analyzer/src/typing/user.rs +++ b/analyzer/src/typing/user.rs @@ -54,6 +54,33 @@ impl TypeArena { TypeId(id) } + pub(crate) fn are_same(&self, lhs: TypeId, rhs: TypeId) -> bool { + match (&self[lhs], &self[rhs]) { + ( + UserType::Parametrized { + schema: lhs, + params: lparams, + }, + UserType::Parametrized { + schema: rhs, + params: rparams, + }, + ) => { + lhs == rhs + && lparams + .iter() 
+ .zip(rparams.iter()) + .all(|(l, r)| self.are_same(*l, *r)) + } + (a, b) => a == b, + } + } + + /// Merges two types into one. + /// + /// If the types are not compatible, an error is returned that may be expanded into a more + /// detailed error message in the future. If successful, a new type may be created and returned, + /// so the caller should always use the returned value. pub(crate) fn unify(&mut self, rhs: TypeId, assign_to: TypeId) -> Result { match (&self[assign_to], &self[rhs]) { (UserType::Error, _) | (_, UserType::Error) => Ok(ERROR_TYPE), @@ -79,6 +106,10 @@ impl TypeArena { schema, params: sub_params, } => { + if generics == params { + // Avoid creating a new type if the type is not parametrized and can be reused. + return ty; + } let concrete_params = sub_params .iter() .map(|ty| { @@ -122,7 +153,9 @@ pub const STRING_TYPE: TypeId = TypeId(8); pub const GENERIC_TYPE: TypeId = TypeId(9); pub const VECTOR_TYPE: TypeId = TypeId(10); pub const GLOB_TYPE: TypeId = TypeId(11); -pub const PID_TYPE: TypeId = TypeId(13); +pub const PID_TYPE: TypeId = TypeId(12); +pub const OPTION_TYPE: TypeId = TypeId(13); +pub(crate) const STRING_VECTOR_TYPE: TypeId = TypeId(14); /// Gets the [`TypeId`] for a built-in type by its name. pub(crate) fn lookup_builtin_type(name: &str) -> Option { @@ -137,6 +170,7 @@ pub(crate) fn lookup_builtin_type(name: &str) -> Option { "Vec" => Some(VECTOR_TYPE), "Glob" => Some(GLOB_TYPE), "Pid" => Some(PID_TYPE), + "Option" => Some(OPTION_TYPE), _ => None, } } @@ -191,6 +225,14 @@ impl Default for TypeArena { }, UserType::from(registry::GLOB_SCHEMA), UserType::from(registry::PID_SCHEMA), + UserType::Parametrized { + schema: registry::OPTION_SCHEMA, + params: vec![GENERIC_TYPE], + }, + UserType::Parametrized { + schema: registry::VEC_SCHEMA, + params: vec![STRING_TYPE], + }, ], } } diff --git a/cli/src/cli.rs b/cli/src/cli.rs index 6115a225..c509b212 100644 --- a/cli/src/cli.rs +++ b/cli/src/cli.rs @@ -1,17 +1,16 @@ use crate::disassemble::display_bytecode; -use crate::pipeline::RealFilesystem; +use crate::pipeline::{Pipeline, PipelineStatus}; use crate::report::{error_to_diagnostic, MultiFile}; use analyzer::{Database, PipelineError, Reef}; use clap::Parser; use clap_complete::Shell; -use cli::pipeline::PipelineStatus; use compiler::{compile_reef, CompilerOptions}; use miette::Report; use std::path::PathBuf; -use vm::{VmError, VM}; +use vm::VmError; /// The Moshell scripting language. -#[derive(Parser)] +#[derive(Parser, Default)] #[command(author, version, about, long_about = None)] pub struct Cli { /// The inline source code to parse @@ -47,8 +46,11 @@ pub struct Cli { pub fn use_pipeline( database: &Database, reef: &Reef, - fs: &RealFilesystem, - vm: &mut VM, + Pipeline { + filesystem, + compiler_state, + vm, + }: &mut Pipeline, errors: Vec, config: &Cli, ) -> PipelineStatus { @@ -59,7 +61,7 @@ pub fn use_pipeline( PipelineError::Parse { .. 
} | PipelineError::Type(_) => PipelineStatus::AnalysisError, }); let mut multi_file = MultiFile::default(); - let diagnostic = error_to_diagnostic(error, &mut multi_file, fs); + let diagnostic = error_to_diagnostic(error, &mut multi_file, filesystem); let report = Report::from(diagnostic).with_source_code(multi_file); eprintln!("{report:?}"); } @@ -72,6 +74,7 @@ pub fn use_pipeline( database, reef, &mut bytes, + compiler_state, CompilerOptions { line_provider: None, last_page_storage_var: None, diff --git a/cli/src/lib.rs b/cli/src/lib.rs index b3794dad..d3c886ab 100644 --- a/cli/src/lib.rs +++ b/cli/src/lib.rs @@ -1,7 +1,5 @@ use directories::ProjectDirs; -pub mod pipeline; - pub fn project_dir() -> Option { ProjectDirs::from("", "", "moshell") } diff --git a/cli/src/library.rs b/cli/src/library.rs new file mode 100644 index 00000000..6b39fb63 --- /dev/null +++ b/cli/src/library.rs @@ -0,0 +1,54 @@ +use crate::cli::{use_pipeline, Cli}; +use crate::pipeline::{Pipeline, PipelineStatus, REPLFilesystem}; +use analyzer::{analyze_multi, freeze_exports, Database, Reef}; +use cli::project_dir; +use std::ffi::OsString; +use std::path::{Path, PathBuf}; + +pub(crate) fn build_std(database: &mut Database, pipeline: &mut Pipeline) { + let std_file = find_std(); + let mut reef = Reef::new(OsString::from("std")); + let old_fs = std::mem::replace(&mut pipeline.filesystem, REPLFilesystem::new(std_file)); + let errors = analyze_multi(database, &mut reef, &pipeline.filesystem, "std"); + match use_pipeline(database, &reef, pipeline, errors, &Cli::default()) { + PipelineStatus::Success => {} + PipelineStatus::IoError => panic!( + "Unable to find the standard library, check the MOSHELL_STD environment variable" + ), + status => panic!("Could not build std: {:?}", status), + } + pipeline.filesystem = old_fs; + freeze_exports(database, reef); +} + +fn find_std() -> PathBuf { + if let Ok(path) = std::env::var("MOSHELL_STD") { + return PathBuf::from(path); + } + + // let mut dir = std::env::current_dir().expect("Could not get current directory"); + // dir.push("lib"); + // dir.push("std.msh"); + // if dir.exists() { + // dir.pop(); + // return dir; + // } + + if let Some(proj_dirs) = project_dir() { + let lib = proj_dirs.data_dir().join("lib"); + if lib.exists() { + return lib; + } + } + + #[cfg(unix)] + { + for path in ["/usr/local/share/moshell/lib", "/usr/share/moshell/lib"] { + let path = Path::new(path); + if path.exists() { + return path.to_path_buf(); + } + } + } + panic!("Could not determine a valid std emplacement. 
Please provide a valid stdlib path under a MOSHELL_STD= env variable.") +} diff --git a/cli/src/main.rs b/cli/src/main.rs index 8ee03a5d..b5175fed 100644 --- a/cli/src/main.rs +++ b/cli/src/main.rs @@ -1,19 +1,21 @@ use crate::cli::{use_pipeline, Cli}; -use crate::pipeline::RealFilesystem; +use crate::library::build_std; +use crate::pipeline::{Pipeline, PipelineStatus, REPLFilesystem}; use crate::repl::repl; use crate::terminal::signal_hook; -use ::cli::pipeline::PipelineStatus; use analyzer::{analyze_multi, Database, Reef}; use clap::{CommandFactory, Parser}; +use compiler::CompilerState; use nix::sys::signal; use std::ffi::OsString; use std::io; -use std::path::PathBuf; +use std::path::{Path, PathBuf}; use vm::VM; mod cli; mod complete; mod disassemble; +mod library; mod pipeline; mod repl; mod report; @@ -45,7 +47,7 @@ fn main() -> miette::Result { return Ok(PipelineStatus::Success); } - let mut vm = VM::new( + let vm = VM::new( cli.source .iter() .flat_map(|p| p.to_str()) @@ -53,24 +55,43 @@ fn main() -> miette::Result { .chain(std::mem::take(&mut cli.program_arguments)) .collect(), ); - let fs = RealFilesystem { - root: PathBuf::new(), + let fs = REPLFilesystem::new( + cli.source + .as_deref() + .and_then(Path::parent) + .map(Path::to_path_buf) + .unwrap_or_default(), + ); + let mut pipeline = Pipeline { + filesystem: fs, + compiler_state: CompilerState::default(), + vm, }; let mut database = Database::default(); + build_std(&mut database, &mut pipeline); if let Some(source) = cli.source.take() { - return Ok(run(source, &mut database, &fs, &mut vm, &cli)); + return Ok(run( + source.file_name().map(PathBuf::from).unwrap_or_default(), + &mut database, + &mut pipeline, + &cli, + )); } - repl(&cli, &mut database, &fs, &mut vm) + repl(&cli, &mut database, &mut pipeline) } fn run( source: PathBuf, database: &mut Database, - fs: &RealFilesystem, - vm: &mut VM, + pipeline: &mut Pipeline, config: &Cli, ) -> PipelineStatus { let mut reef = Reef::new(OsString::from("foo")); - let errors = analyze_multi(database, &mut reef, fs, &source.display().to_string()); - use_pipeline(database, &reef, fs, vm, errors, config) + let errors = analyze_multi( + database, + &mut reef, + &pipeline.filesystem, + &source.display().to_string(), + ); + use_pipeline(database, &reef, pipeline, errors, config) } diff --git a/cli/src/pipeline.rs b/cli/src/pipeline.rs index 0ca5b3d6..29b06c41 100644 --- a/cli/src/pipeline.rs +++ b/cli/src/pipeline.rs @@ -1,6 +1,15 @@ -use analyzer::Filesystem; +use analyzer::{FileImporter, Filesystem}; +use compiler::CompilerState; +use std::io; use std::path::{Path, PathBuf}; use std::process::{ExitCode, Termination}; +use vm::VM; + +pub(crate) struct Pipeline { + pub(crate) filesystem: REPLFilesystem, + pub(crate) compiler_state: CompilerState, + pub(crate) vm: VM, +} /// Represents the state of the pipeline. 
#[repr(u8)] @@ -36,14 +45,35 @@ impl Termination for PipelineStatus { } } -pub(super) struct RealFilesystem { - pub(super) root: PathBuf, +pub(super) struct REPLFilesystem { + base: FileImporter, + stdin: String, +} + +impl REPLFilesystem { + pub(super) fn new(root: PathBuf) -> Self { + Self { + base: FileImporter::new(root), + stdin: String::new(), + } + } } -impl Filesystem for RealFilesystem { - fn read(&self, path: &Path) -> std::io::Result { - let mut path = self.root.join(path); - path.set_extension("msh"); - std::fs::read_to_string(path) +impl Filesystem for REPLFilesystem { + fn read(&self, path: &Path) -> io::Result { + if path == Path::new("stdin") { + return Ok(self.stdin.clone()); + } + self.base.read(path) + } +} + +impl REPLFilesystem { + pub(super) fn add(&mut self, source: &str) -> PathBuf { + if !self.stdin.is_empty() { + self.stdin.push('\n'); + } + self.stdin.push_str(source); + PathBuf::from("stdin") } } diff --git a/cli/src/repl.rs b/cli/src/repl.rs index bba0a70e..d9068ae5 100644 --- a/cli/src/repl.rs +++ b/cli/src/repl.rs @@ -1,5 +1,4 @@ -use analyzer::{Database, Reef}; -use cli::pipeline::PipelineStatus; +use analyzer::{append_source, Database, Reef}; use miette::{Context, IntoDiagnostic}; use nu_ansi_term::Color; use reedline::{ @@ -13,25 +12,23 @@ use std::io::{self, BufRead, IsTerminal, StdinLock}; use cli::project_dir; use lexer::is_unterminated; -use vm::VM; -use crate::cli::Cli; +use crate::cli::{use_pipeline, Cli}; use crate::complete::MoshellCompleter; -use crate::pipeline::RealFilesystem; +use crate::pipeline::{Pipeline, PipelineStatus}; use crate::terminal::acquire_terminal; /// Indefinitely prompts a new expression from stdin and executes it. pub(crate) fn repl( config: &Cli, database: &mut Database, - fs: &RealFilesystem, - vm: &mut VM, + pipeline: &mut Pipeline, ) -> miette::Result { let mut reef = Reef::new(OsString::from("stdin")); let mut editor = if io::stdin().is_terminal() && cfg!(not(miri)) { #[cfg(unix)] - vm.set_pgid(acquire_terminal().as_raw()); + pipeline.vm.set_pgid(acquire_terminal().as_raw()); Editor::LineEditor(Box::new(editor().context("Could not start REPL")?)) } else { Editor::NoEditor(MultilineInput::new(io::stdin().lock())) @@ -39,27 +36,16 @@ pub(crate) fn repl( let mut status = PipelineStatus::Success; - // Keep track of the previous attributed source, so that we can inject - // the next one into the same context. - //let mut starting_source: Option = None; - loop { let line = editor.read_line(&Prompt); match line { Ok(Signal::Success(source)) => { - // let source = OwnedSource::new(source, "stdin".to_owned()); - // status = status.compose(consume( - // &name, - // &mut analyzer, - // &externals, - // &mut compiler_externals, - // &mut vm, - // &mut sources, - // config, - // &mut starting_source, - // source, - // )); + let path = pipeline.filesystem.add(&source); + let errors = + append_source(database, &mut reef, &pipeline.filesystem, path, &source); + status = status.compose(use_pipeline(database, &reef, pipeline, errors, config)); + reef.clear_cache(); } Ok(Signal::CtrlC) => eprintln!("^C"), Ok(Signal::CtrlD) => break Ok(status), diff --git a/cli/src/report.rs b/cli/src/report.rs index 9ceaefba..e32485f1 100644 --- a/cli/src/report.rs +++ b/cli/src/report.rs @@ -70,10 +70,11 @@ fn type_error_to_diagnostic( expected_span, )) } - TypeErrorKind::UnknownField { available, .. 
} => diagnostic.with_help(format!( - "Available fields: {}", - available.into_iter().collect::>().join(", ") - )), + TypeErrorKind::UnknownField { available, .. } if !available.is_empty() => diagnostic + .with_help(format!( + "Available fields: {}", + available.into_iter().collect::>().join(", ") + )), TypeErrorKind::TypeAnnotationRequired { types, insert_at } => { let span = multi_file.insert(at.path, insert_at..insert_at, fs); diagnostic.with_label(LabeledSpan::new_with_span( @@ -115,7 +116,9 @@ impl MultiFile { start += source.source.len(); } } - let source = fs.read(&path).unwrap(); + let Ok(source) = fs.read(&path) else { + panic!("unable to re-read file: {}", path.display()); + }; self.sources.push(VirtualFile { name: path, source: source.to_string(), diff --git a/compiler/src/emit.rs b/compiler/src/emit.rs index f8cc3991..f8468945 100644 --- a/compiler/src/emit.rs +++ b/compiler/src/emit.rs @@ -11,7 +11,7 @@ use crate::emit::invoke::{ emit_subprocess, emit_substitution, }; use crate::emit::jump::{emit_break, emit_conditional, emit_continue, emit_loop}; -use crate::emit::native::emit_natives; +use crate::emit::native::{emit_cast, emit_natives}; use crate::emit::structure::{emit_field_access, emit_field_assign}; use crate::locals::LocalsLayout; use crate::r#type::ValueStackSize; @@ -258,6 +258,10 @@ pub fn emit( ExprKind::Substitute(substitution) => { emit_substitution(substitution, instructions, ctx, cp, locals, state); } + ExprKind::Cast(inner) => { + emit(inner, instructions, ctx, cp, locals, state); + emit_cast(inner.ty, expr.ty, instructions); + } ExprKind::Noop => {} } instructions.push_position(expr.span.start) diff --git a/compiler/src/emit/invoke.rs b/compiler/src/emit/invoke.rs index 903bbcad..f925cf92 100644 --- a/compiler/src/emit/invoke.rs +++ b/compiler/src/emit/invoke.rs @@ -3,6 +3,7 @@ use analyzer::typing::function::FunctionKind; use analyzer::typing::registry::SchemaId; use analyzer::typing::user::{TypeId, UserType, INT_TYPE, STRING_TYPE, VECTOR_TYPE}; use libc::{O_APPEND, O_CREAT, O_RDONLY, O_RDWR, O_TRUNC, O_WRONLY}; +use std::ffi::OsStr; use ast::call::{RedirFd, RedirOp}; @@ -226,7 +227,14 @@ pub fn emit_function_invocation( // thus we can init it from all the pushed constructor's parameters in the operands instructions.emit_copy_operands(layout.total_size); } else { - let signature_idx = cp.insert_string(function.fqn.display()); + let signature_idx = cp.insert_string( + function + .fqn + .iter() + .map(OsStr::to_string_lossy) + .collect::>() + .join("::"), + ); instructions.emit_invoke(signature_idx); } diff --git a/compiler/src/emit/native.rs b/compiler/src/emit/native.rs index ce867843..976fb37e 100644 --- a/compiler/src/emit/native.rs +++ b/compiler/src/emit/native.rs @@ -1,8 +1,11 @@ -use crate::bytecode::Instructions; +use crate::bytecode::{Instructions, Opcode}; use crate::constant_pool::ConstantPool; -use crate::emit::{EmissionState, EmitterContext}; +use crate::emit::{emit, EmissionState, EmitterContext}; use crate::locals::LocalsLayout; +use crate::r#type::ValueStackSize; use analyzer::hir::MethodCall; +use analyzer::typing::function::FunctionKind; +use analyzer::typing::user; use analyzer::typing::user::TypeId; const STRING_EQ: &str = "lang::String::eq"; @@ -27,7 +30,7 @@ pub(crate) fn emit_natives( MethodCall { callee, arguments: args, - .. 
+ function_id, }: &MethodCall, receiver_ty: TypeId, instructions: &mut Instructions, @@ -36,5 +39,234 @@ pub(crate) fn emit_natives( locals: &mut LocalsLayout, state: &mut EmissionState, ) { - todo!("Emit native function calls") + let function = &ctx.registry[*function_id]; + match function.kind { + FunctionKind::Intrinsic => { + let name = function.fqn.as_os_str().to_str().unwrap(); + let uses = state.use_values(true); + emit(callee, instructions, ctx, cp, locals, state); + if name == "Bool/and" || name == "Exitcode/and" { + instructions.emit_code(Opcode::DupByte); + let end_jump = instructions.emit_jump(if name == "Exitcode/and" { + Opcode::IfJump + } else { + Opcode::IfNotJump + }); + instructions.emit_pop(ValueStackSize::Byte); + emit( + args.first() + .expect("Cannot AND a boolean without a second boolean"), + instructions, + ctx, + cp, + locals, + state, + ); + instructions.patch_jump(end_jump); + state.use_values(uses); + return; + } else if name == "Bool/or" || name == "Exitcode/or" { + instructions.emit_code(Opcode::DupByte); + let else_jump = instructions.emit_jump(if name == "Exitcode/or" { + Opcode::IfJump + } else { + Opcode::IfNotJump + }); + let end_jump = instructions.emit_jump(Opcode::Jump); + instructions.patch_jump(else_jump); + instructions.emit_pop(ValueStackSize::Byte); + emit( + args.first() + .expect("Cannot OR a boolean without a second boolean"), + instructions, + ctx, + cp, + locals, + state, + ); + instructions.patch_jump(end_jump); + state.use_values(uses); + return; + } + for arg in args { + emit(arg, instructions, ctx, cp, locals, state); + } + state.use_values(uses); + match name { + STRING_EQ => { + todo!("Emit string equality") + } + STRING_CONCAT => { + todo!("Emit string concatenation") + } + INT_TO_STRING => { + todo!("Emit int to string") + } + FLOAT_TO_STRING => { + todo!("Emit float to string") + } + STRING_LEN => { + todo!("Emit string length") + } + STRING_INDEX => { + todo!("Emit string index") + } + VEC_INDEX => { + todo!("Emit vec index") + } + VEC_INDEX_EQ => { + todo!("Emit vec index assignment") + } + VEC_POP => { + todo!("Emit vec pop") + } + VEC_PUSH => { + todo!("Emit vec push") + } + VEC_EXTEND => { + todo!("Emit vec extend") + } + VEC_LEN => { + todo!("Emit vec length") + } + VEC_POP_HEAD => { + todo!("Emit vec pop head") + } + STRING_SPLIT => { + todo!("Emit string split") + } + STRING_BYTES => { + todo!("Emit string bytes") + } + "Bool/not" => { + instructions.emit_bool_inversion(); + } + "Bool/eq" => { + instructions.emit_code(Opcode::BXor); + instructions.emit_bool_inversion(); + } + "Bool/ne" => { + instructions.emit_code(Opcode::BXor); + } + "Int/add" => { + instructions.emit_code(Opcode::IntAdd); + } + "Int/sub" => { + instructions.emit_code(Opcode::IntSub); + } + "Int/mul" => { + instructions.emit_code(Opcode::IntMul); + } + "Int/div" => { + instructions.emit_code(Opcode::IntDiv); + } + "Int/mod" => { + instructions.emit_code(Opcode::IntMod); + } + "Int/eq" => { + instructions.emit_code(Opcode::IntEqual); + } + "Int/ne" => { + instructions.emit_code(Opcode::IntEqual); + instructions.emit_bool_inversion(); + } + "Int/lt" => { + instructions.emit_code(Opcode::IntLessThan); + } + "Int/le" => { + instructions.emit_code(Opcode::IntLessOrEqual); + } + "Int/gt" => { + instructions.emit_code(Opcode::IntGreaterThan); + } + "Int/ge" => { + instructions.emit_code(Opcode::IntGreaterOrEqual); + } + "Int/to_exitcode" => { + instructions.emit_code(Opcode::ConvertByteToInt); + } + "Int/to_string" => { + 
instructions.emit_invoke(cp.insert_string(INT_TO_STRING)); + } + "String/eq" => { + instructions.emit_invoke(cp.insert_string(STRING_EQ)); + } + "String/ne" => { + instructions.emit_invoke(cp.insert_string(STRING_EQ)); + instructions.emit_bool_inversion(); + } + "String/len" => { + instructions.emit_invoke(cp.insert_string(STRING_LEN)); + } + "String/add" | "String/concat" => { + instructions.emit_invoke(cp.insert_string(STRING_CONCAT)); + } + "String/[]" => { + instructions.emit_invoke(cp.insert_string(STRING_INDEX)); + } + "String/split" => { + instructions.emit_invoke(cp.insert_string(STRING_SPLIT)); + } + "String/bytes" => { + instructions.emit_invoke(cp.insert_string(STRING_BYTES)); + } + "Vec/len" => { + instructions.emit_invoke(cp.insert_string(VEC_LEN)); + } + "Vec/[]" => { + instructions.emit_invoke(cp.insert_string(VEC_INDEX)); + } + "Vec/[]=" => { + instructions.emit_invoke(cp.insert_string(VEC_INDEX_EQ)); + } + "Vec/push" => { + instructions.emit_invoke(cp.insert_string(VEC_PUSH)); + } + "Vec/pop" => { + instructions.emit_invoke(cp.insert_string(VEC_POP)); + } + "Vec/pop_head" => { + instructions.emit_invoke(cp.insert_string(VEC_POP_HEAD)); + } + "Vec/extend" => { + instructions.emit_invoke(cp.insert_string(VEC_EXTEND)); + } + "Option/is_some" => { + instructions.emit_push_int(0); + instructions.emit_code(Opcode::IntEqual); + } + "Option/is_none" => { + instructions.emit_push_int(0); + instructions.emit_code(Opcode::IntEqual); + instructions.emit_bool_inversion(); + } + "Option/unwrap" => { + instructions.emit_code(Opcode::Dup); + instructions.emit_push_int(0); + instructions.emit_code(Opcode::IntEqual); + let end_jump = instructions.emit_jump(Opcode::IfNotJump); + instructions.emit_push_constant_ref(cp.insert_string("Cannot unwrap `None`.")); + instructions.emit_invoke(cp.insert_string("std::panic")); + instructions.patch_jump(end_jump); + } + "Glob/expand" => { + instructions.emit_invoke(cp.insert_string(GLOB_EXPAND)); + } + _ => panic!("Unknown `{}` intrinsic", function.fqn.display()), + } + } + _ => panic!( + "Call `{}`, but it's not implemented yet", + function.fqn.display() + ), + } +} + +pub(crate) fn emit_cast(from: TypeId, to: TypeId, instructions: &mut Instructions) { + match (from, to) { + (user::EXITCODE_TYPE, user::BOOL_TYPE) => { + instructions.emit_bool_inversion(); + } + _ => panic!("Emit cast from {from:?} to {to:?}"), + } } diff --git a/compiler/src/lib.rs b/compiler/src/lib.rs index 0cd97cb0..bdb64ba9 100644 --- a/compiler/src/lib.rs +++ b/compiler/src/lib.rs @@ -1,8 +1,9 @@ +use std::ffi::OsStr; use std::io; use std::io::Write; use ::context::source::ContentId; -use analyzer::hir::{Chunk, EncodableContent, NamedExports}; +use analyzer::hir::{Chunk, EncodableContent, ExprKind, NamedExports}; use analyzer::{Database, Reef}; use crate::bytecode::{Bytecode, InstructionPos, Instructions}; @@ -26,6 +27,11 @@ pub trait SourceLineProvider { fn get_line(&self, content: ContentId, byte_pos: usize) -> Option; } +#[derive(Default)] +pub struct CompilerState { + pub constant_pool: ConstantPool, +} + #[derive(Default)] pub struct CompilerOptions<'a> { pub line_provider: Option<&'a dyn SourceLineProvider>, @@ -38,10 +44,10 @@ pub fn compile_reef( database: &Database, reef: &Reef, writer: &mut impl Write, + CompilerState { constant_pool: cp }: &mut CompilerState, options: CompilerOptions, ) -> Result<(), io::Error> { let mut bytecode = Bytecode::default(); - let mut cp = ConstantPool::default(); let layouts = Vec::::new(); for EncodableContent { @@ -63,20 +69,23 @@ pub fn 
compile_reef( page_size += size as u32; cp.insert_exported(name, offset, ty.is_obj()); } + if options.last_page_storage_var.is_some() { + page_size += u8::from(ValueStackSize::QWord) as u32; + } - compile_function_chunk(main, exports, &ctx, &mut bytecode, &mut cp, &options); + compile_function_chunk(main, exports, &ctx, &mut bytecode, cp, &options); - write_exported(&mut cp, page_size, &mut bytecode)?; + write_exported(cp, page_size, &mut bytecode)?; bytecode.emit_u32(layouts.len() as u32); bytecode.emit_u32(functions.len() as u32); for function in functions { - compile_function_chunk(function, exports, &ctx, &mut bytecode, &mut cp, &options); + compile_function_chunk(function, exports, &ctx, &mut bytecode, cp, &options); } } - write(writer, &bytecode, &cp)?; + write(writer, &bytecode, cp)?; Ok(()) } @@ -89,7 +98,14 @@ fn compile_function_chunk( options: &CompilerOptions, ) { // emit the function's name - let signature_idx = cp.insert_string(chunk.fqn.display()); + let signature_idx = cp.insert_string( + chunk + .fqn + .iter() + .map(OsStr::to_string_lossy) + .collect::>() + .join("::"), + ); bytecode.emit_constant_ref(signature_idx); // emits chunk's code attribute @@ -195,6 +211,21 @@ fn compile_code( &mut locals, &mut state, ); + if let Some(storage_exported_val) = &options.last_page_storage_var { + let last_expr = if let ExprKind::Block(block) = &chunk.expr.kind { + block.last().unwrap_or(&chunk.expr) + } else { + &chunk.expr + }; + let page_offset = cp.exported.last().map_or(0, |exp| { + exp.page_offset + u8::from(ValueStackSize::QWord) as u32 + }); + cp.insert_exported(storage_exported_val, page_offset, last_expr.ty.is_obj()); + instructions.emit_set_external( + cp.get_external(storage_exported_val).unwrap(), + last_expr.ty.into(), + ); + } // patch instruction count placeholder let instruction_byte_count = instructions.current_ip(); diff --git a/lib/std.msh b/lib/std.msh index e92da6ca..4f9e8cf9 100644 --- a/lib/std.msh +++ b/lib/std.msh @@ -4,6 +4,63 @@ use reef::std::assert use reef::std::convert use reef::std::process +impl Bool { + fun eq(self, other: Bool) -> Bool; + fun ne(self, other: Bool) -> Bool; + fun not(self) -> Bool; + fun and(self, other: Bool) -> Bool; + fun or(self, other: Bool) -> Bool; +} + +impl Exitcode { + fun and(self, other: Exitcode) -> Exitcode; + fun or(self, other: Exitcode) -> Exitcode; +} + +impl Int { + fun add(self, other: Int) -> Int; + fun sub(self, other: Int) -> Int; + fun mul(self, other: Int) -> Int; + fun div(self, other: Int) -> Int; + fun mod(self, other: Int) -> Int; + + fun eq(self, other: Int) -> Bool; + fun ne(self, other: Int) -> Bool; + fun lt(self, other: Int) -> Bool; + fun le(self, other: Int) -> Bool; + fun gt(self, other: Int) -> Bool; + fun ge(self, other: Int) -> Bool; + + fun to_string(self) -> String; + fun to_exitcode(self) -> Exitcode; +} + +impl String { + fun eq(self, other: String) -> Bool; + fun ne(self, other: String) -> Bool; + + fun len(self) -> Int; + fun add(self, other: String) -> String; + fun concat(self, other: String) -> String; + fun split(self, delimiter: String) -> Vec[String]; + fun bytes(self) -> Vec[Int]; +} + +impl[T] Vec[T] { + fun len(self) -> Int; + fun push(self, value: T); + fun pop(self) -> Option[T]; +} + +impl[T] Option[T] { + fun is_some(self) -> Bool; + fun is_none(self) -> Bool; + fun unwrap(self) -> T; +} + +impl Glob { + fun expand(self) -> Vec[String]; +} /// Causes the runtime to panic, with the specified message. /// This also prints the current callstack trace. 
diff --git a/lib/std/assert.msh b/lib/std/assert.msh index f6b1173a..4b69a77b 100644 --- a/lib/std/assert.msh +++ b/lib/std/assert.msh @@ -5,7 +5,7 @@ fun assert(test: Bool) = { reef::std::panic("assertion failed") } -fun assert_msg(test: Bool, msg) = { +fun assert_msg(test: Bool, msg: String) = { if !$test reef::std::panic("assertion failed: $msg") } \ No newline at end of file diff --git a/vm/Cargo.toml b/vm/Cargo.toml index f43adaa9..298c26da 100644 --- a/vm/Cargo.toml +++ b/vm/Cargo.toml @@ -21,7 +21,6 @@ ast = { path = "../ast" } parser = { path = "../parser" } analyzer = { path = "../analyzer" } compiler = { path = "../compiler" } -cli = { path = "../cli" } pretty_assertions = "1.4.0" [build-dependencies] diff --git a/vm/src/definitions/pager.cpp b/vm/src/definitions/pager.cpp index 943dd08d..998862d3 100644 --- a/vm/src/definitions/pager.cpp +++ b/vm/src/definitions/pager.cpp @@ -18,20 +18,20 @@ namespace msh { return pools.at(index); } - pager::page_vector::reverse_iterator pager::begin() { - return pages.rbegin(); + pager::page_vector::iterator pager::begin() { + return pages.begin(); } - pager::page_vector::reverse_iterator pager::end() { - return pages.rend(); + pager::page_vector::iterator pager::end() { + return pages.end(); } - pager::page_vector::const_reverse_iterator pager::cbegin() const { - return pages.rbegin(); + pager::page_vector::const_iterator pager::cbegin() const { + return pages.begin(); } - pager::page_vector::const_reverse_iterator pager::cend() const { - return pages.rend(); + pager::page_vector::const_iterator pager::cend() const { + return pages.end(); } void pager::bind(size_t pool_index, size_t dynsym_id, exported_variable value) { diff --git a/vm/src/definitions/pager.h b/vm/src/definitions/pager.h index 731423c0..80d53827 100644 --- a/vm/src/definitions/pager.h +++ b/vm/src/definitions/pager.h @@ -88,13 +88,13 @@ namespace msh { */ const ConstantPool &get_pool(size_t index) const; - page_vector::reverse_iterator begin(); + page_vector::iterator begin(); - page_vector::reverse_iterator end(); + page_vector::iterator end(); - page_vector::const_reverse_iterator cbegin() const; + page_vector::const_iterator cbegin() const; - page_vector::const_reverse_iterator cend() const; + page_vector::const_iterator cend() const; template T *get_exported_value(exported_variable var) { diff --git a/vm/src/vm.cpp b/vm/src/vm.cpp index b113337e..2c0157df 100644 --- a/vm/src/vm.cpp +++ b/vm/src/vm.cpp @@ -137,9 +137,8 @@ moshell_value moshell_vm_get_exported(moshell_vm vm, const char *name, size_t le int moshell_vm_run(moshell_vm vm) { try { vm->loader.resolve_all(vm->pager); - const auto last = vm->pager.cbegin() + (vm->pager.size() - vm->next_page); - vm->next_page = vm->pager.size(); - for (auto it = vm->pager.cbegin(); it != last; ++it) { + auto it = vm->pager.cbegin() + vm->next_page; + for (vm->next_page = vm->pager.size(); it != vm->pager.cend(); ++it) { const msh::memory_page &page = *it; runtime_memory mem{vm->heap, vm->program_args, vm->gc}; if (!run_unit(vm->thread_stack, vm->loader, vm->pager, page, mem, vm->natives, vm->pgid)) { diff --git a/vm/tests/integration/runner.rs b/vm/tests/integration/runner.rs index b89de522..61f190bc 100644 --- a/vm/tests/integration/runner.rs +++ b/vm/tests/integration/runner.rs @@ -1,74 +1,69 @@ -use std::path::PathBuf; - -use analyzer::importer::{ASTImporter, ImportResult, StaticImporter}; -use analyzer::name::Name; -use analyzer::reef::{Externals, Reef, ReefId}; -use analyzer::relations::SourceId; -use 
analyzer::types::engine::ChunkKind; -use analyzer::types::ty::{Type, TypeRef}; -use analyzer::{analyze, types, Analyzer, Inject}; -use cli::pipeline::FileImporter; -use compiler::externals::{CompiledReef, CompilerExternals}; -use compiler::{compile_reef, CompilerOptions}; -use parser::parse_trusted; +use std::ffi::OsString; +use std::io; +use std::path::{Path, PathBuf}; + +use analyzer::hir::ExprKind; +use analyzer::typing::user::{TypeId, UserType}; +use analyzer::typing::{registry, user}; +use analyzer::{ + analyze_multi, append_source, freeze_exports, Database, FileImporter, Filesystem, Reef, +}; +use compiler::{compile_reef, CompilerOptions, CompilerState}; use vm::value::VmValue; use vm::{VmError, VmValueFFI, VM}; -pub struct Runner<'a> { - externals: Externals<'a>, - compiler_externals: CompilerExternals, - current_compiled_reef: CompiledReef, +pub struct Runner { + database: Database, + compiler_state: CompilerState, + reef: Reef, vm: VM, - analyzer: Analyzer<'a>, - current_page: Option, } -impl Default for Runner<'_> { +struct EmptyFilesystem; + +impl Filesystem for EmptyFilesystem { + fn read(&self, _path: &Path) -> io::Result { + Err(io::Error::new(io::ErrorKind::NotFound, "file not found")) + } +} + +impl Default for Runner { fn default() -> Self { - let mut externals = Externals::default(); - let mut compiler_externals = CompilerExternals::default(); - let mut std_importer = FileImporter::new(PathBuf::from("../lib")); + let fs = FileImporter::new(PathBuf::from("../lib")); + let mut database = Database::default(); let mut vm = VM::default(); + let mut reef = Reef::new(OsString::from("std")); + let errors = analyze_multi(&mut database, &mut reef, &fs, "std"); + assert!(errors.is_empty()); - let std_name = Name::new("std"); - let analyzer = analyze(std_name.clone(), &mut std_importer, &externals); - let mut buff = Vec::new(); - - let compiled = compile_reef( - &analyzer.engine, - &analyzer.resolution.relations, - &analyzer.typing, - &analyzer.resolution.engine, - &externals, - &compiler_externals, - externals.current, - SourceId(0), - &mut buff, + let mut compiler_state = CompilerState::default(); + let mut bytes = Vec::new(); + compile_reef( + &database, + &reef, + &mut bytes, + &mut compiler_state, CompilerOptions::default(), ) .expect("std did not compile successfully"); - compiler_externals.set(ReefId(1), compiled); + freeze_exports(&mut database, reef); - vm.register(&buff).expect("VM std register"); + vm.register(&bytes).expect("VM std register"); unsafe { vm.run().expect("VM std init"); } - externals.register(Reef::new("std".to_string(), analyzer)); - Self { - externals, + database, + compiler_state, + reef: Reef::new(OsString::from("runner")), vm, - compiler_externals, - current_compiled_reef: CompiledReef::default(), - analyzer: Analyzer::default(), - current_page: None, } } } -impl<'a> Runner<'a> { - pub fn eval(&mut self, expr: &'a str) -> Option { +impl Runner { + pub fn eval(&mut self, expr: &str) -> Option { match self.try_eval(expr) { Ok(v) => v, Err(VmError::Panic) => panic!("VM did panic"), @@ -76,63 +71,42 @@ impl<'a> Runner<'a> { } } - pub fn try_eval(&mut self, source: &'a str) -> Result, VmError> { - let name = Name::new("runner"); - let mut importer = StaticImporter::new([(name.clone(), source)], parse_trusted); - let ImportResult::Success(imported) = importer.import(&name) else { - unreachable!() - }; + pub fn try_eval(&mut self, source: &str) -> Result, VmError> { + let errors = append_source( + &mut self.database, + &mut self.reef, + &EmptyFilesystem, + 
PathBuf::from("runner"), + source, + ); + assert!(errors.is_empty()); - let inject = Inject { - name: name.clone(), - imported, - attached: self.current_page, + let ExprKind::Block(main_block) = + &self.reef.group_by_content().next().unwrap().main.expr.kind + else { + panic!("no main block found"); }; + let evaluated_expr_type = main_block.last().unwrap().ty; + let expr_value_is_void = + evaluated_expr_type == user::UNIT_TYPE || evaluated_expr_type == user::NOTHING_TYPE; - let mut analysis = self.analyzer.inject(inject, &mut importer, &self.externals); - let page = analysis.attributed_id(); - self.current_page = Some(page); - let diagnostics = analysis.take_diagnostics(); - - let reef = self.externals.current; - - if !diagnostics.is_empty() { - panic!("input had analysis errors: \n{diagnostics:?}") - } let mut bytes = Vec::new(); - - let chunk = self.analyzer.engine.get_user(page).unwrap(); - let ChunkKind::DefinedFunction(Some(eval_expression)) = &chunk.kind else { - unreachable!() - }; - - let evaluated_expr_type = eval_expression.ty; - - let expr_value_is_void = - evaluated_expr_type == types::UNIT || evaluated_expr_type == types::NOTHING; - - self.current_compiled_reef = compile_reef( - &self.analyzer.engine, - &self.analyzer.resolution.relations, - &self.analyzer.typing, - &self.analyzer.resolution.engine, - &self.externals, - &self.compiler_externals, - reef, - page, + compile_reef( + &self.database, + &self.reef, &mut bytes, + &mut self.compiler_state, CompilerOptions { - line_provider: None, - last_page_storage_var: Some(VAR_EXPR_STORAGE.to_string()) - .filter(|_| !expr_value_is_void), + last_page_storage_var: Some(VAR_EXPR_STORAGE.to_string()), + ..CompilerOptions::default() }, ) .expect("write failed"); - self.vm .register(&bytes) .expect("compilation created invalid bytecode"); drop(bytes); + self.reef.clear_cache(); match unsafe { self.vm.run() } { Ok(()) => {} @@ -157,21 +131,25 @@ impl<'a> Runner<'a> { result } - fn extract_value(&self, value: VmValueFFI, value_type: TypeRef) -> Option { + fn extract_value(&self, value: VmValueFFI, value_type: TypeId) -> Option { unsafe { match value_type { - types::BOOL | types::EXITCODE => Some(VmValue::Byte(value.get_as_u8())), - types::FLOAT => Some(VmValue::Double(value.get_as_double())), - types::INT => Some(VmValue::Int(value.get_as_i64())), - types::STRING => Some(VmValue::String(value.get_as_obj().get_as_string())), - types::UNIT | types::NOTHING => Some(VmValue::Void), - _ => match self.get_type(value_type) { - Type::Instantiated(types::GENERIC_OPTION, param) => { + user::BOOL_TYPE | user::EXITCODE_TYPE => Some(VmValue::Byte(value.get_as_u8())), + user::FLOAT_TYPE => Some(VmValue::Double(value.get_as_double())), + user::INT_TYPE => Some(VmValue::Int(value.get_as_i64())), + user::STRING_TYPE => Some(VmValue::String(value.get_as_obj().get_as_string())), + user::UNIT_TYPE | user::NOTHING_TYPE => Some(VmValue::Void), + _ => match &self.database.checker.types[value_type] { + UserType::Parametrized { + schema: registry::OPTION_SCHEMA, + params, + } => { if value.is_ptr_null() { return None; } - let content_type = - *param.first().expect("option instance without content type"); + let [content_type] = params.as_slice() else { + panic!("option instance without content type"); + }; // option can only wrap an object value let value = if content_type.is_obj() { @@ -181,9 +159,12 @@ impl<'a> Runner<'a> { value.get_as_obj().unbox() }; - self.extract_value(value, content_type) + self.extract_value(value, *content_type) } - 
Type::Instantiated(types::GENERIC_VECTOR, _) => { + UserType::Parametrized { + schema: registry::VEC_SCHEMA, + params: _, + } => { let vec = value .get_as_obj() .get_as_vec() @@ -192,39 +173,31 @@ impl<'a> Runner<'a> { .collect(); Some(VmValue::Vec(vec)) } - Type::Structure(_, structure_id) => { - let structure = self.analyzer.engine.get_structure(*structure_id).unwrap(); - let structure_fields = structure.get_fields(); - let structure_layout = &self.current_compiled_reef.layouts[structure_id.0]; - - let structure_data = value.get_as_obj().get_as_struct(); - let structure_values = structure_fields - .into_iter() - .map(|field| { - let (pos, _) = structure_layout.get_emplacement(field.local_id); - let field_value = VmValueFFI::ptr( - *structure_data.as_ptr().add(pos as usize).cast(), - ); - self.extract_value(field_value, field.ty) - }) - .collect(); - - Some(VmValue::Struct(structure_values)) + UserType::Parametrized { schema, params: _ } => { + let structure = &self.database.checker.registry[*schema]; + // let structure_fields = structure.get_fields(); + // let structure_layout = &self.current_compiled_reef.layouts[structure_id.0]; + // + // let structure_data = value.get_as_obj().get_as_struct(); + // let structure_values = structure_fields + // .into_iter() + // .map(|field| { + // let (pos, _) = structure_layout.get_emplacement(field.local_id); + // let field_value = VmValueFFI::ptr( + // *structure_data.as_ptr().add(pos as usize).cast(), + // ); + // self.extract_value(field_value, field.ty) + // }) + // .collect(); + // + // Some(VmValue::Struct(structure_values)) + todo!() } _ => panic!("unknown object"), }, } } } - - fn get_type(&self, tpe: TypeRef) -> &Type { - let typing = if tpe.reef == self.externals.current { - &self.analyzer.typing - } else { - &self.externals.get_reef(tpe.reef).unwrap().typing - }; - typing.get_type(tpe.type_id).unwrap() - } } // use an invalid name in moshell's language specs From 0cbcd1834be149fdcb1f9241188c5b276b3ddc94 Mon Sep 17 00:00:00 2001 From: maxime Date: Sun, 22 Sep 2024 19:01:15 +0200 Subject: [PATCH 03/11] fix 'cd' command --- .gitignore | 1 + analyzer/src/hir.rs | 4 +- analyzer/src/hoist.rs | 29 ++--- analyzer/src/module.rs | 17 ++- analyzer/src/typing.rs | 182 +---------------------------- analyzer/src/typing/pfc.rs | 219 +++++++++++++++++++++++++++++++++++ analyzer/src/typing/shell.rs | 68 ++++++++++- compiler/src/emit/invoke.rs | 2 + compiler/src/emit/native.rs | 90 +++++++------- 9 files changed, 369 insertions(+), 243 deletions(-) create mode 100644 analyzer/src/typing/pfc.rs diff --git a/.gitignore b/.gitignore index 8d3d626c..73023670 100644 --- a/.gitignore +++ b/.gitignore @@ -14,3 +14,4 @@ a.out moshell *.bin completions +sandbox \ No newline at end of file diff --git a/analyzer/src/hir.rs b/analyzer/src/hir.rs index f102353c..0dec08b5 100644 --- a/analyzer/src/hir.rs +++ b/analyzer/src/hir.rs @@ -1,3 +1,5 @@ +use crate::module::Export; +use crate::symbol::SymbolRegistry; use crate::typing::registry::{FunctionId, SchemaId}; use crate::typing::user::{TypeId, ERROR_TYPE, UNIT_TYPE, UNKNOWN_TYPE}; use crate::typing::variable::{LocalEnvironment, LocalId, Var}; @@ -191,7 +193,7 @@ impl TypedExpr { } } -/// A unit of code. +/// An unit of code. pub struct Chunk { /// The fully qualified name to access this chunk. 
pub fqn: PathBuf, diff --git a/analyzer/src/hoist.rs b/analyzer/src/hoist.rs index 1450a12a..ec184f78 100644 --- a/analyzer/src/hoist.rs +++ b/analyzer/src/hoist.rs @@ -60,7 +60,7 @@ pub(super) fn hoist_files( requires: Vec::new(), }; hoist_type_names(root, checker, &mut table, &mut exports); - hoist_functions( + hoist_signatures( root, checker, &mut table, @@ -154,8 +154,8 @@ fn hoist_type_names( } } } - -fn hoist_functions( +/// hoists functions, structures and implementations +fn hoist_signatures( root: &[Expr], checker: &mut TypeChecker, table: &mut SymbolTable, @@ -255,6 +255,7 @@ fn hoist_functions( } } } + //TODO implement recursive operation Import::AllIn(_, _) => {} Import::Environment(_) => {} Import::List(_) => {} @@ -269,9 +270,9 @@ fn hoist_functions( } } -struct CurrentType { - current_ty: TypeId, - current_generics: Vec, +struct SelfType { + self_ty: TypeId, + self_generics: Vec, } fn hoist_fn_decl( @@ -283,17 +284,17 @@ fn hoist_fn_decl( return_type, .. }: &FunctionDeclaration, - current_ty: Option, + self_ty: Option, checker: &mut TypeChecker, table: &mut SymbolTable, exports: &mut [Export], errors: &mut Vec, ) { table.enter_scope(); - let (current_ty, mut generic_variables) = match current_ty { - Some(CurrentType { - current_ty, - current_generics, + let (current_ty, mut generic_variables) = match self_ty { + Some(SelfType { + self_ty: current_ty, + self_generics: current_generics, }) => (Some(current_ty), current_generics), None => (None, Vec::new()), }; @@ -510,9 +511,9 @@ fn hoist_impl_decl( }; if impl_ty.is_ok() { for function in functions { - let current = CurrentType { - current_ty: impl_ty, - current_generics: generic_variables.clone(), + let current = SelfType { + self_ty: impl_ty, + self_generics: generic_variables.clone(), }; hoist_fn_decl(function, Some(current), checker, table, exports, errors); } diff --git a/analyzer/src/module.rs b/analyzer/src/module.rs index 61e3d08b..9a583560 100644 --- a/analyzer/src/module.rs +++ b/analyzer/src/module.rs @@ -21,7 +21,7 @@ use crate::{Filesystem, PipelineError, Reef, SourceLocation, UnitKey}; use ast::call::ProgrammaticCall; use ast::function::FunctionDeclaration; use ast::r#use::{Import as ImportExpr, ImportList, ImportedSymbol, InclusionPathItem, Use}; -use ast::variable::VarDeclaration; +use ast::variable::{Identifier, VarDeclaration}; use ast::Expr; use context::source::{SourceSegment, SourceSegmentHolder, Span}; use parser::err::ParseError; @@ -242,6 +242,12 @@ impl ModuleTree { } std::mem::take(&mut current.exports) } + + pub fn find_export(&self, name: &str, symbol_registry: SymbolRegistry) -> Option<&Export> { + self.exports + .iter() + .find(|e| e.name == name && e.registry == symbol_registry) + } } pub(crate) struct ImportResult { @@ -278,6 +284,15 @@ impl<'a> ModuleView<'a> { } Some(tree) } + + pub(crate) fn get_foreign(&self, path: &[&str]) -> Option<&ModuleTree> { + let (first, rest) = path.split_first().expect("path should not be empty"); + let tree = self.foreign.get(OsStr::new(first))?; + + rest.iter().try_fold(tree, |acc, it| { + acc.get(OsStr::new(it)) + }) + } } /// Access all related files starting from the entrypoint. 
diff --git a/analyzer/src/typing.rs b/analyzer/src/typing.rs index 6fdb1b51..ba4fa067 100644 --- a/analyzer/src/typing.rs +++ b/analyzer/src/typing.rs @@ -3,6 +3,7 @@ mod flow; pub mod function; mod lower; mod operator; +mod pfc; pub mod registry; pub mod schema; mod shell; @@ -19,6 +20,7 @@ use crate::typing::flow::{ascribe_control, ascribe_while}; use crate::typing::function::Function; use crate::typing::lower::{ascribe_template_string, coerce_condition}; use crate::typing::operator::ascribe_binary; +use crate::typing::pfc::ascribe_pfc; use crate::typing::registry::{FunctionId, Registry, SchemaId}; use crate::typing::schema::Schema; use crate::typing::shell::{ @@ -490,186 +492,8 @@ fn ascribe_type( Expr::Parenthesis(paren) => { ascribe_type(&paren.expression, table, checker, storage, ctx, errors) } - Expr::ProgrammaticCall(ProgrammaticCall { - path, - arguments, - type_parameters, - segment: span, - }) => { - let arguments = arguments - .iter() - .map(|expr| ascribe_type(expr, table, checker, storage, ctx, errors)) - .collect::>(); - let ty = lookup_path( - path, - SymbolRegistry::Function, - table, - checker, - modules, - errors, - ); - if ty.is_err() { - return TypedExpr { - kind: ExprKind::Noop, - span: span.clone(), - ty: ERROR_TYPE, - }; - } - let mut type_parameters = type_parameters - .iter() - .map(|type_param| lookup_type(type_param, table, checker, modules, errors)) - .collect::>(); - let UserType::Function(function) = checker.types[ty] else { - panic!( - "function should have a function type {ty:?} {:?}", - &checker.types[ty] - ); - }; - let Function { - ref declared_at, - fqn: _, - ref generic_variables, - ref param_types, - return_type, - kind: _, - } = checker.registry[function]; - let mut return_type = return_type; - if type_parameters.is_empty() && !generic_variables.is_empty() { - // Try to infer the generic types from the actual arguments - type_parameters = vec![UNKNOWN_TYPE; generic_variables.len()]; - for (arg, param) in arguments.iter().zip(param_types.iter()) { - if let Some(generic_variable) = - generic_variables.iter().position(|&ty| ty == param.ty) - { - if type_parameters[generic_variable] != UNKNOWN_TYPE - && type_parameters[generic_variable] != arg.ty - { - errors.push(TypeError::new( - TypeErrorKind::TypeMismatch { - expected: checker.display(type_parameters[generic_variable]), - expected_due_to: None, - actual: checker.display(arg.ty), - }, - SourceLocation::new(table.path().to_owned(), arg.span.clone()), - )); - } else { - type_parameters[generic_variable] = arg.ty; - } - } else if let UserType::Parametrized { - schema: param_schema, - params: param_params, - .. - } = &checker.types[param.ty] - { - if let UserType::Parametrized { - schema, - params: arg_params, - } = &checker.types[arg.ty] - { - if schema == param_schema { - for param_param in param_params { - if let Some(idx) = - generic_variables.iter().position(|&ty| ty == *param_param) - { - type_parameters[idx].define_if_absent(arg_params[idx]); - } - } - } - } - } - } - if let TypeHint::Required(expected_return_ty) = hint { - if let Some(idx) = generic_variables.iter().position(|&ty| ty == return_type) { - type_parameters[idx].define_if_absent(expected_return_ty); - } else if let UserType::Parametrized { - schema: expected_schema, - params: expected_params, - .. 
- } = &checker.types[expected_return_ty] - { - if let UserType::Parametrized { - schema, - params: fn_return_params, - } = &checker.types[return_type] - { - if schema == expected_schema { - // First, get the index of the generic_variables in the return_params list - for (fn_return_param, fn_actual) in - fn_return_params.iter().zip(expected_params) - { - if let Some(generic_idx) = generic_variables - .iter() - .position(|&ty| ty == *fn_return_param) - { - type_parameters[generic_idx].define_if_absent(*fn_actual); - } - } - } - } - } - } - if type_parameters.iter().any(|ty| *ty == UNKNOWN_TYPE) { - errors.push(TypeError::new( - TypeErrorKind::TypeAnnotationRequired { - types: generic_variables - .iter() - .map(|ty| checker.display(*ty)) - .collect(), - insert_at: path - .last() - .expect("path should have at least one item") - .segment() - .end, - }, - SourceLocation::new(table.path().to_owned(), span.clone()), - )); - return_type = ERROR_TYPE; - } - } - - if arguments.len() != param_types.len() { - errors.push(TypeError::new( - TypeErrorKind::ArityMismatch { - expected: param_types.len(), - received: arguments.len(), - }, - SourceLocation::new(table.path().to_owned(), span.clone()), - )); - } else { - for (arg, param) in arguments.iter().zip(param_types.iter()) { - let param_ty = - checker - .types - .concretize(param.ty, generic_variables, &type_parameters); - if let Err(_) = checker.types.unify(arg.ty, param_ty) { - errors.push(TypeError::new( - TypeErrorKind::TypeMismatch { - expected: checker.display(param_ty), - expected_due_to: Some(SourceLocation::new( - declared_at.clone(), - param.span.clone(), - )), - actual: checker.display(arg.ty), - }, - SourceLocation::new(table.path().to_owned(), arg.span.clone()), - )); - } - } - } - return_type = - checker - .types - .concretize(return_type, generic_variables, &type_parameters); - TypedExpr { - kind: ExprKind::FunctionCall(FunctionCall { - arguments, - function_id: function, - }), - span: span.clone(), - ty: return_type, - } - } Expr::StructDeclaration(decl) => TypedExpr::noop(decl.segment.clone()), + Expr::ProgrammaticCall(call) => ascribe_pfc(call, table, checker, storage, ctx, errors), Expr::FieldAccess(FieldAccess { expr, field, diff --git a/analyzer/src/typing/pfc.rs b/analyzer/src/typing/pfc.rs new file mode 100644 index 00000000..e4c4a31f --- /dev/null +++ b/analyzer/src/typing/pfc.rs @@ -0,0 +1,219 @@ +use crate::hir::{ExprKind, FunctionCall, Module, TypedExpr}; +use crate::symbol::SymbolRegistry; +use crate::typing::function::Function; +use crate::typing::registry::FunctionId; +use crate::typing::user::{TypeId, UserType, ERROR_TYPE, UNKNOWN_TYPE}; +use crate::typing::variable::VariableTable; +use crate::typing::{ + ascribe_type, lookup_path, lookup_type, Context, TypeChecker, TypeError, TypeErrorKind, + TypeHint, +}; +use crate::SourceLocation; +use ast::call::ProgrammaticCall; +use context::source::SourceSegmentHolder; + +pub fn ascribe_pfc( + call: &ProgrammaticCall, + table: &mut VariableTable, + checker: &mut TypeChecker, + storage: &mut Module, + ctx: Context, + errors: &mut Vec, +) -> TypedExpr { + let ty = lookup_path( + &call.path, + SymbolRegistry::Function, + table, + checker, + ctx.modules, + errors, + ); + + if ty.is_err() { + return TypedExpr { + kind: ExprKind::Noop, + span: call.segment(), + ty: ERROR_TYPE, + }; + } + + let UserType::Function(function) = checker.types[ty] else { + panic!( + "function should have a function type {ty:?} {:?}", + &checker.types[ty] + ); + }; + + ascribe_known_pfc(call, function, table, 
checker, storage, ctx, errors) +} + +/// Generate IHR for a given Programmatic Function Call where the callee is forced to be the given `function_id` argument. +/// +/// This function will ignore the pfc's path that it would normally use to retrieve the targeted function (as in [`ascribe_pfc`]), +/// and will try to match it with the given function +pub fn ascribe_known_pfc( + ProgrammaticCall { + path, + arguments, + type_parameters, + segment: span, + }: &ProgrammaticCall, + function_id: FunctionId, + table: &mut VariableTable, + checker: &mut TypeChecker, + storage: &mut Module, + ctx @ Context { modules, hint, .. }: Context, + errors: &mut Vec, +) -> TypedExpr { + let arguments = arguments + .iter() + .map(|expr| ascribe_type(expr, table, checker, storage, ctx, errors)) + .collect::>(); + + let mut type_parameters = type_parameters + .iter() + .map(|type_param| lookup_type(type_param, table, checker, modules, errors)) + .collect::>(); + + let Function { + ref declared_at, + ref generic_variables, + ref param_types, + return_type, + .. + } = checker.registry[function_id]; + + let mut return_type = return_type; + if type_parameters.is_empty() && !generic_variables.is_empty() { + // Try to infer the generic types from the actual arguments + type_parameters = vec![UNKNOWN_TYPE; generic_variables.len()]; + for (arg, param) in arguments.iter().zip(param_types.iter()) { + if let Some(generic_variable) = generic_variables.iter().position(|&ty| ty == param.ty) + { + if type_parameters[generic_variable] != UNKNOWN_TYPE + && type_parameters[generic_variable] != arg.ty + { + errors.push(TypeError::new( + TypeErrorKind::TypeMismatch { + expected: checker.display(type_parameters[generic_variable]), + expected_due_to: None, + actual: checker.display(arg.ty), + }, + SourceLocation::new(table.path().to_owned(), arg.span.clone()), + )); + } else { + type_parameters[generic_variable] = arg.ty; + } + } else if let UserType::Parametrized { + schema: param_schema, + params: param_params, + .. + } = &checker.types[param.ty] + { + if let UserType::Parametrized { + schema, + params: arg_params, + } = &checker.types[arg.ty] + { + if schema == param_schema { + for param_param in param_params { + if let Some(idx) = + generic_variables.iter().position(|&ty| ty == *param_param) + { + type_parameters[idx].define_if_absent(arg_params[idx]); + } + } + } + } + } + } + if let TypeHint::Required(expected_return_ty) = hint { + if let Some(idx) = generic_variables.iter().position(|&ty| ty == return_type) { + type_parameters[idx].define_if_absent(expected_return_ty); + } else if let UserType::Parametrized { + schema: expected_schema, + params: expected_params, + .. 
+ } = &checker.types[expected_return_ty] + { + if let UserType::Parametrized { + schema, + params: fn_return_params, + } = &checker.types[return_type] + { + if schema == expected_schema { + // First, get the index of the generic_variables in the return_params list + for (fn_return_param, fn_actual) in + fn_return_params.iter().zip(expected_params) + { + if let Some(generic_idx) = generic_variables + .iter() + .position(|&ty| ty == *fn_return_param) + { + type_parameters[generic_idx].define_if_absent(*fn_actual); + } + } + } + } + } + } + if type_parameters.iter().any(|ty| *ty == UNKNOWN_TYPE) { + errors.push(TypeError::new( + TypeErrorKind::TypeAnnotationRequired { + types: generic_variables + .iter() + .map(|ty| checker.display(*ty)) + .collect(), + insert_at: path + .last() + .expect("path should have at least one item") + .segment() + .end, + }, + SourceLocation::new(table.path().to_owned(), span.clone()), + )); + return_type = ERROR_TYPE; + } + } + + if arguments.len() != param_types.len() { + errors.push(TypeError::new( + TypeErrorKind::ArityMismatch { + expected: param_types.len(), + received: arguments.len(), + }, + SourceLocation::new(table.path().to_owned(), span.clone()), + )); + } else { + for (arg, param) in arguments.iter().zip(param_types.iter()) { + let param_ty = checker + .types + .concretize(param.ty, generic_variables, &type_parameters); + if let Err(_) = checker.types.unify(arg.ty, param_ty) { + errors.push(TypeError::new( + TypeErrorKind::TypeMismatch { + expected: checker.display(param_ty), + expected_due_to: Some(SourceLocation::new( + declared_at.clone(), + param.span.clone(), + )), + actual: checker.display(arg.ty), + }, + SourceLocation::new(table.path().to_owned(), arg.span.clone()), + )); + } + } + } + return_type = checker + .types + .concretize(return_type, generic_variables, &type_parameters); + + TypedExpr { + kind: ExprKind::FunctionCall(FunctionCall { + arguments, + function_id, + }), + span: span.clone(), + ty: return_type, + } +} diff --git a/analyzer/src/typing/shell.rs b/analyzer/src/typing/shell.rs index 2f6eaa92..a9926767 100644 --- a/analyzer/src/typing/shell.rs +++ b/analyzer/src/typing/shell.rs @@ -1,17 +1,23 @@ use crate::hir::{ ExprKind, MethodCall, Module, Redir, Redirect, Subprocess, Substitute, TypedExpr, }; +use crate::symbol::SymbolRegistry; use crate::typing::lower::convert_into_string; +use crate::typing::pfc::ascribe_known_pfc; use crate::typing::registry::GLOB_SCHEMA; use crate::typing::user::{ - EXITCODE_TYPE, GLOB_TYPE, INT_TYPE, PID_TYPE, STRING_TYPE, STRING_VECTOR_TYPE, + UserType, EXITCODE_TYPE, GLOB_TYPE, INT_TYPE, PID_TYPE, STRING_TYPE, STRING_VECTOR_TYPE, }; use crate::typing::variable::VariableTable; use crate::typing::{ascribe_type, Context, TypeChecker, TypeError, TypeErrorKind, TypeHint}; use crate::SourceLocation; -use ast::call::{Call, Detached, Pipeline, RedirOp, Redirected}; +use ast::call::{Call, Detached, Pipeline, ProgrammaticCall, RedirOp, Redirected}; +use ast::r#use::InclusionPathItem; use ast::range::FilePattern; use ast::substitution::Substitution; +use ast::value::{Literal, LiteralValue}; +use ast::variable::Identifier; +use ast::Expr; use context::source::SourceSegmentHolder; pub(super) fn ascribe_call( @@ -22,6 +28,10 @@ pub(super) fn ascribe_call( ctx: Context, errors: &mut Vec, ) -> TypedExpr { + if let Some(implicit_pfc) = as_implicit_pfc(call, table, checker, storage, ctx, errors) { + return implicit_pfc; + } + let args = call .arguments .iter() @@ -36,7 +46,7 @@ pub(super) fn ascribe_call( &[], 
STRING_VECTOR_TYPE, ) - .expect("Glob schema does not have a `expand` method"); + .expect("Glob schema does not have an `expand` method"); let span = expr.span.clone(); TypedExpr { kind: ExprKind::MethodCall(MethodCall { @@ -60,6 +70,58 @@ pub(super) fn ascribe_call( } } +fn as_implicit_pfc( + call: &Call, + table: &mut VariableTable, + checker: &mut TypeChecker, + storage: &mut Module, + ctx: Context, + errors: &mut Vec, +) -> Option { + let (cmd, rest) = call.arguments.split_first().expect("at least one argument"); + + let Expr::Literal(Literal { + parsed: LiteralValue::String(cmd_name), + segment, + }) = cmd + else { + return None; + }; + + if cmd_name == "cd" { + let pfc_ast = ProgrammaticCall { + path: vec![InclusionPathItem::Symbol(Identifier::new( + "cd".into(), + segment.start, + ))], + arguments: Vec::from(rest), + type_parameters: vec![], + segment: call.segment(), + }; + + // retrieve the std::cd function type + let std_module = ctx.modules.get_foreign(&["std"]).expect("std module"); + let function_export = std_module + .find_export("cd", SymbolRegistry::Function) + .expect("cd function in std module"); + let UserType::Function(function_id) = checker.types[function_export.ty] else { + panic!("std::cd type is not a function type") + }; + + return Some(ascribe_known_pfc( + &pfc_ast, + function_id, + table, + checker, + storage, + ctx, + errors, + )); + } + + None +} + pub(super) fn ascribe_redirected( redirected: &Redirected, table: &mut VariableTable, diff --git a/compiler/src/emit/invoke.rs b/compiler/src/emit/invoke.rs index f925cf92..d0245175 100644 --- a/compiler/src/emit/invoke.rs +++ b/compiler/src/emit/invoke.rs @@ -171,6 +171,8 @@ fn emit_arguments( for arg in arguments { instructions.emit_code(Opcode::Dup); emit(arg, instructions, ctx, cp, locals, state); + + // the argument type can either be a string, or a string vector (if the argument is an expanded glob) if arg.ty == STRING_TYPE { instructions.emit_invoke(cp.insert_string(VEC_PUSH)); } else { diff --git a/compiler/src/emit/native.rs b/compiler/src/emit/native.rs index 976fb37e..cac29079 100644 --- a/compiler/src/emit/native.rs +++ b/compiler/src/emit/native.rs @@ -93,51 +93,51 @@ pub(crate) fn emit_natives( } state.use_values(uses); match name { - STRING_EQ => { - todo!("Emit string equality") - } - STRING_CONCAT => { - todo!("Emit string concatenation") - } - INT_TO_STRING => { - todo!("Emit int to string") - } - FLOAT_TO_STRING => { - todo!("Emit float to string") - } - STRING_LEN => { - todo!("Emit string length") - } - STRING_INDEX => { - todo!("Emit string index") - } - VEC_INDEX => { - todo!("Emit vec index") - } - VEC_INDEX_EQ => { - todo!("Emit vec index assignment") - } - VEC_POP => { - todo!("Emit vec pop") - } - VEC_PUSH => { - todo!("Emit vec push") - } - VEC_EXTEND => { - todo!("Emit vec extend") - } - VEC_LEN => { - todo!("Emit vec length") - } - VEC_POP_HEAD => { - todo!("Emit vec pop head") - } - STRING_SPLIT => { - todo!("Emit string split") - } - STRING_BYTES => { - todo!("Emit string bytes") - } + // STRING_EQ => { + // todo!("Emit string equality") + // } + // STRING_CONCAT => { + // todo!("Emit string concatenation") + // } + // INT_TO_STRING => { + // todo!("Emit int to string") + // } + // FLOAT_TO_STRING => { + // todo!("Emit float to string") + // } + // STRING_LEN => { + // todo!("Emit string length") + // } + // STRING_INDEX => { + // todo!("Emit string index") + // } + // VEC_INDEX => { + // todo!("Emit vec index") + // } + // VEC_INDEX_EQ => { + // todo!("Emit vec index assignment") + // 
} + // VEC_POP => { + // todo!("Emit vec pop") + // } + // VEC_PUSH => { + // todo!("Emit vec push") + // } + // VEC_EXTEND => { + // todo!("Emit vec extend") + // } + // VEC_LEN => { + // todo!("Emit vec length") + // } + // VEC_POP_HEAD => { + // todo!("Emit vec pop head") + // } + // STRING_SPLIT => { + // todo!("Emit string split") + // } + // STRING_BYTES => { + // todo!("Emit string bytes") + // } "Bool/not" => { instructions.emit_bool_inversion(); } From 92b04b7d547d6adacf7fda90168b696b90b3e286 Mon Sep 17 00:00:00 2001 From: maxime Date: Mon, 7 Oct 2024 22:32:36 +0200 Subject: [PATCH 04/11] fix global and local shadowing --- analyzer/src/module.rs | 31 +++++++++++------------- analyzer/src/typing/variable.rs | 7 ++++++ cli/lang_tests/flow/both_shadowing.msh | 26 ++++++++++++++++++++ cli/lang_tests/flow/global_shadowing.msh | 22 +++++++++++++++++ cli/lang_tests/flow/local_shadowing.msh | 19 +++++++++++++++ cli/src/library.rs | 8 +++++- 6 files changed, 95 insertions(+), 18 deletions(-) create mode 100644 cli/lang_tests/flow/both_shadowing.msh create mode 100644 cli/lang_tests/flow/global_shadowing.msh create mode 100644 cli/lang_tests/flow/local_shadowing.msh diff --git a/analyzer/src/module.rs b/analyzer/src/module.rs index 9a583560..f1d09460 100644 --- a/analyzer/src/module.rs +++ b/analyzer/src/module.rs @@ -21,7 +21,7 @@ use crate::{Filesystem, PipelineError, Reef, SourceLocation, UnitKey}; use ast::call::ProgrammaticCall; use ast::function::FunctionDeclaration; use ast::r#use::{Import as ImportExpr, ImportList, ImportedSymbol, InclusionPathItem, Use}; -use ast::variable::{Identifier, VarDeclaration}; +use ast::variable::VarDeclaration; use ast::Expr; use context::source::{SourceSegment, SourceSegmentHolder, Span}; use parser::err::ParseError; @@ -289,9 +289,8 @@ impl<'a> ModuleView<'a> { let (first, rest) = path.split_first().expect("path should not be empty"); let tree = self.foreign.get(OsStr::new(first))?; - rest.iter().try_fold(tree, |acc, it| { - acc.get(OsStr::new(it)) - }) + rest.iter() + .try_fold(tree, |acc, it| acc.get(OsStr::new(it))) } } @@ -409,22 +408,20 @@ fn hoist_exports(root: &Root, exports: &mut Vec) -> Vec { }); } } else if let Expr::VarDeclaration(VarDeclaration { var, segment, .. 
}) = expr { - if let Some(exported) = exports + let export = Export { + name: var.name.to_string(), + span: segment.clone(), + registry: SymbolRegistry::Variable, + ty: UNKNOWN_TYPE, + }; + if let Some(exported_idx) = exports .iter() - .find(|export| export.name == var.name.value.as_str()) + .position(|export| export.name == var.name.value.as_str()) { - duplicates.push(Duplicated { - name: var.name.to_string(), - first: exported.span.clone(), - second: segment.clone(), - }); + // if the root variable was already declared, shadow it with the most recent variable declaration + exports[exported_idx] = export; } else { - exports.push(Export { - name: var.name.to_string(), - span: segment.clone(), - registry: SymbolRegistry::Variable, - ty: UNKNOWN_TYPE, - }); + exports.push(export); } } else if let Expr::StructDeclaration(decl) = expr { if let Some(exported) = exports diff --git a/analyzer/src/typing/variable.rs b/analyzer/src/typing/variable.rs index c9591e6e..02a9c5ed 100644 --- a/analyzer/src/typing/variable.rs +++ b/analyzer/src/typing/variable.rs @@ -174,11 +174,18 @@ impl<'a> VariableTable<'a> { } pub(super) fn pop_environment(&mut self) -> LocalEnvironment { + let current_env_id = self.current_env_id(); + self.symbols_to_locals + .retain(|_, (env_id, _)| *env_id != current_env_id); self.environments .pop() .expect("At least one environment should exist") } + pub(super) fn current_env_id(&self) -> usize { + self.environments.len() - 1 + } + pub(super) fn path(&self) -> &Path { &self.inner.path } diff --git a/cli/lang_tests/flow/both_shadowing.msh b/cli/lang_tests/flow/both_shadowing.msh new file mode 100644 index 00000000..670d69af --- /dev/null +++ b/cli/lang_tests/flow/both_shadowing.msh @@ -0,0 +1,26 @@ +// Run: +// status: success +// stdout: +// Moshell-!-I-love-Moshell,-i-used-to-code-with-Moshell...- +// 1 +// 2 +// 3 + +fun test_shadowing() -> Exitcode = { + val x = 1 + echo $x + val x = 2 + echo $x + var x = 3 + echo $x +} + +val res = "Moshell ! I love Moshell, i used to code with Moshell...".split(' ') +var i = 0 +while $i < $res.len() { + echo -n "${res[$i]}-" + $i += 1 +} +echo + +test_shadowing() \ No newline at end of file diff --git a/cli/lang_tests/flow/global_shadowing.msh b/cli/lang_tests/flow/global_shadowing.msh new file mode 100644 index 00000000..fb90fe7e --- /dev/null +++ b/cli/lang_tests/flow/global_shadowing.msh @@ -0,0 +1,22 @@ +// Run: +// status: success +// stdout: +// 11 +// 12 +// Shadowed Variable ! +// 47 + + +var i = 11 +echo $i + +val i = 12 +echo $i + +var i = "Shadowed Variable !" 
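+// shadowing can change both the binding kind and the type: `i` started as an
+// Int `var` and is now a String `var`; each `echo $i` reads the most recent
+// declaration in scope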
+echo $i + +val i = 45 + +val i = $i + 2 +echo $i \ No newline at end of file diff --git a/cli/lang_tests/flow/local_shadowing.msh b/cli/lang_tests/flow/local_shadowing.msh new file mode 100644 index 00000000..0a274149 --- /dev/null +++ b/cli/lang_tests/flow/local_shadowing.msh @@ -0,0 +1,19 @@ +// Run: +// status: success +// stdout: +// 1 +// 2 +// 3 + +fun foo() -> Exitcode = { + var x = 1 + echo $x + + var x = 2 + echo $x + + var x = 3 + echo $x +} + +foo() \ No newline at end of file diff --git a/cli/src/library.rs b/cli/src/library.rs index 6b39fb63..01e091cd 100644 --- a/cli/src/library.rs +++ b/cli/src/library.rs @@ -43,12 +43,18 @@ fn find_std() -> PathBuf { #[cfg(unix)] { - for path in ["/usr/local/share/moshell/lib", "/usr/share/moshell/lib"] { + for path in [ + "/usr/local/share/moshell/lib", + "/usr/share/moshell/lib", + #[cfg(debug_assertions)] + "./lib", + ] { let path = Path::new(path); if path.exists() { return path.to_path_buf(); } } } + panic!("Could not determine a valid std emplacement. Please provide a valid stdlib path under a MOSHELL_STD= env variable.") } From 173ee561ce45acfe9d658e4a675010bc93f35019 Mon Sep 17 00:00:00 2001 From: maxime Date: Wed, 9 Oct 2024 22:15:59 +0200 Subject: [PATCH 05/11] fix box/unboxing --- analyzer/src/hoist.rs | 6 +- analyzer/src/typing/user.rs | 7 +- cli/lang_tests/flow/both_shadowing.msh | 2 +- compiler/src/bytecode.rs | 7 + compiler/src/emit/native.rs | 382 +++++++++++-------------- vm/src/interpreter.cpp | 2 +- vm/src/memory/heap.cpp | 4 +- vm/src/memory/heap.h | 6 +- vm/src/stdlib_natives.cpp | 11 +- vm/tests/integration/flow.rs | 6 +- vm/tests/integration/runner.rs | 9 +- 11 files changed, 211 insertions(+), 231 deletions(-) diff --git a/analyzer/src/hoist.rs b/analyzer/src/hoist.rs index ec184f78..c1935a75 100644 --- a/analyzer/src/hoist.rs +++ b/analyzer/src/hoist.rs @@ -182,7 +182,7 @@ fn hoist_signatures( segment: span, }) => { if matches!(path.first(), Some(InclusionPathItem::Symbol(_))) { - return; // Exclude inter-reefs dependencies + continue; // Exclude inter-reefs dependencies } let (last, rest) = path.split_last().expect("at least one item"); if let Some(module) = deps.modules.get_direct(rest) { @@ -214,7 +214,7 @@ fn hoist_signatures( } Import::AllIn(path, _) => { if matches!(path.first(), Some(InclusionPathItem::Symbol(_))) { - return; // Exclude inter-reefs dependencies + continue; // Exclude inter-reefs dependencies } if let Some(module) = deps.modules.get_direct(path) { for export in &module.exports { @@ -229,7 +229,7 @@ fn hoist_signatures( segment: span, }) => { if matches!(root.first(), Some(InclusionPathItem::Symbol(_))) { - return; // Exclude inter-reefs dependencies + continue; // Exclude inter-reefs dependencies } let base = root .iter() diff --git a/analyzer/src/typing/user.rs b/analyzer/src/typing/user.rs index 2c1fb5d6..fb1e88a6 100644 --- a/analyzer/src/typing/user.rs +++ b/analyzer/src/typing/user.rs @@ -94,12 +94,7 @@ impl TypeArena { } /// Given a possible generic type, create a parameterized variant for the given context. 
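    ///
    /// For example, if a function declares a generic variable `T` and the call site
    /// supplies `Int` for that position, concretizing its declared return type `T`
    /// with `generics = [T]` and `params = [Int]` yields `Int`; parametrized types
    /// that mention `T` are rebuilt the same way.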
- pub(crate) fn concretize( - &mut self, - ty: TypeId, - generics: &[TypeId], - params: &[TypeId], - ) -> TypeId { + pub fn concretize(&mut self, ty: TypeId, generics: &[TypeId], params: &[TypeId]) -> TypeId { assert_eq!(generics.len(), params.len(), "expected same length between generics {generics:?} and their concretized counterparts {params:?}"); match &self[ty] { UserType::Parametrized { diff --git a/cli/lang_tests/flow/both_shadowing.msh b/cli/lang_tests/flow/both_shadowing.msh index 670d69af..2568a0b4 100644 --- a/cli/lang_tests/flow/both_shadowing.msh +++ b/cli/lang_tests/flow/both_shadowing.msh @@ -21,6 +21,6 @@ while $i < $res.len() { echo -n "${res[$i]}-" $i += 1 } -echo +echo test_shadowing() \ No newline at end of file diff --git a/compiler/src/bytecode.rs b/compiler/src/bytecode.rs index 028af7b3..1ff4b0c2 100644 --- a/compiler/src/bytecode.rs +++ b/compiler/src/bytecode.rs @@ -309,6 +309,13 @@ impl<'a> Instructions<'a> { /// It returns the [`Placeholder`] address of the offset which is to be patched. #[must_use = "the jump address must be patched later"] pub fn emit_jump(&mut self, opcode: Opcode) -> Placeholder { + debug_assert!( + matches!( + opcode, + Opcode::Jump | Opcode::IfJump | Opcode::IfNotJump | Opcode::Fork + ), + "input opcode must be a jump instruction" + ); self.emit_code(opcode); self.bytecode.emit_u32_placeholder() } diff --git a/compiler/src/emit/native.rs b/compiler/src/emit/native.rs index cac29079..3793de85 100644 --- a/compiler/src/emit/native.rs +++ b/compiler/src/emit/native.rs @@ -6,7 +6,7 @@ use crate::r#type::ValueStackSize; use analyzer::hir::MethodCall; use analyzer::typing::function::FunctionKind; use analyzer::typing::user; -use analyzer::typing::user::TypeId; +use analyzer::typing::user::{TypeId, UserType}; const STRING_EQ: &str = "lang::String::eq"; const STRING_CONCAT: &str = "lang::String::concat"; @@ -45,215 +45,23 @@ pub(crate) fn emit_natives( let name = function.fqn.as_os_str().to_str().unwrap(); let uses = state.use_values(true); emit(callee, instructions, ctx, cp, locals, state); - if name == "Bool/and" || name == "Exitcode/and" { - instructions.emit_code(Opcode::DupByte); - let end_jump = instructions.emit_jump(if name == "Exitcode/and" { - Opcode::IfJump - } else { - Opcode::IfNotJump - }); - instructions.emit_pop(ValueStackSize::Byte); - emit( - args.first() - .expect("Cannot AND a boolean without a second boolean"), - instructions, - ctx, - cp, - locals, - state, - ); - instructions.patch_jump(end_jump); - state.use_values(uses); - return; - } else if name == "Bool/or" || name == "Exitcode/or" { - instructions.emit_code(Opcode::DupByte); - let else_jump = instructions.emit_jump(if name == "Exitcode/or" { - Opcode::IfJump - } else { - Opcode::IfNotJump - }); - let end_jump = instructions.emit_jump(Opcode::Jump); - instructions.patch_jump(else_jump); - instructions.emit_pop(ValueStackSize::Byte); - emit( - args.first() - .expect("Cannot OR a boolean without a second boolean"), - instructions, - ctx, - cp, - locals, - state, - ); - instructions.patch_jump(end_jump); - state.use_values(uses); - return; - } - for arg in args { - emit(arg, instructions, ctx, cp, locals, state); - } - state.use_values(uses); - match name { - // STRING_EQ => { - // todo!("Emit string equality") - // } - // STRING_CONCAT => { - // todo!("Emit string concatenation") - // } - // INT_TO_STRING => { - // todo!("Emit int to string") - // } - // FLOAT_TO_STRING => { - // todo!("Emit float to string") - // } - // STRING_LEN => { - // todo!("Emit string length") - 
// } - // STRING_INDEX => { - // todo!("Emit string index") - // } - // VEC_INDEX => { - // todo!("Emit vec index") - // } - // VEC_INDEX_EQ => { - // todo!("Emit vec index assignment") - // } - // VEC_POP => { - // todo!("Emit vec pop") - // } - // VEC_PUSH => { - // todo!("Emit vec push") - // } - // VEC_EXTEND => { - // todo!("Emit vec extend") - // } - // VEC_LEN => { - // todo!("Emit vec length") - // } - // VEC_POP_HEAD => { - // todo!("Emit vec pop head") - // } - // STRING_SPLIT => { - // todo!("Emit string split") - // } - // STRING_BYTES => { - // todo!("Emit string bytes") - // } - "Bool/not" => { - instructions.emit_bool_inversion(); - } - "Bool/eq" => { - instructions.emit_code(Opcode::BXor); - instructions.emit_bool_inversion(); - } - "Bool/ne" => { - instructions.emit_code(Opcode::BXor); - } - "Int/add" => { - instructions.emit_code(Opcode::IntAdd); - } - "Int/sub" => { - instructions.emit_code(Opcode::IntSub); - } - "Int/mul" => { - instructions.emit_code(Opcode::IntMul); - } - "Int/div" => { - instructions.emit_code(Opcode::IntDiv); - } - "Int/mod" => { - instructions.emit_code(Opcode::IntMod); - } - "Int/eq" => { - instructions.emit_code(Opcode::IntEqual); - } - "Int/ne" => { - instructions.emit_code(Opcode::IntEqual); - instructions.emit_bool_inversion(); - } - "Int/lt" => { - instructions.emit_code(Opcode::IntLessThan); - } - "Int/le" => { - instructions.emit_code(Opcode::IntLessOrEqual); - } - "Int/gt" => { - instructions.emit_code(Opcode::IntGreaterThan); - } - "Int/ge" => { - instructions.emit_code(Opcode::IntGreaterOrEqual); - } - "Int/to_exitcode" => { - instructions.emit_code(Opcode::ConvertByteToInt); - } - "Int/to_string" => { - instructions.emit_invoke(cp.insert_string(INT_TO_STRING)); - } - "String/eq" => { - instructions.emit_invoke(cp.insert_string(STRING_EQ)); - } - "String/ne" => { - instructions.emit_invoke(cp.insert_string(STRING_EQ)); - instructions.emit_bool_inversion(); - } - "String/len" => { - instructions.emit_invoke(cp.insert_string(STRING_LEN)); - } - "String/add" | "String/concat" => { - instructions.emit_invoke(cp.insert_string(STRING_CONCAT)); - } - "String/[]" => { - instructions.emit_invoke(cp.insert_string(STRING_INDEX)); - } - "String/split" => { - instructions.emit_invoke(cp.insert_string(STRING_SPLIT)); - } - "String/bytes" => { - instructions.emit_invoke(cp.insert_string(STRING_BYTES)); - } - "Vec/len" => { - instructions.emit_invoke(cp.insert_string(VEC_LEN)); - } - "Vec/[]" => { - instructions.emit_invoke(cp.insert_string(VEC_INDEX)); - } - "Vec/[]=" => { - instructions.emit_invoke(cp.insert_string(VEC_INDEX_EQ)); - } - "Vec/push" => { - instructions.emit_invoke(cp.insert_string(VEC_PUSH)); - } - "Vec/pop" => { - instructions.emit_invoke(cp.insert_string(VEC_POP)); - } - "Vec/pop_head" => { - instructions.emit_invoke(cp.insert_string(VEC_POP_HEAD)); - } - "Vec/extend" => { - instructions.emit_invoke(cp.insert_string(VEC_EXTEND)); - } - "Option/is_some" => { - instructions.emit_push_int(0); - instructions.emit_code(Opcode::IntEqual); - } - "Option/is_none" => { - instructions.emit_push_int(0); - instructions.emit_code(Opcode::IntEqual); - instructions.emit_bool_inversion(); - } - "Option/unwrap" => { - instructions.emit_code(Opcode::Dup); - instructions.emit_push_int(0); - instructions.emit_code(Opcode::IntEqual); - let end_jump = instructions.emit_jump(Opcode::IfNotJump); - instructions.emit_push_constant_ref(cp.insert_string("Cannot unwrap `None`.")); - instructions.emit_invoke(cp.insert_string("std::panic")); - 
instructions.patch_jump(end_jump); - } - "Glob/expand" => { - instructions.emit_invoke(cp.insert_string(GLOB_EXPAND)); + + emit_intrinsic_instructions(name, instructions, cp, |instructions, cp| { + for (arg, param) in args.iter().zip(function.param_types.iter().skip(1)) { + emit(arg, instructions, ctx, cp, locals, state); + + // The parameter is an object but the argument isn't: may be an argument passed to a generic parameter + if param.ty.is_obj() && !arg.ty.is_obj() { + instructions.emit_box_if_primitive(arg.ty) + } } - _ => panic!("Unknown `{}` intrinsic", function.fqn.display()), + }); + + if function.return_type.is_obj() && !receiver_ty.is_obj() { + // The function's declared return type is an object but the call return type is not: it's a boxed return value + instructions.emit_code(Opcode::Unbox); } + state.use_values(uses); } _ => panic!( "Call `{}`, but it's not implemented yet", @@ -262,6 +70,164 @@ pub(crate) fn emit_natives( } } +fn emit_intrinsic_instructions( + intrinsic_fn_name: &str, + instructions: &mut Instructions, + cp: &mut ConstantPool, + emit_args: impl FnOnce(&mut Instructions, &mut ConstantPool), +) { + match intrinsic_fn_name { + "Bool/and" | "Exitcode/and" => { + instructions.emit_code(Opcode::DupByte); + let end_jump = instructions.emit_jump(if intrinsic_fn_name == "Exitcode/and" { + Opcode::IfJump + } else { + Opcode::IfNotJump + }); + instructions.emit_pop(ValueStackSize::Byte); + emit_args(instructions, cp); + instructions.patch_jump(end_jump); + + return; + } + "Bool/or" | "Exitcode/or" => { + instructions.emit_code(Opcode::DupByte); + let else_jump = instructions.emit_jump(if intrinsic_fn_name == "Exitcode/or" { + Opcode::IfJump + } else { + Opcode::IfNotJump + }); + let end_jump = instructions.emit_jump(Opcode::Jump); + instructions.patch_jump(else_jump); + instructions.emit_pop(ValueStackSize::Byte); + + emit_args(instructions, cp); + instructions.patch_jump(end_jump); + + return; + } + _ => emit_args(instructions, cp), + } + + match intrinsic_fn_name { + "Bool/not" => { + instructions.emit_bool_inversion(); + } + "Bool/eq" => { + instructions.emit_code(Opcode::BXor); + instructions.emit_bool_inversion(); + } + "Bool/ne" => { + instructions.emit_code(Opcode::BXor); + } + "Int/add" => { + instructions.emit_code(Opcode::IntAdd); + } + "Int/sub" => { + instructions.emit_code(Opcode::IntSub); + } + "Int/mul" => { + instructions.emit_code(Opcode::IntMul); + } + "Int/div" => { + instructions.emit_code(Opcode::IntDiv); + } + "Int/mod" => { + instructions.emit_code(Opcode::IntMod); + } + "Int/eq" => { + instructions.emit_code(Opcode::IntEqual); + } + "Int/ne" => { + instructions.emit_code(Opcode::IntEqual); + instructions.emit_bool_inversion(); + } + "Int/lt" => { + instructions.emit_code(Opcode::IntLessThan); + } + "Int/le" => { + instructions.emit_code(Opcode::IntLessOrEqual); + } + "Int/gt" => { + instructions.emit_code(Opcode::IntGreaterThan); + } + "Int/ge" => { + instructions.emit_code(Opcode::IntGreaterOrEqual); + } + "Int/to_exitcode" => { + instructions.emit_code(Opcode::ConvertByteToInt); + } + "Int/to_string" => { + instructions.emit_invoke(cp.insert_string(INT_TO_STRING)); + } + "String/eq" => { + instructions.emit_invoke(cp.insert_string(STRING_EQ)); + } + "String/ne" => { + instructions.emit_invoke(cp.insert_string(STRING_EQ)); + instructions.emit_bool_inversion(); + } + "String/len" => { + instructions.emit_invoke(cp.insert_string(STRING_LEN)); + } + "String/add" | "String/concat" => { + instructions.emit_invoke(cp.insert_string(STRING_CONCAT)); + 
} + "String/[]" => { + instructions.emit_invoke(cp.insert_string(STRING_INDEX)); + } + "String/split" => { + instructions.emit_invoke(cp.insert_string(STRING_SPLIT)); + } + "String/bytes" => { + instructions.emit_invoke(cp.insert_string(STRING_BYTES)); + } + "Vec/len" => { + instructions.emit_invoke(cp.insert_string(VEC_LEN)); + } + "Vec/[]" => { + instructions.emit_invoke(cp.insert_string(VEC_INDEX)); + } + "Vec/[]=" => { + instructions.emit_invoke(cp.insert_string(VEC_INDEX_EQ)); + } + "Vec/push" => { + instructions.emit_invoke(cp.insert_string(VEC_PUSH)); + } + "Vec/pop" => { + instructions.emit_invoke(cp.insert_string(VEC_POP)); + } + "Vec/pop_head" => { + instructions.emit_invoke(cp.insert_string(VEC_POP_HEAD)); + } + "Vec/extend" => { + instructions.emit_invoke(cp.insert_string(VEC_EXTEND)); + } + "Option/is_some" => { + instructions.emit_push_int(0); + instructions.emit_code(Opcode::IntEqual); + } + "Option/is_none" => { + instructions.emit_push_int(0); + instructions.emit_code(Opcode::IntEqual); + instructions.emit_bool_inversion(); + } + "Option/unwrap" => { + instructions.emit_code(Opcode::Dup); + instructions.emit_push_int(0); + instructions.emit_code(Opcode::IntEqual); + let end_jump = instructions.emit_jump(Opcode::IfNotJump); + instructions.emit_push_constant_ref(cp.insert_string("Cannot unwrap `None`.")); + instructions.emit_invoke(cp.insert_string("std::panic")); + instructions.patch_jump(end_jump); + } + "Glob/expand" => { + instructions.emit_invoke(cp.insert_string(GLOB_EXPAND)); + } + _ => panic!("Unknown `{intrinsic_fn_name}` intrinsic"), + } +} + pub(crate) fn emit_cast(from: TypeId, to: TypeId, instructions: &mut Instructions) { match (from, to) { (user::EXITCODE_TYPE, user::BOOL_TYPE) => { diff --git a/vm/src/interpreter.cpp b/vm/src/interpreter.cpp index b8322121..dd4188a2 100644 --- a/vm/src/interpreter.cpp +++ b/vm/src/interpreter.cpp @@ -165,7 +165,7 @@ std::vector &runtime_memory::program_arguments() { msh::obj &runtime_memory::emplace(msh::obj_data &&data) { if (heap.size() >= last_gc_heap_size + GC_HEAP_CYCLE) run_gc(); - return this->heap.insert(data); + return this->heap.insert(std::move(data)); } /** diff --git a/vm/src/memory/heap.cpp b/vm/src/memory/heap.cpp index 019d0219..a1f72e66 100644 --- a/vm/src/memory/heap.cpp +++ b/vm/src/memory/heap.cpp @@ -10,8 +10,8 @@ namespace msh { return data; } - obj &heap::insert(msh::obj &&obj) { - objects.push_front(std::forward(obj)); + obj &heap::insert(obj_data &&obj) { + objects.emplace_front(obj); len++; return objects.front(); } diff --git a/vm/src/memory/heap.h b/vm/src/memory/heap.h index 26d58046..7eb6bed0 100644 --- a/vm/src/memory/heap.h +++ b/vm/src/memory/heap.h @@ -42,10 +42,14 @@ namespace msh { friend gc; + obj(obj& other) = default; + obj(obj&& other) = default; + public: template obj(T val) : gc_cycle{0}, data{std::move(val)} {} + obj_data &get_data(); const obj_data &get_data() const; @@ -91,7 +95,7 @@ namespace msh { * @param obj The object to insert. * @return A reference to this object, valid as long as the object is not deleted. 
*/ - msh::obj &insert(msh::obj &&obj); + msh::obj &insert(msh::obj_data &&obj); size_t size() const; }; diff --git a/vm/src/stdlib_natives.cpp b/vm/src/stdlib_natives.cpp index a9059870..7bea9ad4 100644 --- a/vm/src/stdlib_natives.cpp +++ b/vm/src/stdlib_natives.cpp @@ -288,7 +288,8 @@ static void vec_index(OperandStack &caller_stack, runtime_memory &) { if (index >= vec.size()) { throw RuntimeException("Index " + std::to_string(n) + " is out of range, the length is " + std::to_string(vec.size()) + "."); } - caller_stack.push_reference(*vec[index]); + msh::obj& ref = *vec[index]; + caller_stack.push_reference(ref); } static void vec_index_set(OperandStack &caller_stack, runtime_memory &) { @@ -409,10 +410,10 @@ natives_functions_t load_natives() { {"std::memory::empty_operands", is_operands_empty}, {"std::memory::program_arguments", program_arguments}, - {"std::convert::ceil", ceil}, - {"std::convert::floor", floor}, - {"std::convert::round", round}, - {"std::convert::parse_int_radix", parse_int_radix}, + {"std::math::ceil", ceil}, + {"std::math::floor", floor}, + {"std::math::round", round}, + {"std::math::parse_int_radix", parse_int_radix}, {"std::process::get_fd_path", get_fd_path}, {"std::process::wait", process_wait}, diff --git a/vm/tests/integration/flow.rs b/vm/tests/integration/flow.rs index f2a9a4f2..b6d52824 100644 --- a/vm/tests/integration/flow.rs +++ b/vm/tests/integration/flow.rs @@ -166,7 +166,7 @@ fn simple_function_call() { #[test] fn operators() { let mut runner = Runner::default(); - runner.eval("use std::assert::*"); + runner.eval("use std::assert"); runner.eval( " assert(1 + 1 == 2) @@ -199,8 +199,8 @@ fn str_bytes() { let mut runner = Runner::default(); runner.eval("val letters = 'abcdefghijklmnopqrstuvwxy'.bytes()"); runner.eval("$letters.push(122)"); - assert_eq!(runner.eval("$letters[0]"), Some(VmValue::Int(97))); assert_eq!(runner.eval("$letters[25]"), Some(VmValue::Int(122))); + assert_eq!(runner.eval("$letters[0]"), Some(VmValue::Int(97))); } #[test] @@ -231,7 +231,7 @@ fn str_split() { fn exitcode_to_bool() { let mut runner = Runner::default(); runner.eval( - "use std::assert::assert + "use std::assert assert({ /bin/true }) assert(!{ /bin/false }) assert({ ! 
/bin/false })", diff --git a/vm/tests/integration/runner.rs b/vm/tests/integration/runner.rs index 61f190bc..b30e4933 100644 --- a/vm/tests/integration/runner.rs +++ b/vm/tests/integration/runner.rs @@ -79,7 +79,7 @@ impl Runner { PathBuf::from("runner"), source, ); - assert!(errors.is_empty()); + assert!(errors.is_empty(), "source code lifts compilation errors"); let ExprKind::Block(main_block) = &self.reef.group_by_content().next().unwrap().main.expr.kind @@ -256,6 +256,13 @@ mod test { ) } + #[test] + fn test_runner_variable_ref() { + let mut runner = Runner::default(); + runner.eval("val x = 9"); + assert_eq!(runner.eval("$x"), Some(VmValue::Int(9))) + } + #[test] fn test_runner_string() { let mut runner = Runner::default(); From 09f44f6ef146dda1cbe3864c995a9c0a901a5616 Mon Sep 17 00:00:00 2001 From: syldium Date: Wed, 9 Oct 2024 20:56:25 +0200 Subject: [PATCH 06/11] Find functions with fully qualified paths --- analyzer/src/module.rs | 8 ------ analyzer/src/typing.rs | 28 +++++++++++-------- analyzer/src/typing/pfc.rs | 53 ++++++++++-------------------------- analyzer/src/typing/shell.rs | 45 ++++++++++-------------------- 4 files changed, 45 insertions(+), 89 deletions(-) diff --git a/analyzer/src/module.rs b/analyzer/src/module.rs index f1d09460..a742799b 100644 --- a/analyzer/src/module.rs +++ b/analyzer/src/module.rs @@ -284,14 +284,6 @@ impl<'a> ModuleView<'a> { } Some(tree) } - - pub(crate) fn get_foreign(&self, path: &[&str]) -> Option<&ModuleTree> { - let (first, rest) = path.split_first().expect("path should not be empty"); - let tree = self.foreign.get(OsStr::new(first))?; - - rest.iter() - .try_fold(tree, |acc, it| acc.get(OsStr::new(it))) - } } /// Access all related files starting from the entrypoint. diff --git a/analyzer/src/typing.rs b/analyzer/src/typing.rs index ba4fa067..d5f2063e 100644 --- a/analyzer/src/typing.rs +++ b/analyzer/src/typing.rs @@ -10,7 +10,7 @@ mod shell; pub mod user; pub mod variable; -use crate::hir::{Conditional, Declaration, ExprKind, FunctionCall, Module, TypedExpr}; +use crate::hir::{Conditional, Declaration, ExprKind, Module, TypedExpr}; use crate::module::ModuleView; use crate::symbol::{Symbol, SymbolDesc, SymbolRegistry, UndefinedSymbol}; use crate::typing::assign::{ @@ -33,7 +33,7 @@ use crate::typing::user::{ }; use crate::typing::variable::{SymbolEntry, VariableTable}; use crate::{Database, PipelineError, Reef, SourceLocation, UnitKey}; -use ast::call::{MethodCall, ProgrammaticCall}; +use ast::call::MethodCall; use ast::control_flow::If; use ast::function::FunctionDeclaration; use ast::group::Block; @@ -1022,15 +1022,19 @@ fn lookup_path( } } Err(err) => { - errors.push(TypeError::new( - TypeErrorKind::UndefinedSymbol { - name: ident.value.to_string(), - expected: registry, - found: err.into(), - }, - SourceLocation::new(table.path().to_owned(), ident.segment()), - )); - return ERROR_TYPE; + if let Some(module) = modules.get(first) { + module + } else { + errors.push(TypeError::new( + TypeErrorKind::UndefinedSymbol { + name: ident.value.to_string(), + expected: registry, + found: err.into(), + }, + SourceLocation::new(table.path().to_owned(), ident.segment()), + )); + return ERROR_TYPE; + } } }, InclusionPathItem::Reef(_) => modules.current, @@ -1077,7 +1081,7 @@ fn lookup_path( errors.push(TypeError::new( TypeErrorKind::UndefinedSymbol { name: last.to_string(), - expected: SymbolRegistry::Type, + expected: registry, found: None, }, SourceLocation::new(table.path().to_owned(), ident.segment()), diff --git a/analyzer/src/typing/pfc.rs 
b/analyzer/src/typing/pfc.rs index e4c4a31f..f64ddd21 100644 --- a/analyzer/src/typing/pfc.rs +++ b/analyzer/src/typing/pfc.rs @@ -1,7 +1,6 @@ use crate::hir::{ExprKind, FunctionCall, Module, TypedExpr}; use crate::symbol::SymbolRegistry; use crate::typing::function::Function; -use crate::typing::registry::FunctionId; use crate::typing::user::{TypeId, UserType, ERROR_TYPE, UNKNOWN_TYPE}; use crate::typing::variable::VariableTable; use crate::typing::{ @@ -13,63 +12,41 @@ use ast::call::ProgrammaticCall; use context::source::SourceSegmentHolder; pub fn ascribe_pfc( - call: &ProgrammaticCall, + ProgrammaticCall { + path, + arguments, + type_parameters, + segment: span, + }: &ProgrammaticCall, table: &mut VariableTable, checker: &mut TypeChecker, storage: &mut Module, - ctx: Context, + ctx @ Context { modules, hint, .. }: Context, errors: &mut Vec, ) -> TypedExpr { + let arguments = arguments + .iter() + .map(|expr| ascribe_type(expr, table, checker, storage, ctx, errors)) + .collect::>(); let ty = lookup_path( - &call.path, + path, SymbolRegistry::Function, table, checker, - ctx.modules, + modules, errors, ); - if ty.is_err() { - return TypedExpr { - kind: ExprKind::Noop, - span: call.segment(), - ty: ERROR_TYPE, - }; + return TypedExpr::error(span.clone()); } - let UserType::Function(function) = checker.types[ty] else { + let UserType::Function(function_id) = checker.types[ty] else { panic!( "function should have a function type {ty:?} {:?}", &checker.types[ty] ); }; - ascribe_known_pfc(call, function, table, checker, storage, ctx, errors) -} - -/// Generate IHR for a given Programmatic Function Call where the callee is forced to be the given `function_id` argument. -/// -/// This function will ignore the pfc's path that it would normally use to retrieve the targeted function (as in [`ascribe_pfc`]), -/// and will try to match it with the given function -pub fn ascribe_known_pfc( - ProgrammaticCall { - path, - arguments, - type_parameters, - segment: span, - }: &ProgrammaticCall, - function_id: FunctionId, - table: &mut VariableTable, - checker: &mut TypeChecker, - storage: &mut Module, - ctx @ Context { modules, hint, .. 
}: Context, - errors: &mut Vec, -) -> TypedExpr { - let arguments = arguments - .iter() - .map(|expr| ascribe_type(expr, table, checker, storage, ctx, errors)) - .collect::>(); - let mut type_parameters = type_parameters .iter() .map(|type_param| lookup_type(type_param, table, checker, modules, errors)) diff --git a/analyzer/src/typing/shell.rs b/analyzer/src/typing/shell.rs index a9926767..8ad1b3d1 100644 --- a/analyzer/src/typing/shell.rs +++ b/analyzer/src/typing/shell.rs @@ -1,12 +1,11 @@ use crate::hir::{ ExprKind, MethodCall, Module, Redir, Redirect, Subprocess, Substitute, TypedExpr, }; -use crate::symbol::SymbolRegistry; use crate::typing::lower::convert_into_string; -use crate::typing::pfc::ascribe_known_pfc; +use crate::typing::pfc::ascribe_pfc; use crate::typing::registry::GLOB_SCHEMA; use crate::typing::user::{ - UserType, EXITCODE_TYPE, GLOB_TYPE, INT_TYPE, PID_TYPE, STRING_TYPE, STRING_VECTOR_TYPE, + EXITCODE_TYPE, GLOB_TYPE, INT_TYPE, PID_TYPE, STRING_TYPE, STRING_VECTOR_TYPE, }; use crate::typing::variable::VariableTable; use crate::typing::{ascribe_type, Context, TypeChecker, TypeError, TypeErrorKind, TypeHint}; @@ -78,45 +77,29 @@ fn as_implicit_pfc( ctx: Context, errors: &mut Vec, ) -> Option { - let (cmd, rest) = call.arguments.split_first().expect("at least one argument"); - - let Expr::Literal(Literal { - parsed: LiteralValue::String(cmd_name), - segment, - }) = cmd + let ( + Expr::Literal(Literal { + parsed: LiteralValue::String(cmd_name), + segment, + }), + rest, + ) = call.arguments.split_first().expect("at least one argument") else { return None; }; if cmd_name == "cd" { let pfc_ast = ProgrammaticCall { - path: vec![InclusionPathItem::Symbol(Identifier::new( - "cd".into(), - segment.start, - ))], + path: vec![ + InclusionPathItem::Symbol(Identifier::new("std".into(), segment.start)), + InclusionPathItem::Symbol(Identifier::new("cd".into(), segment.start)), + ], arguments: Vec::from(rest), type_parameters: vec![], segment: call.segment(), }; - // retrieve the std::cd function type - let std_module = ctx.modules.get_foreign(&["std"]).expect("std module"); - let function_export = std_module - .find_export("cd", SymbolRegistry::Function) - .expect("cd function in std module"); - let UserType::Function(function_id) = checker.types[function_export.ty] else { - panic!("std::cd type is not a function type") - }; - - return Some(ascribe_known_pfc( - &pfc_ast, - function_id, - table, - checker, - storage, - ctx, - errors, - )); + return Some(ascribe_pfc(&pfc_ast, table, checker, storage, ctx, errors)); } None From ad14bb7e751f35ad76266f41fd7f20869982c3b8 Mon Sep 17 00:00:00 2001 From: syldium Date: Wed, 9 Oct 2024 22:17:39 +0200 Subject: [PATCH 07/11] Check unary expressions --- analyzer/src/hir.rs | 2 - analyzer/src/typing.rs | 7 +--- analyzer/src/typing/operator.rs | 74 ++++++++++++++++++++++++++++++++- ast/src/operation.rs | 2 +- 4 files changed, 76 insertions(+), 9 deletions(-) diff --git a/analyzer/src/hir.rs b/analyzer/src/hir.rs index 0dec08b5..5daf6d69 100644 --- a/analyzer/src/hir.rs +++ b/analyzer/src/hir.rs @@ -1,5 +1,3 @@ -use crate::module::Export; -use crate::symbol::SymbolRegistry; use crate::typing::registry::{FunctionId, SchemaId}; use crate::typing::user::{TypeId, ERROR_TYPE, UNIT_TYPE, UNKNOWN_TYPE}; use crate::typing::variable::{LocalEnvironment, LocalId, Var}; diff --git a/analyzer/src/typing.rs b/analyzer/src/typing.rs index d5f2063e..1733f621 100644 --- a/analyzer/src/typing.rs +++ b/analyzer/src/typing.rs @@ -19,7 +19,7 @@ use 
crate::typing::assign::{ use crate::typing::flow::{ascribe_control, ascribe_while}; use crate::typing::function::Function; use crate::typing::lower::{ascribe_template_string, coerce_condition}; -use crate::typing::operator::ascribe_binary; +use crate::typing::operator::{ascribe_binary, ascribe_unary}; use crate::typing::pfc::ascribe_pfc; use crate::typing::registry::{FunctionId, Registry, SchemaId}; use crate::typing::schema::Schema; @@ -420,10 +420,7 @@ fn ascribe_type( LiteralValue::Bool(_) => BOOL_TYPE, }, }, - Expr::Unary(unary) => { - let typed_expr = ascribe_type(&unary.expr, table, checker, storage, ctx, errors); - typed_expr // TODO - } + Expr::Unary(unary) => ascribe_unary(unary, table, checker, storage, ctx, errors), Expr::Binary(binary) => ascribe_binary(binary, table, checker, storage, ctx, errors), Expr::TemplateString(tpl) => { ascribe_template_string(tpl, table, checker, storage, ctx, errors) diff --git a/analyzer/src/typing/operator.rs b/analyzer/src/typing/operator.rs index 3da1155f..729b5021 100644 --- a/analyzer/src/typing/operator.rs +++ b/analyzer/src/typing/operator.rs @@ -4,9 +4,73 @@ use crate::typing::user::UserType; use crate::typing::variable::VariableTable; use crate::typing::{ascribe_type, Context, TypeChecker, TypeError, TypeErrorKind}; use crate::SourceLocation; -use ast::operation::{BinaryOperation, BinaryOperator}; +use ast::operation::{BinaryOperation, BinaryOperator, UnaryOperation, UnaryOperator}; use context::source::SourceSegmentHolder; +pub(super) fn ascribe_unary( + unary: &UnaryOperation, + table: &mut VariableTable, + checker: &mut TypeChecker, + storage: &mut Module, + ctx: Context, + errors: &mut Vec, +) -> TypedExpr { + let typed_expr = ascribe_type(&unary.expr, table, checker, storage, ctx, errors); + if typed_expr.is_err() { + return typed_expr; + } + let UserType::Parametrized { schema, params: _ } = checker.types[typed_expr.ty] else { + panic!("Expected a parametrized type"); + }; + let name = name_unary_method(unary.op); + let Some(method_id) = checker.registry[schema].methods.get(name).copied() else { + errors.push(TypeError::new( + TypeErrorKind::UnknownMethod { + name: name.to_owned(), + type_name: checker.display(typed_expr.ty), + }, + SourceLocation::new(table.path().to_owned(), unary.segment()), + )); + return TypedExpr::error(unary.segment()); + }; + + let Function { + ref param_types, + return_type, + .. 
+ } = checker.registry[method_id]; + let [self_param] = param_types.as_slice() else { + errors.push(TypeError::new( + TypeErrorKind::ArityMismatch { + expected: param_types.len(), + received: 0, + }, + SourceLocation::new(table.path().to_owned(), unary.segment()), + )); + return TypedExpr::error(unary.segment()); + }; + if let Err(_) = checker.types.unify(self_param.ty, typed_expr.ty) { + errors.push(TypeError::new( + TypeErrorKind::TypeMismatch { + expected: checker.display(self_param.ty), + expected_due_to: None, + actual: checker.display(typed_expr.ty), + }, + SourceLocation::new(table.path().to_owned(), typed_expr.span), + )); + return TypedExpr::error(unary.segment()); + } + TypedExpr { + kind: ExprKind::MethodCall(MethodCall { + callee: Box::new(typed_expr), + arguments: Vec::new(), + function_id: method_id, + }), + ty: return_type, + span: unary.segment(), + } +} + pub(super) fn ascribe_binary( binary: &BinaryOperation, table: &mut VariableTable, @@ -84,6 +148,14 @@ pub(super) fn ascribe_binary( } } +fn name_unary_method(op: UnaryOperator) -> &'static str { + use UnaryOperator as Op; + match op { + Op::Not => "not", + Op::Negate => "neg", + } +} + fn name_binary_method(op: BinaryOperator) -> &'static str { use BinaryOperator as Op; match op { diff --git a/ast/src/operation.rs b/ast/src/operation.rs index 01400b1b..5df9a626 100644 --- a/ast/src/operation.rs +++ b/ast/src/operation.rs @@ -17,7 +17,7 @@ pub struct UnaryOperation { } /// A prefix unary operator. -#[derive(Debug, Clone, PartialEq)] +#[derive(Debug, Copy, Clone, PartialEq)] pub enum UnaryOperator { /// The `!` operator. Not, From 9d8895aa395d3dbb0b381fb082e25d4766c2ecc4 Mon Sep 17 00:00:00 2001 From: syldium Date: Wed, 9 Oct 2024 22:18:16 +0200 Subject: [PATCH 08/11] Fix last value type in VM unit tests --- analyzer/src/typing/assign.rs | 4 ++-- compiler/src/lib.rs | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/analyzer/src/typing/assign.rs b/analyzer/src/typing/assign.rs index 023b1a81..969a21a3 100644 --- a/analyzer/src/typing/assign.rs +++ b/analyzer/src/typing/assign.rs @@ -1,7 +1,7 @@ use crate::hir::{ExprKind, LocalAssignment, MethodCall, Module, TypedExpr}; use crate::symbol::{SymbolRegistry, UndefinedSymbol}; use crate::typing::function::Function; -use crate::typing::user::UserType; +use crate::typing::user::{UserType, UNIT_TYPE}; use crate::typing::variable::VariableTable; use crate::typing::{ascribe_type, Context, TypeChecker, TypeError, TypeErrorKind, TypeHint}; use crate::SourceLocation; @@ -76,7 +76,7 @@ pub(super) fn ascribe_assign( identifier: var.id, rhs: Box::new(rhs), }), - ty: var.ty, + ty: UNIT_TYPE, span: assign.segment(), } } diff --git a/compiler/src/lib.rs b/compiler/src/lib.rs index bdb64ba9..5a92ea46 100644 --- a/compiler/src/lib.rs +++ b/compiler/src/lib.rs @@ -218,7 +218,7 @@ fn compile_code( &chunk.expr }; let page_offset = cp.exported.last().map_or(0, |exp| { - exp.page_offset + u8::from(ValueStackSize::QWord) as u32 + exp.page_offset + u8::from(ValueStackSize::from(last_expr.ty)) as u32 }); cp.insert_exported(storage_exported_val, page_offset, last_expr.ty.is_obj()); instructions.emit_set_external( From 7e92c69d06e05f926b499d89079721ddac9b17df Mon Sep 17 00:00:00 2001 From: syldium Date: Sun, 13 Oct 2024 20:35:32 +0200 Subject: [PATCH 09/11] Find constructor invocations --- analyzer/src/hoist.rs | 5 +- analyzer/src/import.rs | 264 ++++++++++++++++++++++++++++++++ analyzer/src/lib.rs | 1 + analyzer/src/module.rs | 29 +++- analyzer/src/symbol.rs | 16 ++ 
analyzer/src/typing.rs | 141 ++++------------- analyzer/src/typing/pfc.rs | 65 +++++--- analyzer/src/typing/variable.rs | 2 +- cli/src/report.rs | 24 +-- 9 files changed, 395 insertions(+), 152 deletions(-) create mode 100644 analyzer/src/import.rs diff --git a/analyzer/src/hoist.rs b/analyzer/src/hoist.rs index c1935a75..0a3fcaa2 100644 --- a/analyzer/src/hoist.rs +++ b/analyzer/src/hoist.rs @@ -616,10 +616,7 @@ impl InvalidType<'_> { TypeErrorKind::UndefinedSymbol { name: name.to_owned(), expected: SymbolRegistry::Type, - found: match inner { - UndefinedSymbol::NotFound => None, - UndefinedSymbol::WrongRegistry(symbol) => Some(symbol), - }, + found: inner.into(), }, SourceLocation::new(path.to_owned(), span), ), diff --git a/analyzer/src/import.rs b/analyzer/src/import.rs new file mode 100644 index 00000000..5e711b0e --- /dev/null +++ b/analyzer/src/import.rs @@ -0,0 +1,264 @@ +//! Resolves paths to types. +//! +//! Some expressions contain arbitrary paths such as function paths or type paths. Most of the time, +//! those paths contain a single symbol, but they can also contain multiple symbols separated by +//! two colons. For instance, `std::io::File` is a valid path that first contains where the `File` +//! symbol is located and then the type itself. + +use crate::module::{ModuleTree, ModuleView}; +use crate::symbol::{SymbolRegistry, UndefinedSymbol}; +use crate::typing::user::{TypeArena, TypeId, UserType}; +use crate::typing::variable::{SymbolEntry, VariableTable}; +use crate::typing::ErroneousSymbolDesc; +use ast::r#use::InclusionPathItem; +use std::fmt; + +/// A keyword or identifier with an identified type. +#[derive(Debug, PartialEq, Eq, Clone, Copy)] +pub enum PathEntry { + /// A named variable. + Variable, + + /// A named function. + Function, + + /// A struct type. + /// + /// This excludes modules, in contrast to the [`SymbolRegistry::Type`] symbol variant. + Type, + + /// A module that contains other modules and/or symbols. + Module, + + /// A `reef` keyword. + Reef, +} + +impl fmt::Display for PathEntry { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + match self { + PathEntry::Variable => write!(f, "variable"), + PathEntry::Function => write!(f, "function"), + PathEntry::Type => write!(f, "type"), + PathEntry::Module => write!(f, "module"), + PathEntry::Reef => write!(f, "reef"), + } + } +} + +impl From for PathEntry { + fn from(entry: SymbolEntry) -> Self { + Self::from(SymbolRegistry::from(entry)) + } +} + +impl From for PathEntry { + fn from(registry: SymbolRegistry) -> Self { + match registry { + SymbolRegistry::Variable => Self::Variable, + SymbolRegistry::Function => Self::Function, + SymbolRegistry::Type => Self::Type, + } + } +} + +impl From<&ErroneousSymbolDesc> for PathEntry { + fn from(desc: &ErroneousSymbolDesc) -> Self { + match desc { + ErroneousSymbolDesc::Partial(entry) => *entry, + ErroneousSymbolDesc::Complete(desc) => Self::from(desc.registry), + } + } +} + +/// A search for the last symbol in a path. +/// +/// Most of the time, the expected result may belong to only one [`SymbolRegistry`]. In those cases, +/// the search of the last symbol in the path can be done directly after constructing the search. +/// The [`crate::typing::lookup_type`] function may be used to do so. +pub(crate) struct SymbolSearch<'a> { + /// The last symbol in the path. + item: &'a InclusionPathItem, + + /// The search state after traversing the first part of the path. 
+    state: SearchState<'a>,
+}
+
+impl<'a> SymbolSearch<'a> {
+    /// Creates a new search for the given path.
+    ///
+    /// The search will be initialized with all symbols in the path except the last one.
+    /// If any of those leading symbols cannot be resolved, an error is returned. Otherwise, the
+    /// returned search can be used to query the different registries where the last symbol might be found.
+    pub(crate) fn new(
+        path: &'a [InclusionPathItem],
+        types: &'a TypeArena,
+        modules: ModuleView<'a>,
+        table: &'a VariableTable<'a>,
+    ) -> Result<Self, PathItemError<'a>> {
+        let (item, path) = path.split_last().expect("path should not be empty");
+        let search = PathSearch::new(path, types, modules, table)?;
+        Ok(Self {
+            item,
+            state: search.state,
+        })
+    }
+
+    /// Looks up the last symbol in the path in the given registry.
+    pub(crate) fn lookup(&self, registry: SymbolRegistry) -> Result<TypeId, PathItemError<'a>> {
+        let InclusionPathItem::Symbol(ident) = self.item else {
+            return Err(PathItemError {
+                item: self.item,
+                err: PathError::Invalid(PathEntry::Reef),
+            });
+        };
+        match self.state {
+            SearchState::Start { modules, table } => {
+                match table.lookup(ident.value.as_str(), registry) {
+                    Ok(symbol) => Ok(symbol.ty),
+                    Err(err @ UndefinedSymbol::WrongRegistry(_)) => Err(PathItemError {
+                        item: self.item,
+                        err: PathError::from(err),
+                    }),
+                    Err(UndefinedSymbol::NotFound) => Err(PathItemError {
+                        item: self.item,
+                        err: if modules.get(self.item).is_some() {
+                            PathError::Invalid(PathEntry::Module)
+                        } else {
+                            PathError::NotFound
+                        },
+                    }),
+                }
+            }
+            SearchState::Module(tree) => match tree.find_export(ident.value.as_str(), registry) {
+                Ok(export) => Ok(export.ty),
+                Err(err) => Err(PathItemError {
+                    item: self.item,
+                    err: PathError::from(err),
+                }),
+            },
+        }
+    }
+}
+
+/// The first part of a symbol search.
+struct PathSearch<'a> {
+    types: &'a TypeArena,
+    state: SearchState<'a>,
+}
+
+/// The query point for the next symbol in the path.
+enum SearchState<'a> {
+    /// The initial state at the beginning of the path.
+    ///
+    /// Before the first symbol in the path, it is not yet known whether the path is absolute and
+    /// starts with the name of another reef, or is relative to a symbol already in scope.
+    Start {
+        modules: ModuleView<'a>,
+        table: &'a VariableTable<'a>,
+    },
+
+    /// The search is currently in a module.
+    Module(&'a ModuleTree),
+}
+
+/// A [`PathError`] with the path item that caused the error.
+pub struct PathItemError<'a> {
+    /// The path item that caused the error.
+    pub item: &'a InclusionPathItem,
+
+    /// The error that occurred.
+    pub err: PathError,
+}
+
+/// An error that occurred while searching for a path item.
+pub enum PathError {
+    /// The path item was not found.
+    NotFound,
+
+    /// The path item matches a symbol of an unexpected type.
+    Invalid(PathEntry),
+
+    /// The path goes through a symbol that is already known to be erroneous.
+    ErrorType,
+}
+
+impl From<UndefinedSymbol> for PathError {
+    fn from(err: UndefinedSymbol) -> Self {
+        match err {
+            UndefinedSymbol::NotFound => Self::NotFound,
+            UndefinedSymbol::WrongRegistry(desc) => Self::Invalid(PathEntry::from(desc.registry)),
+        }
+    }
+}
+
+impl From<PathError> for Option<ErroneousSymbolDesc> {
+    fn from(err: PathError) -> Self {
+        match err {
+            PathError::NotFound => None,
+            PathError::Invalid(entry) => Some(ErroneousSymbolDesc::Partial(entry)),
+            PathError::ErrorType => None,
+        }
+    }
+}
+
+impl<'a> PathSearch<'a> {
+    fn new(
+        path: &'a [InclusionPathItem],
+        types: &'a TypeArena,
+        modules: ModuleView<'a>,
+        table: &'a VariableTable<'a>,
+    ) -> Result<Self, PathItemError<'a>> {
+        let mut search = Self {
+            types,
+            state: SearchState::Start { modules, table },
+        };
+        for item in path {
+            search.state = search
+                .next_state(item)
+                .map_err(|err| PathItemError { item, err })?;
+        }
+        Ok(search)
+    }
+
+    fn next_state(&self, item: &InclusionPathItem) -> Result<SearchState<'a>, PathError> {
+        match self.state {
+            SearchState::Start { modules, table } => {
+                if let InclusionPathItem::Symbol(ident) = item {
+                    match table.lookup(ident.value.as_str(), SymbolRegistry::Type) {
+                        Ok(symbol) => {
+                            return match self.types[symbol.ty] {
+                                UserType::Module(ref path) => modules
+                                    .get_direct(path)
+                                    .map_or(Err(PathError::ErrorType), |tree| {
+                                        Ok(SearchState::Module(tree))
+                                    }),
+                                UserType::Error => Err(PathError::ErrorType),
+                                _ => Err(PathError::Invalid(PathEntry::Type)),
+                            }
+                        }
+                        Err(UndefinedSymbol::WrongRegistry(symbol)) => {
+                            return Err(PathError::Invalid(PathEntry::from(symbol.registry)));
+                        }
+                        Err(UndefinedSymbol::NotFound) => { /* continue */ }
+                    }
+                }
+                if let Some(tree) = modules.get(item) {
+                    Ok(SearchState::Module(tree))
+                } else {
+                    Err(PathError::NotFound)
+                }
+            }
+            SearchState::Module(tree) => {
+                let InclusionPathItem::Symbol(ident) = item else {
+                    return Err(PathError::Invalid(PathEntry::Reef));
+                };
+                if let Some(tree) = tree.get(ident.value.as_ref()) {
+                    Ok(SearchState::Module(tree))
+                } else {
+                    Err(PathError::NotFound)
+                }
+            }
+        }
+    }
+}
diff --git a/analyzer/src/lib.rs b/analyzer/src/lib.rs
index 5ce0ce5d..2f741ab8 100644
--- a/analyzer/src/lib.rs
+++ b/analyzer/src/lib.rs
@@ -17,6 +17,7 @@
 pub mod hir;
 mod hoist;
+pub mod import;
 mod module;
 pub mod symbol;
 pub mod typing;
diff --git a/analyzer/src/module.rs b/analyzer/src/module.rs
index a742799b..f12bd1a6 100644
--- a/analyzer/src/module.rs
+++ b/analyzer/src/module.rs
@@ -14,7 +14,7 @@
 //! that should be in that file. So it will try again, but with the last component removed. If it
 //! is still not found, it will continue to pop the path components until it finds a file to parse.
-use crate::symbol::SymbolRegistry; +use crate::symbol::{SymbolDesc, SymbolRegistry, UndefinedSymbol}; use crate::typing::user::{TypeId, UNKNOWN_TYPE}; use crate::typing::{TypeError, TypeErrorKind}; use crate::{Filesystem, PipelineError, Reef, SourceLocation, UnitKey}; @@ -243,10 +243,25 @@ impl ModuleTree { std::mem::take(&mut current.exports) } - pub fn find_export(&self, name: &str, symbol_registry: SymbolRegistry) -> Option<&Export> { - self.exports - .iter() - .find(|e| e.name == name && e.registry == symbol_registry) + pub fn find_export( + &self, + name: &str, + registry: SymbolRegistry, + ) -> Result<&Export, UndefinedSymbol> { + let mut other_export: Option<&Export> = None; + for export in self.exports.iter().rev() { + if export.name == name { + if export.registry == registry { + return Ok(export); + } else { + other_export = Some(export); + } + } + } + Err(match other_export { + Some(export) => UndefinedSymbol::WrongRegistry(SymbolDesc::from(export)), + None => UndefinedSymbol::NotFound, + }) } } @@ -266,14 +281,14 @@ impl<'a> ModuleView<'a> { Self { current, foreign } } - pub(crate) fn get(&self, item: &InclusionPathItem) -> Option<&ModuleTree> { + pub(crate) fn get(&self, item: &InclusionPathItem) -> Option<&'a ModuleTree> { match item { InclusionPathItem::Symbol(ident) => self.foreign.get(OsStr::new(ident.value.as_str())), InclusionPathItem::Reef(_) => Some(self.current), } } - pub(crate) fn get_direct(&self, path: &[InclusionPathItem]) -> Option<&ModuleTree> { + pub(crate) fn get_direct(&self, path: &[InclusionPathItem]) -> Option<&'a ModuleTree> { let (first, rest) = path.split_first().expect("path should not be empty"); let mut tree = self.get(first)?; for item in rest { diff --git a/analyzer/src/symbol.rs b/analyzer/src/symbol.rs index ed36850a..6cbfddf2 100644 --- a/analyzer/src/symbol.rs +++ b/analyzer/src/symbol.rs @@ -1,5 +1,6 @@ use crate::module::Export; use crate::typing::user::TypeId; +use crate::typing::ErroneousSymbolDesc; use context::source::Span; use std::fmt; use std::path::PathBuf; @@ -209,3 +210,18 @@ impl From for Option { } } } + +impl From for Option { + fn from(undefined: UndefinedSymbol) -> Self { + Option::::from(undefined).map(ErroneousSymbolDesc::Complete) + } +} + +impl From<&Export> for SymbolDesc { + fn from(export: &Export) -> Self { + Self { + registry: export.registry, + span: export.span.clone(), + } + } +} diff --git a/analyzer/src/typing.rs b/analyzer/src/typing.rs index 1733f621..5d3e5aff 100644 --- a/analyzer/src/typing.rs +++ b/analyzer/src/typing.rs @@ -11,8 +11,9 @@ pub mod user; pub mod variable; use crate::hir::{Conditional, Declaration, ExprKind, Module, TypedExpr}; +use crate::import::{PathEntry, PathItemError, SymbolSearch}; use crate::module::ModuleView; -use crate::symbol::{Symbol, SymbolDesc, SymbolRegistry, UndefinedSymbol}; +use crate::symbol::{Symbol, SymbolDesc, SymbolRegistry}; use crate::typing::assign::{ ascribe_assign, ascribe_identifier, ascribe_subscript, ascribe_var_reference, }; @@ -232,13 +233,19 @@ impl TypeError { } } +#[derive(Clone, Debug, PartialEq, Eq)] +pub enum ErroneousSymbolDesc { + Partial(PathEntry), + Complete(SymbolDesc), +} + #[derive(Error, Debug, Clone, PartialEq, Eq)] pub enum TypeErrorKind { #[error("undefined {expected} `{name}`")] UndefinedSymbol { name: String, expected: SymbolRegistry, - found: Option, + found: Option, }, #[error("duplicate symbol `{name}`")] @@ -979,121 +986,22 @@ fn lookup_path( modules: ModuleView, errors: &mut Vec, ) -> TypeId { - let (first, rest) = 
path.split_first().expect("path should not be empty"); - let mut tree = match first { - InclusionPathItem::Symbol(ident) => match table.lookup(ident.value.as_str(), registry) { - Ok(symbol) => { - if !rest.is_empty() { - errors.push(TypeError::new( - TypeErrorKind::UndefinedSymbol { - name: ident.value.to_string(), - expected: registry, - found: Some(SymbolDesc { - registry: symbol.registry, - span: symbol.declared_at.clone(), - }), - }, - SourceLocation::new(table.path().to_owned(), ident.segment()), - )); - } - return symbol.ty; - } - Err(UndefinedSymbol::WrongRegistry(SymbolDesc { - registry: SymbolRegistry::Type, - .. - })) => { - let symbol = table - .get(ident.value.as_str(), SymbolRegistry::Type) - .expect("module should be defined in the table"); - let UserType::Module(path) = &checker.types[symbol.ty] else { - panic!( - "module should have a module type, got {:?}", - checker.types[symbol.ty] - ); - }; - match modules.get_direct(path) { - Some(tree) => tree, - None => { - return ERROR_TYPE; - } - } - } - Err(err) => { - if let Some(module) = modules.get(first) { - module - } else { - errors.push(TypeError::new( - TypeErrorKind::UndefinedSymbol { - name: ident.value.to_string(), - expected: registry, - found: err.into(), - }, - SourceLocation::new(table.path().to_owned(), ident.segment()), - )); - return ERROR_TYPE; - } - } - }, - InclusionPathItem::Reef(_) => modules.current, - }; - let Some((last, rest)) = rest.split_last() else { - return ERROR_TYPE; - }; - for item in rest { - let InclusionPathItem::Symbol(ident) = item else { + match SymbolSearch::new(path, &checker.types, modules, table) + .and_then(|search| search.lookup(registry)) + { + Ok(search) => search, + Err(PathItemError { item, err }) => { errors.push(TypeError::new( TypeErrorKind::UndefinedSymbol { name: item.to_string(), - expected: SymbolRegistry::Type, - found: None, - }, - SourceLocation::new(table.path().to_owned(), item.segment()), - )); - return ERROR_TYPE; - }; - match tree.get(OsStr::new(ident.value.as_str())) { - Some(child_tree) => tree = child_tree, - None => { - errors.push(TypeError::new( - TypeErrorKind::UndefinedSymbol { - name: item.to_string(), - expected: SymbolRegistry::Type, - found: None, - }, - SourceLocation::new(table.path().to_owned(), item.segment()), - )); - return ERROR_TYPE; - } - } - } - match last { - InclusionPathItem::Symbol(ident) => { - if let Some(export) = tree - .exports - .iter() - .find(|export| export.name == ident.value && export.registry == registry) - { - export.ty - } else { - errors.push(TypeError::new( - TypeErrorKind::UndefinedSymbol { - name: last.to_string(), - expected: registry, - found: None, + expected: if path.last() == Some(item) { + registry + } else { + SymbolRegistry::Type }, - SourceLocation::new(table.path().to_owned(), ident.segment()), - )); - ERROR_TYPE - } - } - InclusionPathItem::Reef(span) => { - errors.push(TypeError::new( - TypeErrorKind::UndefinedSymbol { - name: last.to_string(), - expected: SymbolRegistry::Type, - found: None, + found: err.into(), }, - SourceLocation::new(table.path().to_owned(), span.clone()), + SourceLocation::new(table.path().to_owned(), item.segment()), )); ERROR_TYPE } @@ -1379,6 +1287,15 @@ mod tests { ); } + #[test] + fn reuse_path() { + let errors = type_check_multi([ + (PathBuf::from("test"), "use reef::bar::foo\nfoo::test()"), + (PathBuf::from("bar/foo"), "fun test() = {}"), + ]); + assert_eq!(errors, []); + } + #[test] fn pass_types_across_files() { let errors = type_check_multi([ diff --git 
a/analyzer/src/typing/pfc.rs b/analyzer/src/typing/pfc.rs index f64ddd21..0c7710eb 100644 --- a/analyzer/src/typing/pfc.rs +++ b/analyzer/src/typing/pfc.rs @@ -1,11 +1,11 @@ use crate::hir::{ExprKind, FunctionCall, Module, TypedExpr}; +use crate::import::{PathItemError, SymbolSearch}; use crate::symbol::SymbolRegistry; use crate::typing::function::Function; use crate::typing::user::{TypeId, UserType, ERROR_TYPE, UNKNOWN_TYPE}; use crate::typing::variable::VariableTable; use crate::typing::{ - ascribe_type, lookup_path, lookup_type, Context, TypeChecker, TypeError, TypeErrorKind, - TypeHint, + ascribe_type, lookup_type, Context, TypeChecker, TypeError, TypeErrorKind, TypeHint, }; use crate::SourceLocation; use ast::call::ProgrammaticCall; @@ -28,23 +28,52 @@ pub fn ascribe_pfc( .iter() .map(|expr| ascribe_type(expr, table, checker, storage, ctx, errors)) .collect::>(); - let ty = lookup_path( - path, - SymbolRegistry::Function, - table, - checker, - modules, - errors, - ); - if ty.is_err() { - return TypedExpr::error(span.clone()); - } - let UserType::Function(function_id) = checker.types[ty] else { - panic!( - "function should have a function type {ty:?} {:?}", - &checker.types[ty] - ); + let res = match SymbolSearch::new(path, &checker.types, modules, table) { + Ok(ref search) => { + let mut res = search.lookup(SymbolRegistry::Function); + if res.is_err() { + if let Ok(ty) = search.lookup(SymbolRegistry::Type) { + res = Ok(ty); + } + } + res + } + Err(err) => Err(err), + }; + let ty = match res { + Ok(search) => search, + Err(PathItemError { item, err }) => { + errors.push(TypeError::new( + TypeErrorKind::UndefinedSymbol { + name: item.to_string(), + expected: if path.last() == Some(item) { + SymbolRegistry::Function + } else { + SymbolRegistry::Type + }, + found: err.into(), + }, + SourceLocation::new(table.path().to_owned(), item.segment()), + )); + return TypedExpr::error(span.clone()); + } + }; + + let function_id = match checker.types[ty] { + UserType::Error => return TypedExpr::error(span.clone()), + UserType::Function(function_id) => function_id, + _ => { + errors.push(TypeError::new( + TypeErrorKind::TypeMismatch { + expected: "function".to_owned(), + expected_due_to: None, + actual: checker.display(ty), + }, + SourceLocation::new(table.path().to_owned(), span.clone()), + )); + return TypedExpr::error(span.clone()); + } }; let mut type_parameters = type_parameters diff --git a/analyzer/src/typing/variable.rs b/analyzer/src/typing/variable.rs index 02a9c5ed..443978bf 100644 --- a/analyzer/src/typing/variable.rs +++ b/analyzer/src/typing/variable.rs @@ -131,7 +131,7 @@ impl<'a> VariableTable<'a> { self.inner.get(name, registry) } - pub(super) fn lookup( + pub(crate) fn lookup( &self, name: &str, registry: SymbolRegistry, diff --git a/cli/src/report.rs b/cli/src/report.rs index e32485f1..c6a80aef 100644 --- a/cli/src/report.rs +++ b/cli/src/report.rs @@ -1,11 +1,11 @@ use std::path::PathBuf; +use analyzer::import::PathEntry; use analyzer::symbol::SymbolDesc; -use analyzer::typing::{TypeError, TypeErrorKind}; +use analyzer::typing::{ErroneousSymbolDesc, TypeError, TypeErrorKind}; use analyzer::{Filesystem, PipelineError, SourceLocation}; -use miette::{LabeledSpan, MietteDiagnostic, Severity, SourceOffset, SourceSpan}; - use context::source::Span; +use miette::{LabeledSpan, MietteDiagnostic, Severity, SourceOffset, SourceSpan}; pub fn error_to_diagnostic( value: PipelineError, @@ -51,14 +51,18 @@ fn type_error_to_diagnostic( TypeErrorKind::UndefinedSymbol { name, expected, - found: 
Some(SymbolDesc { registry, span }), + found: Some(desc), } => { - let symbol_span = multi_file.insert(at.path, span, fs); - diagnostic.message = format!("expected {expected}, found {registry} `{name}`"); - diagnostic.and_label(LabeledSpan::new_with_span( - Some(format!("{registry} defined here")), - symbol_span, - )) + let entry = PathEntry::from(&desc); + diagnostic.message = format!("expected {expected}, found {entry} `{name}`"); + if let ErroneousSymbolDesc::Complete(SymbolDesc { registry, span }) = desc { + let span = multi_file.insert(at.path, span, fs); + diagnostic = diagnostic.with_label(LabeledSpan::new_with_span( + Some(format!("{registry} defined here")), + span, + )) + } + diagnostic } TypeErrorKind::TypeMismatch { expected_due_to: Some(expected_due_to), From 098af0d96c1208c2c7f9816719b64d429ca957ae Mon Sep 17 00:00:00 2001 From: syldium Date: Sun, 20 Oct 2024 12:44:21 +0200 Subject: [PATCH 10/11] Type structures and for statements --- analyzer/src/hoist.rs | 49 +++++---- analyzer/src/module.rs | 6 -- analyzer/src/typing.rs | 165 ++++++---------------------- analyzer/src/typing/assign.rs | 59 +++++++--- analyzer/src/typing/iterable.rs | 184 ++++++++++++++++++++++++++++++++ analyzer/src/typing/pfc.rs | 64 +++++++++-- analyzer/src/typing/registry.rs | 9 ++ analyzer/src/typing/schema.rs | 179 ++++++++++++++++++++++++++++++- compiler/src/emit/iterable.rs | 71 ++++++------ compiler/src/emit/native.rs | 2 +- compiler/src/lib.rs | 21 +++- compiler/src/locals.rs | 5 +- compiler/src/structure.rs | 6 +- vm/src/stdlib_natives.cpp | 3 +- vm/tests/integration/runner.rs | 35 +++--- 15 files changed, 609 insertions(+), 249 deletions(-) create mode 100644 analyzer/src/typing/iterable.rs diff --git a/analyzer/src/hoist.rs b/analyzer/src/hoist.rs index 0a3fcaa2..dd7d6fe2 100644 --- a/analyzer/src/hoist.rs +++ b/analyzer/src/hoist.rs @@ -1,7 +1,7 @@ use crate::module::{Export, ModuleTree, ModuleView}; use crate::symbol::{SymbolRegistry, SymbolTable, UndefinedSymbol}; use crate::typing::function::{Function, FunctionKind}; -use crate::typing::schema::Schema; +use crate::typing::schema::{Schema, SchemaField}; use crate::typing::user::{ lookup_builtin_type, TypeId, UserType, ERROR_TYPE, STRING_TYPE, UNIT_TYPE, }; @@ -133,16 +133,25 @@ fn hoist_type_names( for expr in root.iter() { if let Expr::StructDeclaration(StructDeclaration { name, + parameters, segment: span, .. }) = expr { + let generics = parameters + .iter() + .map(|param| { + checker + .types + .alloc(UserType::GenericVariable(param.name.to_string())) + }) + .collect::>(); let schema = checker .registry - .define_schema(Schema::new(name.value.to_string())); + .define_schema(Schema::generic(name.value.to_string(), generics.clone())); let ty = checker.types.alloc(UserType::Parametrized { schema, - params: Vec::new(), + params: generics, }); table.insert_local(name.to_string(), ty, span.clone(), SymbolRegistry::Type); if let Some(export) = exports @@ -417,14 +426,7 @@ fn hoist_struct_decl( let UserType::Parametrized { schema, .. 
} = checker.types[ty] else { panic!("the type should have a schema"); }; - let generics = parameters - .iter() - .map(|param| { - checker - .types - .alloc(UserType::GenericVariable(param.name.to_string())) - }) - .collect::>(); + let generics = checker.registry[schema].generic_variables.clone(); table.enter_scope(); for (name, ty) in parameters.iter().zip(generics.iter()) { table.insert_local( @@ -444,22 +446,33 @@ fn hoist_struct_decl( ERROR_TYPE } }; - ( - field.name.to_string(), - Parameter { + SchemaField { + name: field.name.to_string(), + param: Parameter { ty, span: field.tpe.segment(), }, - ) + } }) - .collect::>(); + .collect::>(); + let constructor = checker.registry.define_function(Function { + declared_at: table.path.clone(), + fqn: PathBuf::from(""), + generic_variables: generics.clone(), + param_types: fields_types + .iter() + .map(|field| field.param.clone()) + .collect::>(), + return_type: ty, + kind: FunctionKind::Constructor, + }); let Schema { - ref mut generic_variables, ref mut fields, + ref mut methods, .. } = checker.registry[schema]; - generic_variables.extend(generics); fields.extend(fields_types); + methods.insert("".to_owned(), constructor); table.exit_scope(); } diff --git a/analyzer/src/module.rs b/analyzer/src/module.rs index f12bd1a6..2781d398 100644 --- a/analyzer/src/module.rs +++ b/analyzer/src/module.rs @@ -49,12 +49,6 @@ pub(super) struct Export { pub(super) ty: TypeId, } -#[derive(Debug, PartialEq, Eq, Clone)] -pub(crate) struct ResolvedImport { - pub(crate) path: PathBuf, - pub(crate) export_idx: usize, -} - /// An instruction to import a module. #[derive(Debug)] struct Import { diff --git a/analyzer/src/typing.rs b/analyzer/src/typing.rs index 5d3e5aff..469fee4d 100644 --- a/analyzer/src/typing.rs +++ b/analyzer/src/typing.rs @@ -1,6 +1,7 @@ mod assign; mod flow; pub mod function; +mod iterable; mod lower; mod operator; mod pfc; @@ -19,11 +20,12 @@ use crate::typing::assign::{ }; use crate::typing::flow::{ascribe_control, ascribe_while}; use crate::typing::function::Function; +use crate::typing::iterable::{ascribe_for, ascribe_range}; use crate::typing::lower::{ascribe_template_string, coerce_condition}; use crate::typing::operator::{ascribe_binary, ascribe_unary}; use crate::typing::pfc::ascribe_pfc; -use crate::typing::registry::{FunctionId, Registry, SchemaId}; -use crate::typing::schema::Schema; +use crate::typing::registry::{Registry, SchemaId}; +use crate::typing::schema::{ascribe_field_access, Schema}; use crate::typing::shell::{ ascribe_call, ascribe_detached, ascribe_file_pattern, ascribe_pipeline, ascribe_redirected, ascribe_substitution, @@ -38,7 +40,7 @@ use ast::call::MethodCall; use ast::control_flow::If; use ast::function::FunctionDeclaration; use ast::group::Block; -use ast::r#struct::{FieldAccess, StructImpl}; +use ast::r#struct::StructImpl; use ast::r#type::{ByName, ParametrizedType, Type}; use ast::r#use::{Import, ImportList, InclusionPathItem, Use}; use ast::range::Iterable; @@ -150,77 +152,10 @@ impl TypeChecker { UserType::GenericVariable(name) => name.clone(), } } - - fn get_field(&mut self, ty: TypeId, field: &str) -> Result { - match &self.types[ty] { - UserType::Error => Ok(ERROR_TYPE), - UserType::Nothing | UserType::Unit => Err(FieldError::ExpectedStruct), - UserType::Parametrized { schema, params } => { - let Schema { - generic_variables, - fields, - methods, - .. 
- } = &self.registry[*schema]; - if let Some(field) = fields.get(field) { - Ok( - if let Some(concrete_ty) = - generic_variables.iter().position(|&ty| ty == field.ty) - { - params[concrete_ty] - } else if generic_variables.is_empty() { - field.ty - } else { - // TODO: use concretize - match &self.types[field.ty] { - UserType::Parametrized { - schema, - params: sub_params, - } => { - let params = sub_params - .iter() - .map(|ty| { - if let Some(concrete_ty) = - generic_variables.iter().position(|&pty| pty == *ty) - { - params[concrete_ty] - } else { - *ty - } - }) - .collect::>(); - self.types.alloc(UserType::Parametrized { - schema: *schema, - params, - }) - } - _ => field.ty, - } - }, - ) - } else if let Some(method) = methods.get(field) { - Err(FieldError::IsMethod(*method)) - } else { - Err(FieldError::UnknownField { - available: fields.keys().cloned().collect(), - }) - } - } - _ => Err(FieldError::UnknownField { - available: Vec::new(), - }), - } - } } pub(crate) struct UnifyError; -enum FieldError { - ExpectedStruct, - UnknownField { available: Vec }, - IsMethod(FunctionId), -} - #[derive(Debug, Clone, PartialEq, Eq)] pub struct TypeError { pub kind: TypeErrorKind, @@ -261,6 +196,12 @@ pub enum TypeErrorKind { actual: String, }, + #[error("trait `{trait_name}` not implemented for type `{type_name}`")] + TraitNotImplemented { + trait_name: String, + type_name: String, + }, + #[error("expected {expected} arguments but received {received}")] ArityMismatch { expected: usize, received: usize }, @@ -483,6 +424,7 @@ fn ascribe_type( ascribe_subscript(subscript, table, checker, storage, ctx, errors) } Expr::While(stmt) => ascribe_while(stmt, table, checker, storage, ctx, errors), + Expr::For(stmt) => ascribe_for(stmt, table, checker, storage, ctx, errors), Expr::Break(span) => ascribe_control(ExprKind::Break, span.clone(), table, ctx, errors), Expr::Continue(span) => { ascribe_control(ExprKind::Continue, span.clone(), table, ctx, errors) @@ -498,67 +440,7 @@ fn ascribe_type( } Expr::StructDeclaration(decl) => TypedExpr::noop(decl.segment.clone()), Expr::ProgrammaticCall(call) => ascribe_pfc(call, table, checker, storage, ctx, errors), - Expr::FieldAccess(FieldAccess { - expr, - field, - segment: span, - }) => { - let typed_expr = ascribe_type(expr, table, checker, storage, ctx, errors); - match checker.get_field(typed_expr.ty, field.value.as_str()) { - Ok(field_ty) => { - return TypedExpr { - kind: ExprKind::Noop, - span: span.clone(), - ty: field_ty, - }; - } - Err(FieldError::ExpectedStruct) => { - errors.push(TypeError::new( - TypeErrorKind::TypeMismatch { - expected: "Struct".to_string(), - expected_due_to: None, - actual: checker.display(typed_expr.ty), - }, - SourceLocation::new(table.path().to_owned(), span.clone()), - )); - } - Err(FieldError::UnknownField { available }) => { - errors.push(TypeError::new( - TypeErrorKind::UnknownField { - name: field.value.to_string(), - type_name: checker.display(typed_expr.ty), - available, - }, - SourceLocation::new(table.path().to_owned(), field.segment()), - )); - } - Err(FieldError::IsMethod(method)) => { - let Function { - ref param_types, .. 
- } = checker.registry[method]; - let mut builder = "(".to_owned(); - for param in param_types { - if param.ty == typed_expr.ty { - continue; - } - if builder.ends_with('(') { - builder.push('_'); - } else { - builder.push_str(", _"); - } - } - builder.push(')'); - errors.push(TypeError::new( - TypeErrorKind::MethodLikeFieldAccess { - name: field.value.to_string(), - parentheses: builder, - }, - SourceLocation::new(table.path().to_owned(), field.segment()), - )); - } - } - TypedExpr::error(span.clone()) - } + Expr::FieldAccess(expr) => ascribe_field_access(expr, table, checker, storage, ctx, errors), Expr::VarReference(ident) => ascribe_var_reference(ident, table, errors), Expr::Path(ident) => ascribe_identifier(ident, table, errors), Expr::Block(Block { @@ -588,7 +470,7 @@ fn ascribe_type( ascribe_pipeline(pipeline, table, checker, storage, ctx, errors) } Expr::Range(iterable) => match iterable { - Iterable::Range(range) => todo!("{range:?}"), + Iterable::Range(range) => ascribe_range(range, table, checker, storage, ctx, errors), Iterable::Files(pattern) => { ascribe_file_pattern(pattern, table, checker, storage, ctx, errors) } @@ -758,7 +640,7 @@ fn ascribe_type( span: span.clone(), ty: return_type, } - } else if let Some(field) = fields.get(name) { + } else if let Some(_) = fields.iter().find(|field| field.name == name) { errors.push(TypeError::new( TypeErrorKind::MethodLikeFieldAccess { name: name.to_string(), @@ -1180,7 +1062,7 @@ mod tests { use std::ffi::OsString; use std::path::PathBuf; - fn type_check(source: &str) -> Vec { + pub(crate) fn type_check(source: &str) -> Vec { let fs = MemoryFilesystem::new(HashMap::from([(PathBuf::from("main.msh"), source)])); check(fs, "main.msh") } @@ -1674,4 +1556,19 @@ mod tests { )] ); } + + #[test] + fn different_generic_len() { + let errors = type_check("fun foo[T, U](t: T, u: U) -> T; foo::[Int](1, true)"); + assert_eq!( + errors, + [TypeError::new( + TypeErrorKind::ArityMismatch { + expected: 2, + received: 1, + }, + SourceLocation::new(PathBuf::from("main.msh"), 38..41), + )] + ); + } } diff --git a/analyzer/src/typing/assign.rs b/analyzer/src/typing/assign.rs index 969a21a3..82f0744d 100644 --- a/analyzer/src/typing/assign.rs +++ b/analyzer/src/typing/assign.rs @@ -1,10 +1,11 @@ -use crate::hir::{ExprKind, LocalAssignment, MethodCall, Module, TypedExpr}; +use crate::hir::{ExprKind, FieldAssign, LocalAssignment, MethodCall, Module, TypedExpr}; use crate::symbol::{SymbolRegistry, UndefinedSymbol}; use crate::typing::function::Function; +use crate::typing::schema::ascribe_field_access; use crate::typing::user::{UserType, UNIT_TYPE}; use crate::typing::variable::VariableTable; use crate::typing::{ascribe_type, Context, TypeChecker, TypeError, TypeErrorKind, TypeHint}; -use crate::SourceLocation; +use crate::{hir, SourceLocation}; use ast::operation::{BinaryOperation, BinaryOperator}; use ast::r#struct::FieldAccess; use ast::r#use::InclusionPathItem; @@ -53,16 +54,6 @@ pub(super) fn ascribe_assign( ); match left { Ok(var) => { - if let Err(_) = checker.types.unify(rhs.ty, var.ty) { - errors.push(TypeError::new( - TypeErrorKind::TypeMismatch { - expected: checker.display(var.ty), - expected_due_to: None, - actual: checker.display(rhs.ty), - }, - SourceLocation::new(table.path().to_owned(), assign.segment()), - )); - } if !var.can_reassign { errors.push(TypeError::new( TypeErrorKind::CannotReassign { @@ -180,7 +171,8 @@ pub(super) fn ascribe_subscript( /// Creates the right hand side of an assignment. 
/// -/// The state should contain the [`ExpressionValue::Expected`] value of the left hand side. +/// The state should contain the [`TypeHint::Required`] value of the left hand side. If not, the +/// type of the right hand side will not be checked. fn ascribe_assign_rhs( assign: &Assign, table: &mut VariableTable, @@ -189,7 +181,7 @@ fn ascribe_assign_rhs( ctx: Context, errors: &mut Vec, ) -> TypedExpr { - match assign.operator { + let expr = match assign.operator { AssignOperator::Assign => ascribe_type(&assign.value, table, checker, storage, ctx, errors), operator => { let binary = Expr::Binary(BinaryOperation { @@ -206,7 +198,20 @@ fn ascribe_assign_rhs( errors, ) } + }; + if let TypeHint::Required(ty) = ctx.hint { + if let Err(_) = checker.types.unify(expr.ty, ty) { + errors.push(TypeError::new( + TypeErrorKind::TypeMismatch { + expected: checker.display(ty), + expected_due_to: None, + actual: checker.display(expr.ty), + }, + SourceLocation::new(table.path().to_owned(), assign.segment()), + )); + } } + expr } fn ascribe_assign_subscript( @@ -230,7 +235,31 @@ fn ascribe_field_assign( ctx: Context, errors: &mut Vec, ) -> TypedExpr { - todo!() + let TypedExpr { + kind: + ExprKind::FieldAccess(hir::FieldAccess { + object, + structure, + field, + }), + ty: field_ty, + span: _, + } = ascribe_field_access(field, table, checker, module, ctx, errors) + else { + return TypedExpr::error(assign.segment()); + }; + let ctx = ctx.with_hint(TypeHint::Required(field_ty)); + let new_value = ascribe_assign_rhs(assign, table, checker, module, ctx, errors); + TypedExpr { + kind: ExprKind::FieldAssign(FieldAssign { + object, + structure, + field, + new_value: Box::new(new_value), + }), + ty: UNIT_TYPE, + span: assign.segment(), + } } pub(super) fn ascribe_identifier( diff --git a/analyzer/src/typing/iterable.rs b/analyzer/src/typing/iterable.rs new file mode 100644 index 00000000..24eef593 --- /dev/null +++ b/analyzer/src/typing/iterable.rs @@ -0,0 +1,184 @@ +use crate::hir::{ExprKind, ForLoop, FunctionCall, Module, RangeFor, TypedExpr}; +use crate::typing::registry::{INCLUSIVE_RANGE_SCHEMA, RANGE_SCHEMA, STRING_SCHEMA, VEC_SCHEMA}; +use crate::typing::user::{UserType, INT_TYPE, STRING_TYPE, UNIT_TYPE}; +use crate::typing::variable::{Var, VariableTable}; +use crate::typing::{ascribe_type, Context, TypeChecker, TypeError, TypeErrorKind, TypeHint}; +use crate::{hir, SourceLocation}; +use ast::control_flow::{For, ForKind}; +use ast::range::NumericRange; +use ast::value::LiteralValue; +use context::source::SourceSegmentHolder; +use std::ffi::OsStr; + +pub(super) fn ascribe_for( + it: &For, + table: &mut VariableTable, + checker: &mut TypeChecker, + storage: &mut Module, + ctx: Context, + errors: &mut Vec, +) -> TypedExpr { + table.enter_scope(); + let typed_expr = match it.kind.as_ref() { + ForKind::Range(range) => { + let iterable = ascribe_type(&range.iterable, table, checker, storage, ctx, errors); + let receiver_type = match checker.types[iterable.ty] { + UserType::Parametrized { schema, ref params } if schema == VEC_SCHEMA => params[0], + UserType::Parametrized { schema, .. } if schema == STRING_SCHEMA => STRING_TYPE, + UserType::Parametrized { schema, .. 
} + if schema == RANGE_SCHEMA || schema == INCLUSIVE_RANGE_SCHEMA => + { + INT_TYPE + } + _ => { + errors.push(TypeError::new( + TypeErrorKind::TraitNotImplemented { + trait_name: "Iterator".to_owned(), + type_name: checker.display(iterable.ty), + }, + SourceLocation::new(table.path().to_owned(), iterable.span.clone()), + )); + iterable.ty + } + }; + let Var::Local(receiver) = table.insert_variable( + range.receiver.to_string(), + receiver_type, + range.iterable.segment(), + false, + ) else { + panic!("Expected a local variable"); + }; + let ctx = ctx.with_hint(TypeHint::Unused).in_loop(); + let body = ascribe_type(&it.body, table, checker, storage, ctx, errors); + TypedExpr { + kind: ExprKind::ForLoop(ForLoop { + kind: Box::new(hir::ForKind::Range(RangeFor { + receiver, + receiver_type, + iterable, + })), + body: Box::new(body), + }), + ty: UNIT_TYPE, + span: it.segment.clone(), + } + } + ForKind::Conditional(conditional) => { + let initializer = ascribe_type( + &conditional.initializer, + table, + checker, + storage, + ctx, + errors, + ); + let condition = + ascribe_type(&conditional.condition, table, checker, storage, ctx, errors); + let increment = + ascribe_type(&conditional.increment, table, checker, storage, ctx, errors); + let ctx = ctx.with_hint(TypeHint::Unused).in_loop(); + let body = ascribe_type(&it.body, table, checker, storage, ctx, errors); + TypedExpr { + kind: ExprKind::ForLoop(ForLoop { + kind: Box::new(hir::ForKind::Conditional(hir::ConditionalFor { + initializer, + condition, + increment, + })), + body: Box::new(body), + }), + ty: UNIT_TYPE, + span: it.segment.clone(), + } + } + }; + table.exit_scope(); + typed_expr +} + +pub(super) fn ascribe_range( + range: &NumericRange, + table: &mut VariableTable, + checker: &mut TypeChecker, + storage: &mut Module, + ctx: Context, + errors: &mut Vec, +) -> TypedExpr { + let ctx = ctx.with_hint(TypeHint::Required(INT_TYPE)); + let start = ascribe_type(&range.start, table, checker, storage, ctx, errors); + let end = ascribe_type(&range.end, table, checker, storage, ctx, errors); + let step = range + .step + .as_ref() + .map(|step| ascribe_type(step, table, checker, storage, ctx, errors)) + .unwrap_or_else(|| TypedExpr { + kind: ExprKind::Literal(LiteralValue::Int(1)), + ty: INT_TYPE, + span: range.segment(), + }); + + let args = [&start, &end, &step]; + for arg in &args { + if !checker.types.are_same(arg.ty, INT_TYPE) { + errors.push(TypeError::new( + TypeErrorKind::TypeMismatch { + expected: "Int".to_owned(), + expected_due_to: None, + actual: checker.display(arg.ty), + }, + SourceLocation::new(table.path().to_owned(), arg.span.clone()), + )); + } + } + let range_ty = ctx + .modules + .foreign + .get(OsStr::new("std")) + .and_then(|module| module.exports.iter().find(|export| export.name == "Range")) + .map(|range| range.ty) + .expect("Range type not found"); + let UserType::Parametrized { schema, params: _ } = checker.types[range_ty] else { + panic!("Expected a parametrized type"); + }; + let constructor_id = checker.registry[schema] + .get_exact_method( + &checker.types, + &checker.registry, + "", + &[INT_TYPE, INT_TYPE, INT_TYPE], + range_ty, + ) + .expect("Range type does not have a constructor"); + TypedExpr { + kind: ExprKind::FunctionCall(FunctionCall { + function_id: constructor_id, + arguments: vec![start, end, step], + }), + ty: range_ty, + span: range.segment(), + } +} + +#[cfg(test)] +mod tests { + use crate::typing::tests::type_check; + use crate::typing::{TypeError, TypeErrorKind}; + use crate::SourceLocation; + use 
std::path::PathBuf; + + #[test] + fn not_iterable() { + let errors = type_check("for x in 42 { }"); + assert_eq!( + errors, + [TypeError::new( + TypeErrorKind::TraitNotImplemented { + trait_name: "Iterator".to_owned(), + type_name: "Int".to_owned(), + }, + SourceLocation::new(PathBuf::from("main.msh"), 9..11), + )] + ); + } +} diff --git a/analyzer/src/typing/pfc.rs b/analyzer/src/typing/pfc.rs index 0c7710eb..91f89cce 100644 --- a/analyzer/src/typing/pfc.rs +++ b/analyzer/src/typing/pfc.rs @@ -2,6 +2,7 @@ use crate::hir::{ExprKind, FunctionCall, Module, TypedExpr}; use crate::import::{PathItemError, SymbolSearch}; use crate::symbol::SymbolRegistry; use crate::typing::function::Function; +use crate::typing::schema::Schema; use crate::typing::user::{TypeId, UserType, ERROR_TYPE, UNKNOWN_TYPE}; use crate::typing::variable::VariableTable; use crate::typing::{ @@ -9,7 +10,7 @@ use crate::typing::{ }; use crate::SourceLocation; use ast::call::ProgrammaticCall; -use context::source::SourceSegmentHolder; +use context::source::{SourceSegmentHolder, Span}; pub fn ascribe_pfc( ProgrammaticCall { @@ -24,10 +25,14 @@ pub fn ascribe_pfc( ctx @ Context { modules, hint, .. }: Context, errors: &mut Vec, ) -> TypedExpr { - let arguments = arguments + let type_parameters_span: Option = type_parameters + .first() + .zip(type_parameters.last()) + .map(|(first, last)| first.segment().start..last.segment().end); + let mut type_parameters = type_parameters .iter() - .map(|expr| ascribe_type(expr, table, checker, storage, ctx, errors)) - .collect::>(); + .map(|type_param| lookup_type(type_param, table, checker, modules, errors)) + .collect::>(); let res = match SymbolSearch::new(path, &checker.types, modules, table) { Ok(ref search) => { @@ -63,6 +68,21 @@ pub fn ascribe_pfc( let function_id = match checker.types[ty] { UserType::Error => return TypedExpr::error(span.clone()), UserType::Function(function_id) => function_id, + UserType::Parametrized { schema, ref params } => { + let Schema { ref methods, .. } = checker.registry[schema]; + if let Some(constructor) = methods.get("") { + *constructor + } else { + errors.push(TypeError::new( + TypeErrorKind::TraitNotImplemented { + trait_name: "Function".to_owned(), + type_name: checker.display(ty), + }, + SourceLocation::new(table.path().to_owned(), span.clone()), + )); + return TypedExpr::error(span.clone()); + } + } _ => { errors.push(TypeError::new( TypeErrorKind::TypeMismatch { @@ -76,10 +96,25 @@ pub fn ascribe_pfc( } }; - let mut type_parameters = type_parameters + let Function { + ref param_types, .. 
+ } = checker.registry[function_id]; + + let type_hints = param_types .iter() - .map(|type_param| lookup_type(type_param, table, checker, modules, errors)) - .collect::>(); + .map(|param| TypeHint::Required(param.ty)) + .collect::>(); + let arguments = arguments + .iter() + .zip( + type_hints + .into_iter() + .chain(std::iter::repeat(TypeHint::Used)), + ) + .map(|(expr, hint)| { + ascribe_type(expr, table, checker, storage, ctx.with_hint(hint), errors) + }) + .collect::>(); let Function { ref declared_at, @@ -148,6 +183,7 @@ pub fn ascribe_pfc( } = &checker.types[return_type] { if schema == expected_schema { + assert_eq!(fn_return_params.len(), expected_params.len(), "types of the same schema should have the same number of generic parameters"); // First, get the index of the generic_variables in the return_params list for (fn_return_param, fn_actual) in fn_return_params.iter().zip(expected_params) @@ -190,6 +226,14 @@ pub fn ascribe_pfc( }, SourceLocation::new(table.path().to_owned(), span.clone()), )); + } else if type_parameters.len() != generic_variables.len() { + errors.push(TypeError::new( + TypeErrorKind::ArityMismatch { + expected: generic_variables.len(), + received: type_parameters.len(), + }, + SourceLocation::new(table.path().to_owned(), type_parameters_span.unwrap()), + )); } else { for (arg, param) in arguments.iter().zip(param_types.iter()) { let param_ty = checker @@ -209,10 +253,10 @@ pub fn ascribe_pfc( )); } } + return_type = checker + .types + .concretize(return_type, generic_variables, &type_parameters); } - return_type = checker - .types - .concretize(return_type, generic_variables, &type_parameters); TypedExpr { kind: ExprKind::FunctionCall(FunctionCall { diff --git a/analyzer/src/typing/registry.rs b/analyzer/src/typing/registry.rs index 6091bf33..21e191cb 100644 --- a/analyzer/src/typing/registry.rs +++ b/analyzer/src/typing/registry.rs @@ -52,6 +52,8 @@ pub const VEC_SCHEMA: SchemaId = SchemaId(5); pub const GLOB_SCHEMA: SchemaId = SchemaId(6); pub const PID_SCHEMA: SchemaId = SchemaId(7); pub const OPTION_SCHEMA: SchemaId = SchemaId(8); +pub const RANGE_SCHEMA: SchemaId = SchemaId(9); +pub const INCLUSIVE_RANGE_SCHEMA: SchemaId = SchemaId(10); impl Registry { /// Allocates a new [`SchemaId`] for the given [`Schema`]. @@ -67,6 +69,13 @@ impl Registry { self.functions.push(function); FunctionId(id) } + + pub fn iter_schemas(&self) -> impl Iterator { + self.schemas + .iter() + .enumerate() + .map(|(id, schema)| (SchemaId(id), schema)) + } } macro_rules! impl_index { diff --git a/analyzer/src/typing/schema.rs b/analyzer/src/typing/schema.rs index 55dee7ef..41c0297c 100644 --- a/analyzer/src/typing/schema.rs +++ b/analyzer/src/typing/schema.rs @@ -1,6 +1,14 @@ +use crate::hir::{ExprKind, Module, TypedExpr}; +use crate::typing::function::Function; use crate::typing::registry::{FunctionId, Registry}; -use crate::typing::user::TypeArena; -use crate::typing::{Parameter, TypeId}; +use crate::typing::user::{TypeArena, UserType}; +use crate::typing::variable::{LocalId, VariableTable}; +use crate::typing::{ + ascribe_type, Context, Parameter, TypeChecker, TypeError, TypeErrorKind, TypeId, +}; +use crate::{hir, SourceLocation}; +use ast::r#struct::FieldAccess; +use context::source::SourceSegmentHolder; use std::collections::HashMap; /// A structure definition, describing a type with fields and methods. @@ -13,19 +21,25 @@ pub struct Schema { pub generic_variables: Vec, /// The fields and their types. - pub fields: HashMap, + pub fields: Vec, /// The methods and their types. 
pub methods: HashMap, } +#[derive(Debug, PartialEq, Eq, Clone)] +pub struct SchemaField { + pub name: String, + pub param: Parameter, +} + impl Schema { /// Creates a new schema. pub fn new(name: String) -> Self { Self { name, generic_variables: Vec::new(), - fields: HashMap::new(), + fields: Vec::new(), methods: HashMap::new(), } } @@ -35,7 +49,7 @@ impl Schema { Self { name, generic_variables, - fields: HashMap::new(), + fields: Vec::new(), methods: HashMap::new(), } } @@ -66,3 +80,158 @@ impl Schema { }) } } + +pub(super) fn ascribe_field_access( + FieldAccess { + expr, + field: field_name, + segment: span, + }: &FieldAccess, + table: &mut VariableTable, + checker: &mut TypeChecker, + storage: &mut Module, + ctx: Context, + errors: &mut Vec, +) -> TypedExpr { + let typed_expr = ascribe_type(expr, table, checker, storage, ctx, errors); + if typed_expr.is_err() { + return typed_expr; + } + let UserType::Parametrized { schema, ref params } = checker.types[typed_expr.ty] else { + errors.push(TypeError::new( + TypeErrorKind::TypeMismatch { + expected: "Struct".to_string(), + expected_due_to: None, + actual: checker.display(typed_expr.ty), + }, + SourceLocation::new(table.path().to_owned(), span.clone()), + )); + return TypedExpr::error(span.clone()); + }; + let Schema { + generic_variables, + fields, + methods, + .. + } = &checker.registry[schema]; + let Some(field_id) = fields + .iter() + .position(|field| field.name == field_name.value) + else { + errors.push(TypeError::new( + if let Some(method) = methods.get(field_name.value.as_str()) { + let Function { + ref param_types, .. + } = checker.registry[*method]; + let mut builder = "(".to_owned(); + for param in param_types { + if param.ty == typed_expr.ty { + continue; + } + if builder.ends_with('(') { + builder.push('_'); + } else { + builder.push_str(", _"); + } + } + builder.push(')'); + TypeErrorKind::MethodLikeFieldAccess { + name: field_name.value.to_string(), + parentheses: builder, + } + } else { + TypeErrorKind::UnknownField { + name: field_name.value.to_string(), + type_name: checker.display(typed_expr.ty), + available: fields.iter().map(|field| field.name.clone()).collect(), + } + }, + SourceLocation::new(table.path().to_owned(), field_name.segment()), + )); + return TypedExpr::error(span.clone()); + }; + let field = &fields[field_id]; + let params = params.clone(); + let field_ty = checker + .types + .concretize(field.param.ty, generic_variables, ¶ms); + TypedExpr { + kind: ExprKind::FieldAccess(hir::FieldAccess { + object: Box::new(typed_expr), + structure: schema, + field: LocalId(field_id), + }), + span: span.clone(), + ty: field_ty, + } +} + +#[cfg(test)] +mod tests { + use crate::typing::tests::type_check; + use crate::typing::{TypeError, TypeErrorKind}; + use crate::SourceLocation; + use std::path::PathBuf; + + #[test] + fn instantiate_structure_incorrect_field() { + let errors = type_check( + "struct Range { start: Int, end: Int } + Range(1, 'a') + ", + ); + assert_eq!( + errors, + [TypeError::new( + TypeErrorKind::TypeMismatch { + expected: "Int".to_owned(), + expected_due_to: Some(SourceLocation::new(PathBuf::from("main.msh"), 32..35)), + actual: "String".to_owned(), + }, + SourceLocation::new(PathBuf::from("main.msh"), 59..62), + )] + ); + } + + #[test] + fn instantiate_generic_structure() { + let errors = type_check( + "struct Pair[A, B] { first: A, second: B } + Pair::[Int, String](1, 'a') + ", + ); + assert_eq!(errors, []); + } + + #[test] + fn instantiate_structure_with_concretized_type() { + let errors = 
type_check( + "struct Box[T] {} + struct Foo { box: Box[Int] } + Foo(Box()) + ", + ); + assert_eq!(errors, []); + } + + #[test] + fn field_assign_incorrect_type() { + let errors = type_check( + "struct Box { item: Bool } + val box = Box(true) + $box.item = 5 + ", + ); + assert_eq!( + errors, + [TypeError::new( + TypeErrorKind::TypeMismatch { + expected: "Bool".to_owned(), + expected_due_to: None, + actual: "Int".to_owned(), + }, + SourceLocation::new(PathBuf::from("main.msh"), 74..83), + )] + ); + } +} diff --git a/compiler/src/emit/iterable.rs b/compiler/src/emit/iterable.rs index 24ccdfe4..ff6f4ee6 100644 --- a/compiler/src/emit/iterable.rs +++ b/compiler/src/emit/iterable.rs @@ -4,8 +4,9 @@ use crate::context::EmitterContext; use crate::emit::native::{STRING_INDEX, STRING_LEN, VEC_INDEX, VEC_LEN}; use crate::emit::{emit, EmissionState}; use crate::locals::LocalsLayout; +use crate::r#type::ValueStackSize; use analyzer::hir::{ForKind, ForLoop, RangeFor, TypedExpr}; -use analyzer::typing::registry::{STRING_SCHEMA, VEC_SCHEMA}; +use analyzer::typing::registry::{RANGE_SCHEMA, STRING_SCHEMA, VEC_SCHEMA}; use analyzer::typing::user::{UserType, INT_TYPE, STRING_TYPE}; use analyzer::typing::variable::LocalId; @@ -75,40 +76,40 @@ pub(super) fn emit_for_loop( state, ); } - // Type::Structure(_, structure_id) => { - // // Int range - // let layout = ctx.get_layout(ReefId(1), *structure_id); - // emit_for_iterable( - // range, - // &it.body, - // |iterator_id, instructions, _, locals| { - // // Emit start - // instructions.emit_get_local(iterator_id, type_ref.into(), locals); - // instructions.emit_get_field(LocalId(0), layout); - // }, - // |instructions, _| { - // instructions.emit_code(Opcode::Swap); - // instructions.emit_pop(ValueStackSize::QWord); - // }, - // |instructions, _| { - // instructions.emit_get_field(LocalId(1), layout); - // }, - // if *structure_id == StructureId(0) { - // Opcode::IntLessThan - // } else { - // Opcode::IntLessOrEqual - // }, - // |iterator_id, instructions, _, locals| { - // instructions.emit_get_local(iterator_id, type_ref.into(), locals); - // instructions.emit_get_field(LocalId(2), layout); - // }, - // instructions, - // ctx, - // cp, - // locals, - // state, - // ); - // } + UserType::Parametrized { schema, .. 
} => { + // Int range + let layout = &ctx.layouts[schema.get()]; + emit_for_iterable( + range, + &it.body, + |iterator_id, instructions, _, locals| { + // Emit start + instructions.emit_get_local(iterator_id, type_ref.into(), locals); + instructions.emit_get_field(LocalId(0), layout); + }, + |instructions, _| { + instructions.emit_code(Opcode::Swap); + instructions.emit_pop(ValueStackSize::QWord); + }, + |instructions, _| { + instructions.emit_get_field(LocalId(1), layout); + }, + if *schema == RANGE_SCHEMA { + Opcode::IntLessThan + } else { + Opcode::IntLessOrEqual + }, + |iterator_id, instructions, _, locals| { + instructions.emit_get_local(iterator_id, type_ref.into(), locals); + instructions.emit_get_field(LocalId(2), layout); + }, + instructions, + ctx, + cp, + locals, + state, + ); + } _ => panic!("Unexpected iterable {iterable_type:?} type"), } } diff --git a/compiler/src/emit/native.rs b/compiler/src/emit/native.rs index 3793de85..6309c649 100644 --- a/compiler/src/emit/native.rs +++ b/compiler/src/emit/native.rs @@ -6,7 +6,7 @@ use crate::r#type::ValueStackSize; use analyzer::hir::MethodCall; use analyzer::typing::function::FunctionKind; use analyzer::typing::user; -use analyzer::typing::user::{TypeId, UserType}; +use analyzer::typing::user::TypeId; const STRING_EQ: &str = "lang::String::eq"; const STRING_CONCAT: &str = "lang::String::concat"; diff --git a/compiler/src/lib.rs b/compiler/src/lib.rs index 5a92ea46..16ea0ed2 100644 --- a/compiler/src/lib.rs +++ b/compiler/src/lib.rs @@ -29,7 +29,8 @@ pub trait SourceLineProvider { #[derive(Default)] pub struct CompilerState { - pub constant_pool: ConstantPool, + constant_pool: ConstantPool, + pub layouts: Vec, } #[derive(Default)] @@ -44,11 +45,17 @@ pub fn compile_reef( database: &Database, reef: &Reef, writer: &mut impl Write, - CompilerState { constant_pool: cp }: &mut CompilerState, + CompilerState { + constant_pool: cp, + layouts, + }: &mut CompilerState, options: CompilerOptions, ) -> Result<(), io::Error> { let mut bytecode = Bytecode::default(); - let layouts = Vec::::new(); + layouts.clear(); + for (_, schema) in database.checker.registry.iter_schemas() { + layouts.push(StructureLayout::from(schema)); + } for EncodableContent { main, @@ -59,7 +66,7 @@ pub fn compile_reef( let ctx = EmitterContext { types: &database.checker.types, registry: &database.checker.registry, - layouts: &layouts, + layouts, }; let mut page_size = 0u32; @@ -78,6 +85,12 @@ pub fn compile_reef( write_exported(cp, page_size, &mut bytecode)?; bytecode.emit_u32(layouts.len() as u32); + for layout in layouts.iter() { + bytecode.emit_constant_ref(cp.insert_string(&layout.name)); + bytecode.emit_u32(layout.total_size); + bytecode.emit_u32(0); + } + bytecode.emit_u32(functions.len() as u32); for function in functions { diff --git a/compiler/src/locals.rs b/compiler/src/locals.rs index 893d9fd9..a3ce14b5 100644 --- a/compiler/src/locals.rs +++ b/compiler/src/locals.rs @@ -49,7 +49,10 @@ impl LocalsLayout { } pub fn refs_offset(self) -> Vec { - Vec::new() + self.values_indexes + .into_iter() + .filter_map(|(pos, is_obj)| is_obj.then_some(pos)) + .collect() } pub fn get_capture_index(&self, var: LocalId) -> Option { diff --git a/compiler/src/structure.rs b/compiler/src/structure.rs index b1a04c44..6cb8cc8c 100644 --- a/compiler/src/structure.rs +++ b/compiler/src/structure.rs @@ -4,6 +4,7 @@ use analyzer::typing::variable::LocalId; #[derive(Debug, Clone, PartialEq)] pub struct StructureLayout { + pub(crate) name: String, field_offset: usize, pub(crate) 
total_size: u32, indexes: Vec<(u32, ValueStackSize)>, @@ -14,13 +15,14 @@ impl From<&Schema> for StructureLayout { let mut indexes = Vec::new(); let mut idx = 0; - for field in structure.fields.values() { - let field_size = ValueStackSize::from(field.ty); + for field in structure.fields.iter() { + let field_size = ValueStackSize::from(field.param.ty); indexes.push((idx, field_size)); idx += u8::from(field_size) as u32; } Self { + name: structure.name.clone(), field_offset: structure.generic_variables.len(), total_size: idx, indexes, diff --git a/vm/src/stdlib_natives.cpp b/vm/src/stdlib_natives.cpp index 7bea9ad4..29acdd84 100644 --- a/vm/src/stdlib_natives.cpp +++ b/vm/src/stdlib_natives.cpp @@ -406,6 +406,8 @@ natives_functions_t load_natives() { {"std::home_dir", home_dir}, {"std::current_home_dir", current_home_dir}, + {"std::convert::parse_int_radix", parse_int_radix}, + {"std::memory::gc", gc}, {"std::memory::empty_operands", is_operands_empty}, {"std::memory::program_arguments", program_arguments}, @@ -413,7 +415,6 @@ natives_functions_t load_natives() { {"std::math::ceil", ceil}, {"std::math::floor", floor}, {"std::math::round", round}, - {"std::math::parse_int_radix", parse_int_radix}, {"std::process::get_fd_path", get_fd_path}, {"std::process::wait", process_wait}, diff --git a/vm/tests/integration/runner.rs b/vm/tests/integration/runner.rs index b30e4933..0916a8b8 100644 --- a/vm/tests/integration/runner.rs +++ b/vm/tests/integration/runner.rs @@ -4,6 +4,7 @@ use std::path::{Path, PathBuf}; use analyzer::hir::ExprKind; use analyzer::typing::user::{TypeId, UserType}; +use analyzer::typing::variable::LocalId; use analyzer::typing::{registry, user}; use analyzer::{ analyze_multi, append_source, freeze_exports, Database, FileImporter, Filesystem, Reef, @@ -175,23 +176,23 @@ impl Runner { } UserType::Parametrized { schema, params: _ } => { let structure = &self.database.checker.registry[*schema]; - // let structure_fields = structure.get_fields(); - // let structure_layout = &self.current_compiled_reef.layouts[structure_id.0]; - // - // let structure_data = value.get_as_obj().get_as_struct(); - // let structure_values = structure_fields - // .into_iter() - // .map(|field| { - // let (pos, _) = structure_layout.get_emplacement(field.local_id); - // let field_value = VmValueFFI::ptr( - // *structure_data.as_ptr().add(pos as usize).cast(), - // ); - // self.extract_value(field_value, field.ty) - // }) - // .collect(); - // - // Some(VmValue::Struct(structure_values)) - todo!() + let structure_layout = &self.compiler_state.layouts[schema.get()]; + + let structure_data = value.get_as_obj().get_as_struct(); + let structure_values = structure + .fields + .iter() + .enumerate() + .map(|(id, field)| { + let (pos, _) = structure_layout.get_emplacement(LocalId(id)); + let field_value = VmValueFFI::ptr( + *structure_data.as_ptr().add(pos as usize).cast(), + ); + self.extract_value(field_value, field.param.ty) + }) + .collect(); + + Some(VmValue::Struct(structure_values)) } _ => panic!("unknown object"), }, From 322aa92beed1e91b5804abe3f58751d476e7c8ca Mon Sep 17 00:00:00 2001 From: syldium Date: Sun, 23 Mar 2025 16:25:22 +0100 Subject: [PATCH 11/11] Implement most of the implicit idioms --- analyzer/src/typing.rs | 56 ++++++++++++++------- analyzer/src/typing/lower.rs | 89 ++++++++++++++++++++++++++++++++- analyzer/src/typing/operator.rs | 14 +++++- analyzer/src/typing/pfc.rs | 31 ++++++------ analyzer/src/typing/shell.rs | 72 ++++++++++++++++++++++++-- compiler/src/emit/native.rs | 22 ++++++++ 
lib/std.msh | 12 +++++ vm/tests/integration/flow.rs | 4 +- 8 files changed, 257 insertions(+), 43 deletions(-) diff --git a/analyzer/src/typing.rs b/analyzer/src/typing.rs index 469fee4d..45fd867e 100644 --- a/analyzer/src/typing.rs +++ b/analyzer/src/typing.rs @@ -21,15 +21,14 @@ use crate::typing::assign::{ use crate::typing::flow::{ascribe_control, ascribe_while}; use crate::typing::function::Function; use crate::typing::iterable::{ascribe_for, ascribe_range}; -use crate::typing::lower::{ascribe_template_string, coerce_condition}; +use crate::typing::lower::{ + ascribe_template_string, coerce_condition, lower_implicit_cast, Implicit, +}; use crate::typing::operator::{ascribe_binary, ascribe_unary}; use crate::typing::pfc::ascribe_pfc; use crate::typing::registry::{Registry, SchemaId}; use crate::typing::schema::{ascribe_field_access, Schema}; -use crate::typing::shell::{ - ascribe_call, ascribe_detached, ascribe_file_pattern, ascribe_pipeline, ascribe_redirected, - ascribe_substitution, -}; +use crate::typing::shell::{ascribe_call, ascribe_detached, ascribe_file_pattern, ascribe_pipeline, ascribe_redirected, ascribe_subshell, ascribe_substitution, ascribe_tilde}; use crate::typing::user::{ lookup_builtin_type, TypeArena, TypeId, UserType, BOOL_TYPE, ERROR_TYPE, FLOAT_TYPE, INT_TYPE, NOTHING_TYPE, STRING_TYPE, UNIT_TYPE, UNKNOWN_TYPE, @@ -253,6 +252,10 @@ impl From for PipelineError { } /// Informs the type inference algorithm about the locally expected type. +/// +/// Note that it is only informative, and it should not be enforced, i.e. no error should be raised +/// if the type is not the expected one. Such checks should be made by the receiver of the type and +/// not the producer. #[derive(Debug, Clone, Copy, PartialEq, Eq)] enum TypeHint { /// The return type is immediately discarded. 
@@ -384,23 +387,26 @@ fn ascribe_type( .as_ref() .map(|ty| lookup_type(ty, table, checker, modules, errors)); let ctx = ctx.with_hint(expected_ty.map_or(TypeHint::Used, TypeHint::Required)); - let typed_initializer = ascribe_type(initializer, table, checker, storage, ctx, errors); + let mut typed_initializer = + ascribe_type(initializer, table, checker, storage, ctx, errors); let mut ty = typed_initializer.ty; if typed_initializer.is_ok() { if let Some(expected_ty) = expected_ty { - if let Err(_) = checker.types.unify(ty, expected_ty) { - errors.push(TypeError::new( - TypeErrorKind::TypeMismatch { - expected: checker.display(expected_ty), - expected_due_to: Some(SourceLocation::new( + typed_initializer = lower_implicit_cast( + typed_initializer, + Implicit { + assign_to: expected_ty, + expected_due_to: var.ty.as_ref().map(|ty| { + SourceLocation::new( table.path().to_owned(), var.ty.as_ref().unwrap().segment(), - )), - actual: checker.display(ty), - }, - SourceLocation::new(table.path().to_owned(), initializer.segment()), - )); - } + ) + }), + }, + checker, + table.path(), + errors, + ); ty = expected_ty; } } @@ -469,6 +475,10 @@ fn ascribe_type( Expr::Pipeline(pipeline) => { ascribe_pipeline(pipeline, table, checker, storage, ctx, errors) } + Expr::Subshell(subshell) => { + ascribe_subshell(subshell, table, checker, storage, ctx, errors) + } + Expr::Tilde(tilde) => ascribe_tilde(tilde, table, checker, storage, ctx, errors), Expr::Range(iterable) => match iterable { Iterable::Range(range) => ascribe_range(range, table, checker, storage, ctx, errors), Iterable::Files(pattern) => { @@ -1571,4 +1581,16 @@ mod tests { )] ); } + + #[test] + fn implicit_exit_to_bool_var() { + let errors = type_check("val res: Bool = { /bin/true }"); + assert_eq!(errors, []); + } + + #[test] + fn implicit_exit_to_bool_call() { + let errors = type_check("fun test(b: Bool); test({ /bin/true })"); + assert_eq!(errors, []); + } } diff --git a/analyzer/src/typing/lower.rs b/analyzer/src/typing/lower.rs index 26ea740c..e5e9ec48 100644 --- a/analyzer/src/typing/lower.rs +++ b/analyzer/src/typing/lower.rs @@ -1,6 +1,8 @@ use crate::hir::{ExprKind, MethodCall, Module, TypedExpr}; -use crate::typing::registry::STRING_SCHEMA; -use crate::typing::user::{UserType, BOOL_TYPE, EXITCODE_TYPE, STRING_TYPE}; +use crate::typing::registry::{OPTION_SCHEMA, STRING_SCHEMA}; +use crate::typing::user::{ + TypeId, UserType, BOOL_TYPE, EXITCODE_TYPE, FLOAT_TYPE, INT_TYPE, STRING_TYPE, +}; use crate::typing::variable::VariableTable; use crate::typing::{ascribe_type, Context, TypeChecker, TypeError, TypeErrorKind, TypeHint}; use crate::SourceLocation; @@ -143,3 +145,86 @@ pub(super) fn coerce_condition( } } } + +pub(super) struct Implicit { + pub(super) assign_to: TypeId, + pub(super) expected_due_to: Option, +} + +impl Implicit { + pub(super) fn new(assign_to: TypeId) -> Self { + Self { + assign_to, + expected_due_to: None, + } + } +} + +/// Lower hard-coded implicit casts. 
+pub(super) fn lower_implicit_cast(
+    rhs: TypedExpr,
+    Implicit {
+        assign_to,
+        expected_due_to,
+    }: Implicit,
+    checker: &TypeChecker,
+    path: &Path,
+    errors: &mut Vec<TypeError>,
+) -> TypedExpr {
+    if checker.types.are_same(rhs.ty, assign_to) || rhs.is_err() {
+        return rhs;
+    }
+    let span = rhs.span.clone();
+    if rhs.ty == INT_TYPE && assign_to == FLOAT_TYPE {
+        TypedExpr {
+            kind: ExprKind::Cast(Box::new(rhs)),
+            ty: assign_to,
+            span,
+        }
+    } else if rhs.ty == EXITCODE_TYPE && assign_to == BOOL_TYPE {
+        TypedExpr {
+            kind: ExprKind::Cast(Box::new(rhs)),
+            ty: assign_to,
+            span,
+        }
+    } else {
+        errors.push(TypeError::new(
+            TypeErrorKind::TypeMismatch {
+                expected: checker.display(assign_to),
+                expected_due_to,
+                actual: checker.display(rhs.ty),
+            },
+            SourceLocation::new(path.to_owned(), span),
+        ));
+        rhs
+    }
+}
+
+/// Generates a conversion method call if needed.
+pub(super) fn generate_unwrap(typed: TypedExpr, checker: &mut TypeChecker) -> TypedExpr {
+    let UserType::Parametrized {
+        schema: instantiated,
+        ref params,
+    } = checker.types[typed.ty]
+    else {
+        return typed;
+    };
+    if instantiated != OPTION_SCHEMA {
+        return typed;
+    }
+    let return_type = *params.first().unwrap();
+    let span = typed.span.clone();
+    let unwrap_id = *checker.registry[OPTION_SCHEMA]
+        .methods
+        .get("unwrap")
+        .expect("Option schema should have an `unwrap` method.");
+    TypedExpr {
+        kind: ExprKind::MethodCall(MethodCall {
+            callee: Box::new(typed),
+            arguments: vec![],
+            function_id: unwrap_id,
+        }),
+        ty: return_type,
+        span,
+    }
+}
diff --git a/analyzer/src/typing/operator.rs b/analyzer/src/typing/operator.rs
index 729b5021..95471ed2 100644
--- a/analyzer/src/typing/operator.rs
+++ b/analyzer/src/typing/operator.rs
@@ -1,6 +1,7 @@
 use crate::hir::{ExprKind, MethodCall, Module, TypedExpr};
 use crate::typing::function::Function;
-use crate::typing::user::UserType;
+use crate::typing::lower::{lower_implicit_cast, Implicit};
+use crate::typing::user::{UserType, BOOL_TYPE, EXITCODE_TYPE};
 use crate::typing::variable::VariableTable;
 use crate::typing::{ascribe_type, Context, TypeChecker, TypeError, TypeErrorKind};
 use crate::SourceLocation;
@@ -15,10 +16,19 @@ pub(super) fn ascribe_unary(
     ctx: Context,
     errors: &mut Vec<TypeError>,
 ) -> TypedExpr {
-    let typed_expr = ascribe_type(&unary.expr, table, checker, storage, ctx, errors);
+    let mut typed_expr = ascribe_type(&unary.expr, table, checker, storage, ctx, errors);
     if typed_expr.is_err() {
         return typed_expr;
     }
+    if unary.op == UnaryOperator::Not && typed_expr.ty == EXITCODE_TYPE {
+        typed_expr = lower_implicit_cast(
+            typed_expr,
+            Implicit::new(BOOL_TYPE),
+            checker,
+            table.path(),
+            errors,
+        );
+    }
     let UserType::Parametrized { schema, params: _ } = checker.types[typed_expr.ty] else {
         panic!("Expected a parametrized type");
     };
diff --git a/analyzer/src/typing/pfc.rs b/analyzer/src/typing/pfc.rs
index 91f89cce..d14d7e9d 100644
--- a/analyzer/src/typing/pfc.rs
+++ b/analyzer/src/typing/pfc.rs
@@ -2,6 +2,7 @@ use crate::hir::{ExprKind, FunctionCall, Module, TypedExpr};
 use crate::import::{PathItemError, SymbolSearch};
 use crate::symbol::SymbolRegistry;
 use crate::typing::function::Function;
+use crate::typing::lower::{lower_implicit_cast, Implicit};
 use crate::typing::schema::Schema;
 use crate::typing::user::{TypeId, UserType, ERROR_TYPE, UNKNOWN_TYPE};
 use crate::typing::variable::VariableTable;
@@ -104,7 +105,7 @@ pub fn ascribe_pfc(
         .iter()
         .map(|param| TypeHint::Required(param.ty))
         .collect::<Vec<_>>();
-    let arguments = arguments
+    let mut arguments = arguments
         .iter()
         .zip(
             type_hints
@@ -235,23 +236,23 @@ pub fn ascribe_pfc(
             SourceLocation::new(table.path().to_owned(), type_parameters_span.unwrap()),
         ));
     } else {
-        for (arg, param) in arguments.iter().zip(param_types.iter()) {
+        for (arg, param) in arguments.iter_mut().zip(param_types.iter()) {
             let param_ty = checker
                 .types
                 .concretize(param.ty, generic_variables, &type_parameters);
-            if let Err(_) = checker.types.unify(arg.ty, param_ty) {
-                errors.push(TypeError::new(
-                    TypeErrorKind::TypeMismatch {
-                        expected: checker.display(param_ty),
-                        expected_due_to: Some(SourceLocation::new(
-                            declared_at.clone(),
-                            param.span.clone(),
-                        )),
-                        actual: checker.display(arg.ty),
-                    },
-                    SourceLocation::new(table.path().to_owned(), arg.span.clone()),
-                ));
-            }
+            *arg = lower_implicit_cast(
+                arg.clone(),
+                Implicit {
+                    assign_to: param_ty,
+                    expected_due_to: Some(SourceLocation::new(
+                        declared_at.clone(),
+                        param.span.clone(),
+                    )),
+                },
+                checker,
+                table.path(),
+                errors,
+            );
         }
         return_type = checker
             .types
diff --git a/analyzer/src/typing/shell.rs b/analyzer/src/typing/shell.rs
index 8ad1b3d1..01f17ddf 100644
--- a/analyzer/src/typing/shell.rs
+++ b/analyzer/src/typing/shell.rs
@@ -1,12 +1,10 @@
 use crate::hir::{
     ExprKind, MethodCall, Module, Redir, Redirect, Subprocess, Substitute, TypedExpr,
 };
-use crate::typing::lower::convert_into_string;
+use crate::typing::lower::{convert_into_string, generate_unwrap};
 use crate::typing::pfc::ascribe_pfc;
 use crate::typing::registry::GLOB_SCHEMA;
-use crate::typing::user::{
-    EXITCODE_TYPE, GLOB_TYPE, INT_TYPE, PID_TYPE, STRING_TYPE, STRING_VECTOR_TYPE,
-};
+use crate::typing::user::{EXITCODE_TYPE, GLOB_TYPE, INT_TYPE, PID_TYPE, STRING_TYPE, STRING_VECTOR_TYPE, UNIT_TYPE};
 use crate::typing::variable::VariableTable;
 use crate::typing::{ascribe_type, Context, TypeChecker, TypeError, TypeErrorKind, TypeHint};
 use crate::SourceLocation;
@@ -15,8 +13,9 @@ use ast::call::{
 use ast::r#use::InclusionPathItem;
 use ast::range::FilePattern;
 use ast::substitution::Substitution;
 use ast::value::{Literal, LiteralValue};
-use ast::variable::Identifier;
+use ast::variable::{Identifier, Tilde, TildeExpansion};
 use ast::Expr;
+use ast::group::Subshell;
 use context::source::SourceSegmentHolder;
 
 pub(super) fn ascribe_call(
@@ -187,6 +186,42 @@ pub(super) fn ascribe_pipeline(
     }
 }
 
+pub(super) fn ascribe_subshell(
+    subshell: &Subshell,
+    table: &mut VariableTable,
+    checker: &mut TypeChecker,
+    storage: &mut Module,
+    ctx: Context,
+    errors: &mut Vec<TypeError>,
+) -> TypedExpr {
+    let block = subshell
+        .expressions
+        .iter()
+        .map(|expr| {
+            ascribe_type(
+                expr,
+                table,
+                checker,
+                storage,
+                ctx.with_hint(TypeHint::Unused),
+                errors,
+            )
+        })
+        .collect::<Vec<_>>();
+    TypedExpr {
+        kind: ExprKind::Subprocess(Subprocess {
+            inner: Box::new(TypedExpr {
+                kind: ExprKind::Block(block),
+                ty: UNIT_TYPE,
+                span: subshell.segment(),
+            }),
+            awaited: true,
+        }),
+        ty: EXITCODE_TYPE,
+        span: subshell.segment(),
+    }
+}
+
 pub(super) fn ascribe_substitution(
     substitution: &Substitution,
     table: &mut VariableTable,
@@ -233,3 +268,32 @@ pub(super) fn ascribe_file_pattern(
     }
     expr
 }
+
+pub(super) fn ascribe_tilde(
+    tilde: &TildeExpansion,
+    table: &mut VariableTable,
+    checker: &mut TypeChecker,
+    storage: &mut Module,
+    ctx: Context,
+    errors: &mut Vec<TypeError>,
+) -> TypedExpr {
+    let span = tilde.segment();
+    let (name, arg): (&str, Option<&Expr>) = match &tilde.structure {
+        Tilde::HomeDir(Some(username)) => ("home_dir", Some(username)),
+        Tilde::HomeDir(None) => ("current_home_dir", None),
+        Tilde::WorkingDir => ("working_dir", None),
+    };
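+    // Desugar the tilde expansion into a call to the matching `std` helper chosen above;
+    // `generate_unwrap` below unwraps the `Option` such a helper may return.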
+ let pfc = ProgrammaticCall { + path: vec![ + InclusionPathItem::Symbol(Identifier::new("std".into(), span.start)), + InclusionPathItem::Symbol(Identifier::new(name.into(), span.start)), + ], + segment: span, + arguments: Vec::from_iter(arg.cloned()), + type_parameters: Vec::new(), + }; + let typed = ascribe_pfc(&pfc, table, checker, storage, ctx, errors); + generate_unwrap(typed, checker) +} diff --git a/compiler/src/emit/native.rs b/compiler/src/emit/native.rs index 6309c649..ad11d1f0 100644 --- a/compiler/src/emit/native.rs +++ b/compiler/src/emit/native.rs @@ -120,6 +120,9 @@ fn emit_intrinsic_instructions( "Bool/ne" => { instructions.emit_code(Opcode::BXor); } + "Exitcode/to_int" => { + instructions.emit_code(Opcode::ConvertByteToInt); + } "Int/add" => { instructions.emit_code(Opcode::IntAdd); } @@ -160,6 +163,25 @@ fn emit_intrinsic_instructions( "Int/to_string" => { instructions.emit_invoke(cp.insert_string(INT_TO_STRING)); } + "Float/add" => { + instructions.emit_code(Opcode::FloatAdd); + } + "Float/sub" => { + instructions.emit_code(Opcode::FloatSub); + } + "Float/mul" => { + instructions.emit_code(Opcode::FloatMul); + } + "Float/div" => { + instructions.emit_code(Opcode::FloatDiv); + } + "Float/eq" => { + instructions.emit_code(Opcode::FloatEqual); + } + "Float/ne" => { + instructions.emit_code(Opcode::FloatEqual); + instructions.emit_bool_inversion(); + } "String/eq" => { instructions.emit_invoke(cp.insert_string(STRING_EQ)); } diff --git a/lib/std.msh b/lib/std.msh index 4f9e8cf9..6dd90ce0 100644 --- a/lib/std.msh +++ b/lib/std.msh @@ -15,6 +15,8 @@ impl Bool { impl Exitcode { fun and(self, other: Exitcode) -> Exitcode; fun or(self, other: Exitcode) -> Exitcode; + + fun to_int(self) -> Int; } impl Int { @@ -35,6 +37,16 @@ impl Int { fun to_exitcode(self) -> Exitcode; } +impl Float { + fun add(self, other: Float) -> Float; + fun sub(self, other: Float) -> Float; + fun mul(self, other: Float) -> Float; + fun div(self, other: Float) -> Float; + + fun eq(self, other: Float) -> Bool; + fun ne(self, other: Float) -> Bool; +} + impl String { fun eq(self, other: String) -> Bool; fun ne(self, other: String) -> Bool; diff --git a/vm/tests/integration/flow.rs b/vm/tests/integration/flow.rs index b6d52824..274747b1 100644 --- a/vm/tests/integration/flow.rs +++ b/vm/tests/integration/flow.rs @@ -166,9 +166,9 @@ fn simple_function_call() { #[test] fn operators() { let mut runner = Runner::default(); - runner.eval("use std::assert"); runner.eval( " + use std::assert::assert assert(1 + 1 == 2) assert(1 - 1 == 0) assert(1 > 1 == false) @@ -231,7 +231,7 @@ fn str_split() { fn exitcode_to_bool() { let mut runner = Runner::default(); runner.eval( - "use std::assert + "use std::assert::assert assert({ /bin/true }) assert(!{ /bin/false }) assert({ ! /bin/false })",