[go: up one dir, main page]

Skip to main content

sql_docs/
comments.rs

1//! Extract comment spans from parsed SQL files.
2//!
3//! Definitions used throughout this crate:
4//! - **leading**: a comment that appears on lines immediately preceding a statement/column
5//! - **inline**: a comment that appears after code on the same line (ignored)
6//! - **interstitial**: a comment inside a statement (ignored)
7
8use std::fmt;
9
10use crate::ast::ParsedSqlFile;
11
/// A line/column position within a source file.
#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd)]
pub struct Location {
    /// Line component of the position.
    line: u64,
    /// Column component of the position.
    column: u64,
}
18
19impl Location {
20    /// Method for instantiating a new [`Location`]
21    ///
22    /// # Parameters
23    /// - line: the [`u64`] value of the line location
24    /// - column: the [`u64`] value of the column location
25    #[must_use]
26    pub const fn new(line: u64, column: u64) -> Self {
27        Self { line, column }
28    }
29
30    /// Getter method for getting the line value
31    #[must_use]
32    pub const fn line(&self) -> u64 {
33        self.line
34    }
35
36    /// Getter method for getting the column value
37    #[must_use]
38    pub const fn column(&self) -> u64 {
39        self.column
40    }
41}
42
43impl Default for Location {
44    fn default() -> Self {
45        Self::new(1, 1)
46    }
47}
48
49/// Represents a start/end span (inclusive/exclusive as used by this crate) for a comment in a file.
50#[derive(Clone, Copy, Debug, Eq, PartialEq)]
51pub struct Span {
52    start: Location,
53    end: Location,
54}
55
56impl Span {
57    /// Method for creating a new instance of the [`Span`] for a
58    /// comment's span
59    ///
60    /// # Parameters
61    /// - the [`Location`] where the comment starts in the file
62    /// - the [`Location`] where the comment ends in the file
63    #[must_use]
64    pub const fn new(start: Location, end: Location) -> Self {
65        Self { start, end }
66    }
67
68    /// Getter for the start location of a [`Span`]
69    #[must_use]
70    pub const fn start(&self) -> &Location {
71        &self.start
72    }
73
74    /// Getter for the end location of a [`Span`]
75    #[must_use]
76    pub const fn end(&self) -> &Location {
77        &self.end
78    }
79}
80
81impl Default for Span {
82    fn default() -> Self {
83        Self::new(Location::default(), Location::default())
84    }
85}
86
/// Distinguishes the two SQL comment syntaxes: single line `--` and
/// multiline `/* */`
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum CommentKind {
    /// A block comment written as `/* */`
    MultiLine,
    /// A comment introduced by `--`
    SingleLine,
}
96
97/// Structure for containing the [`CommentKind`] and the [`Span`] for a comment
98#[derive(Clone, Debug, Eq, PartialEq)]
99pub struct Comment {
100    text: String,
101    kind: CommentKind,
102    span: Span,
103}
104
105impl Comment {
106    /// Method for making a new comment
107    ///
108    /// # Parameters
109    /// - `kind` where the type of comment is passed as a [`CommentKind`]
110    /// - `span` where the [`Span`] of the comment is passed
111    #[must_use]
112    pub const fn new(text: String, kind: CommentKind, span: Span) -> Self {
113        Self { text, kind, span }
114    }
115
116    /// Getter method to get the [`CommentKind`]
117    #[must_use]
118    pub const fn kind(&self) -> &CommentKind {
119        &self.kind
120    }
121
122    /// Getter method to get the [`Span`] of the comment
123    #[must_use]
124    pub const fn span(&self) -> &Span {
125        &self.span
126    }
127
128    /// Getter method that will return the comment content as a [`str`],
129    /// regardless of [`CommentKind`]
130    #[must_use]
131    pub fn text(&self) -> &str {
132        &self.text
133    }
134}
135
136/// Enum for returning errors withe Comment parsing
137#[derive(Clone, Debug, Eq, PartialEq)]
138pub enum CommentError {
139    /// Found a multiline comment terminator `*/` without a matching opener `/*`
140    UnmatchedMultilineCommentStart {
141        /// Returns the location of the terminator found
142        location: Location,
143    },
144    /// Found a multiline comment that is not properly terminated before EOF
145    UnterminatedMultiLineComment {
146        /// Returns the location of where the multiline comment started
147        start: Location,
148    },
149}
150
151impl fmt::Display for CommentError {
152    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
153        match self {
154            Self::UnmatchedMultilineCommentStart { location } => {
155                write!(
156                    f,
157                    "unmatched block comment start at line {}, column {}",
158                    location.line(),
159                    location.column()
160                )
161            }
162            Self::UnterminatedMultiLineComment { start } => {
163                write!(
164                    f,
165                    "unterminated block comment with start at line {}, column {}",
166                    start.line(),
167                    start.column(),
168                )
169            }
170        }
171    }
172}
173
// Empty impl: `CommentError` uses the default methods of `std::error::Error`,
// which lets it be propagated with `?` as a boxed `dyn Error`.
impl std::error::Error for CommentError {}
175
/// Shorthand for a [`Result`] whose error type is [`CommentError`]
pub type CommentResult<T> = Result<T, CommentError>;
178
179/// Structure for holding all comments found in the document
180#[derive(Debug, Eq, PartialEq)]
181pub struct Comments {
182    comments: Vec<Comment>,
183}
184
185impl Comments {
186    /// Method for generating a new [`Comments`] struct, which sorts comments
187    /// based on their starting span location
188    ///
189    /// # Parameters
190    /// - `comments`: mutable [`Vec<Comment>`] that will be sorted by span start
191    #[must_use]
192    pub fn new(mut comments: Vec<Comment>) -> Self {
193        // Always keep comments ordered by their span
194        comments.sort_by(|a, b| {
195            let a_start = a.span().start();
196            let b_start = b.span().start();
197
198            a_start
199                .line()
200                .cmp(&b_start.line())
201                .then_with(|| a_start.column().cmp(&b_start.column()))
202        });
203
204        Self { comments }
205    }
206
207    /// Build all leading comments from a parsed SQL file
208    ///
209    /// # Parameters
210    /// - `file`: the [`ParsedSqlFile`] that needs to be parsed for comments
211    ///
212    /// # Errors
213    /// - Will return [`CommentError::UnmatchedMultilineCommentStart`] if a
214    ///   comment does not have an opening `/*`
215    /// - Will return [`CommentError::UnterminatedMultiLineComment`] if a
216    ///   multiline comment doesn't end before `EOF`
217    pub fn parse_all_comments_from_file(file: &ParsedSqlFile) -> CommentResult<Self> {
218        let src = file.content();
219        let comments = Self::scan_comments(src)?;
220        Ok(comments)
221    }
222
223    /// Scans the raw file and collects all comments
224    ///
225    /// # Parameters
226    /// - `src` which is the `SQL` file content as a [`str`]
227    ///
228    /// # Errors
229    /// - `UnmatchedMultilineCommentStart` : will return error if unable to find
230    ///   a starting `/*` for a multiline comment
231    /// - `UnterminatedMultiLineComment` : will return error if there is an
232    ///   unterminated multiline comment, found at EOF
233    pub fn scan_comments(src: &str) -> CommentResult<Self> {
234        let mut comments = Vec::new();
235
236        let mut start_line = 1u64;
237        let mut start_col = 1u64;
238
239        let mut line_num = 1u64;
240        let mut col;
241
242        let mut in_single = false;
243        let mut in_multi = false;
244
245        let mut buf = String::new();
246
247        for line in src.lines() {
248            col = 1;
249            let mut chars = line.chars().peekable();
250            while let Some(c) = chars.next() {
251                match (in_single, in_multi, c) {
252                    (false, false, '-') => {
253                        if chars.peek().copied() == Some('-') {
254                            chars.next();
255                            in_single = true;
256                            start_line = line_num;
257                            start_col = col;
258                            buf.clear();
259                            col += 1;
260                        }
261                    }
262                    (false, false, '/') => {
263                        if chars.peek().copied() == Some('*') {
264                            chars.next();
265                            in_multi = true;
266                            start_line = line_num;
267                            start_col = col;
268                            buf.clear();
269                            col += 1;
270                        }
271                    }
272                    (false, false, '*') => {
273                        if chars.peek().copied() == Some('/') {
274                            let loc = Location::new(line_num, col);
275                            return Err(CommentError::UnmatchedMultilineCommentStart {
276                                location: loc,
277                            });
278                        }
279                    }
280                    (false, true, '*') => {
281                        if chars.peek().copied() == Some('/') {
282                            chars.next();
283                            let end_loc = Location::new(line_num, col + 1);
284                            let normalized_comment = buf
285                                .lines()
286                                .enumerate()
287                                .map(|(i, line)| match i {
288                                    0 => line.trim().to_owned(),
289                                    _ => "\n".to_owned() + line.trim(),
290                                })
291                                .collect();
292                            comments.push(Comment::new(
293                                normalized_comment,
294                                CommentKind::MultiLine,
295                                Span::new(
296                                    Location { line: start_line, column: start_col },
297                                    end_loc,
298                                ),
299                            ));
300                            in_multi = false;
301                            buf.clear();
302                            col += 1;
303                        } else {
304                            buf.push('*');
305                        }
306                    }
307                    (false, true, ch) | (true, false, ch) => {
308                        buf.push(ch);
309                    }
310                    (false, false, _) => {}
311                    (true, true, _) => {
312                        unreachable!("should not be possible to be in multiline and single line")
313                    }
314                }
315                col += 1;
316            }
317            if in_single {
318                in_single = false;
319                let end_loc = Location::new(line_num, col);
320                comments.push(Comment::new(
321                    buf.trim().to_owned(),
322                    CommentKind::SingleLine,
323                    Span::new(Location { line: start_line, column: start_col }, end_loc),
324                ));
325                buf.clear();
326            } else if in_multi {
327                buf.push('\n');
328            }
329            line_num += 1;
330        }
331        // EOF: close any open comments
332        if in_multi {
333            return Err(CommentError::UnterminatedMultiLineComment {
334                start: Location { line: start_line, column: start_col },
335            });
336        }
337
338        Ok(Self { comments })
339    }
340
341    /// Getter method for retrieving the Vec of [`Comment`]
342    #[must_use]
343    pub fn comments(&self) -> &[Comment] {
344        &self.comments
345    }
346
347    /// Finds a single comment before a specific line or returns none
348    ///
349    /// # Parameters
350    /// - [`Comments`] object
351    /// - An `u64` value representing the desired line to check above.
352    #[must_use]
353    pub fn leading_comment(&self, line: u64) -> Option<&Comment> {
354        self.comments().iter().rev().find(|comment| comment.span().end().line() + 1 == line)
355    }
356
357    /// Finds leading comments before specific line based on [`LeadingCommentCapture`] preference
358    ///
359    /// # Parameters
360    /// - [`Comments`] object
361    /// - An `u64` value representing the desired line to check above.
362    /// - [`LeadingCommentCapture`] preference
363    #[must_use]
364    pub fn leading_comments(&self, line: u64, capture: LeadingCommentCapture) -> Self {
365        let mut comments: Vec<Comment> = Vec::new();
366        let mut current_line = line;
367        let mut seen_multiline = false;
368        while let Some(leading_comment) = self.leading_comment(current_line) {
369            match capture {
370                LeadingCommentCapture::SingleNearest => {
371                    comments.push(leading_comment.to_owned());
372                    break;
373                }
374                LeadingCommentCapture::AllLeading => comments.push(leading_comment.to_owned()),
375                LeadingCommentCapture::AllSingleOneMulti => match leading_comment.kind() {
376                    CommentKind::MultiLine if seen_multiline => break,
377                    CommentKind::MultiLine => {
378                        seen_multiline = true;
379                        comments.push(leading_comment.to_owned());
380                    }
381                    CommentKind::SingleLine => comments.push(leading_comment.to_owned()),
382                },
383            }
384            current_line = leading_comment.span().start().line();
385        }
386        comments.reverse();
387        Self::new(comments)
388    }
389
390    /// Collapse this collection of comments and separate each comment with `\n` as a single [`Comment`].
391    #[must_use]
392    pub fn collapse_comments(self, flatten: MultiFlatten) -> Option<Comment> {
393        let mut iter = self.comments.into_iter();
394        let first = iter.next()?;
395
396        let Some(second) = iter.next() else {
397            let text = first.text();
398            return Some(Comment::new(
399                flatten_lines(text, flatten),
400                first.kind().to_owned(),
401                first.span().to_owned(),
402            ));
403        };
404
405        let start = *first.span().start();
406
407        let mut text = first.text().to_owned();
408        text.push('\n');
409        text.push_str(second.text());
410
411        let mut end = *second.span().end();
412
413        for c in iter {
414            text.push('\n');
415            text.push_str(c.text());
416            end = *c.span().end();
417        }
418
419        Some(Comment::new(
420            flatten_lines(&text, flatten),
421            CommentKind::MultiLine,
422            Span::new(start, end),
423        ))
424    }
425}
426
427fn flatten_lines(lines: &str, flatten: MultiFlatten) -> String {
428    let mut out = String::new();
429    let sep = match flatten {
430        MultiFlatten::FlattenWithNone => String::new(),
431        MultiFlatten::NoFlat => return lines.to_owned(),
432        MultiFlatten::Flatten(chars) => chars.to_owned(),
433    };
434    for (i, line) in lines.lines().enumerate() {
435        if i > 0 {
436            out.push_str(&sep);
437        }
438        out.push_str(line);
439    }
440    out
441}
442
/// Selects which leading comments are captured for a statement.
#[derive(Debug, Default, Clone, Copy, PartialEq, Eq)]
pub enum LeadingCommentCapture {
    /// Capture only the nearest leading comment (the default).
    #[default]
    SingleNearest,
    /// Capture every contiguous leading comment, stopping at the first line
    /// that does not end a comment (for example a blank line).
    AllLeading,
    /// Capture contiguous leading comments like `AllLeading`, but stop before
    /// a second multi-line comment so that at most one is included.
    AllSingleOneMulti,
}
454
/// Options for flattening the lines of a multiline comment.
#[derive(Debug, Default, Clone, Copy, PartialEq, Eq)]
pub enum MultiFlatten<'a> {
    /// Default option, retains the multiline structure with `\n`
    #[default]
    NoFlat,
    /// Flattens the comment by joining its lines with nothing between them
    FlattenWithNone,
    /// Flattens the comment by joining its lines with the given [`str`]
    /// placed between consecutive lines
    Flatten(&'a str),
}
466
467#[cfg(test)]
468mod tests {
469    use std::{env, fs};
470
471    use crate::comments::{Comment, CommentError, CommentKind, Comments, Location, Span};
472
473    #[test]
474    fn location_new_and_default() {
475        let mut location = Location::new(2, 5);
476        location.column = 20;
477        location.line = 43;
478
479        assert_eq!(Location { column: 20, line: 43 }, location);
480
481        let location2 = Location::default();
482        assert_eq!(location2, Location { line: 1, column: 1 });
483    }
484
485    #[test]
486    fn span_default_and_updates() {
487        let default = Span::default();
488        assert_eq!(default.start, Location::default());
489        assert_eq!(default.end, Location::default());
490
491        let span = Span { end: Location::new(55, 100), ..Default::default() };
492
493        assert_eq!(span.start, Location::default());
494        assert_eq!(span.end, Location { line: 55, column: 100 });
495    }
496
497    #[test]
498    fn comments_with_comment_kind() {
499        let raw_comment = "-- a comment";
500        let len = raw_comment.len() as u64;
501
502        let singleline = CommentKind::SingleLine;
503        let mut span = Span::default();
504        span.end.column = len - 1;
505
506        let comment = Comment::new(raw_comment.to_owned(), singleline.clone(), span);
507
508        assert_eq!(comment.kind, singleline);
509
510        let expected_span =
511            Span::new(Location { line: 1, column: 1 }, Location { line: 1, column: len - 1 });
512
513        assert_eq!(comment.span, expected_span);
514    }
515
516    #[test]
517    fn multiline_comment_span() {
518        let kind = CommentKind::MultiLine;
519        let span = Span::new(Location { line: 1, column: 1 }, Location { line: 2, column: 9 });
520
521        let comment = Comment::new("/* hello world */".to_owned(), kind.clone(), span);
522
523        assert_eq!(comment.kind, kind);
524        assert_eq!(comment.span.start.line, 1);
525        assert_eq!(comment.span.end.line, 2);
526    }
527
528    #[test]
529    fn parse_comments() -> Result<(), Box<dyn std::error::Error>> {
530        use crate::{ast::ParsedSqlFileSet, comments::Comments, source::SqlSource};
531        let base = env::temp_dir().join("all_sql_files");
532        let _ = fs::remove_dir_all(&base);
533        fs::create_dir_all(&base)?;
534        let file1 = base.join("with_single_line_comments.sql");
535        fs::File::create(&file1)?;
536        fs::write(&file1, single_line_comments_sql())?;
537        let file2 = base.join("with_multiline_comments.sql");
538        fs::File::create(&file2)?;
539        fs::write(&file2, multiline_comments_sql())?;
540        let file3 = base.join("with_mixed_comments.sql");
541        fs::File::create(&file3)?;
542        fs::write(&file3, mixed_comments_sql())?;
543        let file4 = base.join("without_comments.sql");
544        fs::File::create(&file4)?;
545        fs::write(&file4, no_comments_sql())?;
546        let set = SqlSource::sql_sources(&base, &[])?;
547        let parsed_set = ParsedSqlFileSet::parse_all(set)?;
548
549        for file in parsed_set.files() {
550            let parsed_comments = Comments::parse_all_comments_from_file(file)?;
551            let filename = file
552                .file()
553                .path()
554                .and_then(|p| p.file_name())
555                .and_then(|s| s.to_str())
556                .ok_or("Should have a file name")?;
557
558            match filename {
559                "with_single_line_comments.sql" => {
560                    assert_parsed_comments_eq(&parsed_comments, expected_single_line_comments());
561                }
562                "with_multiline_comments.sql" => {
563                    assert_parsed_comments_eq(&parsed_comments, expected_multiline_comments());
564                }
565                "with_mixed_comments.sql" => {
566                    assert_parsed_comments_eq(&parsed_comments, expected_mixed_comments());
567                }
568                "without_comments.sql" => {
569                    assert!(parsed_comments.comments().is_empty());
570                }
571                other => {
572                    unreachable!(
573                        "unexpected test file {other}; directory should only contain known test files"
574                    );
575                }
576            }
577        }
578        let _ = fs::remove_dir_all(&base);
579        Ok(())
580    }
581
582    fn assert_parsed_comments_eq(parsed: &Comments, expected: &[&str]) {
583        let comments = parsed.comments();
584        assert_eq!(
585            expected.len(),
586            comments.len(),
587            "mismatched comment count (expected {}, got {})",
588            expected.len(),
589            comments.len()
590        );
591
592        for (i, comment) in comments.iter().enumerate() {
593            assert_eq!(expected[i], comment.text(), "comment at index {i} did not match");
594        }
595    }
596
597    fn single_line_comments_sql() -> &'static str {
598        "-- Users table stores user account information
599CREATE TABLE users (
600    -- Primary key
601    id INTEGER PRIMARY KEY,
602    -- Username for login
603    username VARCHAR(255) NOT NULL,
604    -- Email address
605    email VARCHAR(255) UNIQUE NOT NULL,
606    -- When the user registered
607    created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP
608);
609
610-- Posts table stores blog posts
611CREATE TABLE posts (
612    -- Primary key
613    id INTEGER PRIMARY KEY,
614    -- Post title
615    title VARCHAR(255) NOT NULL,
616    -- Foreign key linking to users
617    user_id INTEGER NOT NULL,
618    -- Main body text
619    body TEXT NOT NULL,
620    -- When the post was created
621    published_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP
622);"
623    }
624
625    fn multiline_comments_sql() -> &'static str {
626        r"/* Users table stores user account information 
627multiline */
628CREATE TABLE users (
629    /* Primary key 
630    multiline */
631    id INTEGER PRIMARY KEY,
632    /* Username for login 
633    multiline */
634    username VARCHAR(255) NOT NULL,
635    /* Email address 
636    multiline */
637    email VARCHAR(255) UNIQUE NOT NULL,
638    /* When the user registered 
639    multiline */
640    created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP
641);
642
643/* Posts table stores blog posts 
644multiline */
645CREATE TABLE posts (
646    /* Primary key 
647    multiline */
648    id INTEGER PRIMARY KEY,
649    /* Post title 
650    multiline */
651    title VARCHAR(255) NOT NULL,
652    /* Foreign key linking to users 
653    multiline */
654    user_id INTEGER NOT NULL,
655    /* Main body text 
656    multiline */
657    body TEXT NOT NULL,
658    /* When the post was created 
659    multiline */
660    published_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP
661);"
662    }
663
664    fn no_comments_sql() -> &'static str {
665        "CREATE TABLE users (
666    id INTEGER PRIMARY KEY,
667    username VARCHAR(255) NOT NULL,
668    email VARCHAR(255) UNIQUE NOT NULL,
669    created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP
670);
671
672CREATE TABLE posts (
673    id INTEGER PRIMARY KEY,
674    title VARCHAR(255) NOT NULL,
675    user_id INTEGER NOT NULL,
676    body TEXT NOT NULL,
677    published_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP
678);"
679    }
680
681    fn mixed_comments_sql() -> &'static str {
682        "-- interstitial Comment above statements (should be ignored)
683
684/* Users table stores user account information */
685CREATE TABLE users ( /* users interstitial comment 
686(should be ignored) */
687    -- Primary key
688    id INTEGER PRIMARY KEY, -- Id comment that is interstitial (should be ignored)
689    /* Username for login */
690    username VARCHAR(255) NOT NULL,
691    -- Email address
692    email VARCHAR(255) UNIQUE NOT NULL,
693    /* When the user registered */
694    created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP
695);
696
697/* Posts table stores blog posts */
698CREATE TABLE posts (
699    -- Primary key
700    id INTEGER PRIMARY KEY,
701    /* Post title */
702    title VARCHAR(255) NOT NULL,
703    -- Foreign key linking to users
704    user_id INTEGER NOT NULL,
705    /* Main body text */
706    body TEXT NOT NULL,
707    -- When the post was created
708    published_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP
709);
710"
711    }
712
713    fn expected_single_line_comments() -> &'static [&'static str] {
714        &[
715            "Users table stores user account information",
716            "Primary key",
717            "Username for login",
718            "Email address",
719            "When the user registered",
720            "Posts table stores blog posts",
721            "Primary key",
722            "Post title",
723            "Foreign key linking to users",
724            "Main body text",
725            "When the post was created",
726        ]
727    }
728
729    fn expected_multiline_comments() -> &'static [&'static str] {
730        &[
731            "Users table stores user account information\nmultiline",
732            "Primary key\nmultiline",
733            "Username for login\nmultiline",
734            "Email address\nmultiline",
735            "When the user registered\nmultiline",
736            "Posts table stores blog posts\nmultiline",
737            "Primary key\nmultiline",
738            "Post title\nmultiline",
739            "Foreign key linking to users\nmultiline",
740            "Main body text\nmultiline",
741            "When the post was created\nmultiline",
742        ]
743    }
744
745    fn expected_mixed_comments() -> &'static [&'static str] {
746        &[
747            "interstitial Comment above statements (should be ignored)",
748            "Users table stores user account information",
749            "users interstitial comment\n(should be ignored)",
750            "Primary key",
751            "Id comment that is interstitial (should be ignored)",
752            "Username for login",
753            "Email address",
754            "When the user registered",
755            "Posts table stores blog posts",
756            "Primary key",
757            "Post title",
758            "Foreign key linking to users",
759            "Main body text",
760            "When the post was created",
761        ]
762    }
763
764    #[test]
765    fn single_line_comment_spans_are_correct() -> Result<(), Box<dyn std::error::Error>> {
766        use crate::{ast::ParsedSqlFileSet, source::SqlSource};
767        let base = env::temp_dir().join("single_line_spans");
768        let _ = fs::remove_dir_all(&base);
769        fs::create_dir_all(&base)?;
770        let file = base.join("single.sql");
771        fs::File::create(&file)?;
772        fs::write(&file, single_line_comments_sql())?;
773        let set = SqlSource::sql_sources(&base, &[])?;
774        let parsed_set = ParsedSqlFileSet::parse_all(set)?;
775        let file = parsed_set
776            .files()
777            .iter()
778            .find(|f| {
779                f.file().path().and_then(|p| p.to_str()).is_some_and(|p| p.ends_with("single.sql"))
780            })
781            .ok_or("single.sql should be present")?;
782
783        let comments = Comments::parse_all_comments_from_file(file)?;
784        let comments = comments.comments();
785        assert_eq!(comments.len(), 11);
786        let first = &comments[0];
787        assert_eq!(first.text(), "Users table stores user account information");
788        assert_eq!(first.span().start(), &Location::new(1, 1));
789        assert_eq!(first.span().end(), &Location::new(1, 47));
790        let primary_key = &comments[1];
791        assert_eq!(primary_key.text(), "Primary key");
792        assert_eq!(primary_key.span().start(), &Location::new(3, 5));
793        assert_eq!(primary_key.span().end(), &Location::new(3, 19));
794        assert!(
795            primary_key.span().end().column() > primary_key.span().start().column(),
796            "end column should be after start column",
797        );
798        let _ = fs::remove_dir_all(&base);
799        Ok(())
800    }
801
802    #[test]
803    fn multiline_comment_spans_are_correct() -> Result<(), Box<dyn std::error::Error>> {
804        use crate::{ast::ParsedSqlFileSet, source::SqlSource};
805        let base = env::temp_dir().join("multi_line_spans");
806        let _ = fs::remove_dir_all(&base);
807        fs::create_dir_all(&base)?;
808        let file = base.join("multi.sql");
809        fs::File::create(&file)?;
810        fs::write(&file, multiline_comments_sql())?;
811        let set = SqlSource::sql_sources(&base, &[])?;
812        let parsed_set = ParsedSqlFileSet::parse_all(set)?;
813        let file = parsed_set
814            .files()
815            .iter()
816            .find(|f| {
817                f.file().path().and_then(|p| p.to_str()).is_some_and(|p| p.ends_with("multi.sql"))
818            })
819            .ok_or("multi.sql should be present")?;
820
821        let comments = Comments::parse_all_comments_from_file(file)?;
822        let comments = comments.comments();
823        assert_eq!(comments.len(), 11);
824        let first = &comments[0];
825        assert_eq!(first.text(), "Users table stores user account information\nmultiline");
826        assert_eq!(first.span().start(), &Location::new(1, 1));
827        assert_eq!(first.span().end().line(), 2);
828        assert!(
829            first.span().end().column() > first.span().start().column(),
830            "end column should be after start column for first multiline comment",
831        );
832        let primary_key = &comments[1];
833        assert_eq!(primary_key.text(), "Primary key\nmultiline");
834        assert_eq!(primary_key.span().start(), &Location::new(4, 5));
835        assert_eq!(primary_key.span().end().line(), 5);
836        assert!(
837            primary_key.span().end().column() > primary_key.span().start().column(),
838            "end column should be after start column for primary key multiline comment",
839        );
840        let _ = fs::remove_dir_all(&base);
841        Ok(())
842    }
843
844    #[test]
845    fn test_comment_error() {
846        let unterminated =
847            CommentError::UnterminatedMultiLineComment { start: Location::default() };
848        let location = Location { line: 1, column: 1 };
849        let expected = format!(
850            "unterminated block comment with start at line {}, column {}",
851            location.line(),
852            location.column()
853        );
854        assert_eq!(unterminated.to_string(), expected);
855    }
856
/// Checks that a [`Comments`] built with [`Comments::new`] exposes the same
/// comments, in the same order, with matching text, kind, and span.
#[test]
fn test_comments() {
    let expected = vec![
        Comment::new(
            "a comment".to_owned(),
            CommentKind::SingleLine,
            Span { start: Location::new(1, 1), end: Location::new(1, 12) },
        ),
        // A single-line comment must start and end on the same line, so this
        // fixture lives entirely on line 2 and does not overlap the first one.
        Comment::new(
            "a second comment".to_owned(),
            CommentKind::SingleLine,
            Span { start: Location::new(2, 1), end: Location::new(2, 19) },
        ),
    ];
    let comments = Comments::new(expected.clone());

    // `assert_eq!` reports both lengths on failure; checking the length first
    // also guarantees the `zip` below does not silently skip a trailing item.
    assert_eq!(comments.comments().len(), expected.len());
    for (actual, wanted) in comments.comments().iter().zip(&expected) {
        assert_eq!(actual.text(), wanted.text());
        assert_eq!(actual.kind(), wanted.kind());
        assert_eq!(actual.span(), wanted.span());
    }
}
880
881    use crate::comments::LeadingCommentCapture;
882
883    fn texts(v: &Comments) -> Vec<String> {
884        v.comments().iter().map(|c| c.text().to_owned()).collect()
885    }
886
/// The default capture mode must be `SingleNearest`.
#[test]
fn leading_comment_capture_default_is_single_nearest() {
    let default_mode = LeadingCommentCapture::default();
    assert_eq!(default_mode, LeadingCommentCapture::SingleNearest);
}
891
/// Two single-line comments directly above a statement on line 3:
/// `SingleNearest` yields only the closest one, `AllLeading` yields both.
#[test]
fn leading_comments_single_nearest_and_all_leading_basic_runover()
-> Result<(), Box<dyn std::error::Error>> {
    let sql = concat!("-- c1\n", "-- c2\n", "CREATE TABLE t (id INTEGER);\n");
    let scanned = Comments::scan_comments(sql)?;

    let nearest = scanned.leading_comments(3, LeadingCommentCapture::SingleNearest);
    assert_eq!(texts(&nearest), ["c2"]);

    let every = scanned.leading_comments(3, LeadingCommentCapture::AllLeading);
    assert_eq!(texts(&every), ["c1", "c2"]);

    Ok(())
}
909
/// With a blank line between `c1` and `c2`, `AllLeading` for the statement on
/// line 4 returns only `c2`.
#[test]
fn leading_comments_all_leading_stops_at_blank_line() -> Result<(), Box<dyn std::error::Error>> {
    let sql = concat!("-- c1\n", "\n", "-- c2\n", "CREATE TABLE t (id INTEGER);\n");
    let scanned = Comments::scan_comments(sql)?;

    let captured = scanned.leading_comments(4, LeadingCommentCapture::AllLeading);
    assert_eq!(texts(&captured), ["c2"]);

    Ok(())
}
925
/// `AllSingleOneMulti` for the statement on line 5 returns the multi-line
/// comment followed by both single-line comments.
#[test]
fn leading_comments_all_single_one_multi_collects_singles_and_one_multiline()
-> Result<(), Box<dyn std::error::Error>> {
    let sql = concat!(
        "/* m\n",
        "m */\n",
        "-- s1\n",
        "-- s2\n",
        "CREATE TABLE t (id INTEGER);\n",
    );
    let scanned = Comments::scan_comments(sql)?;

    let captured = scanned.leading_comments(5, LeadingCommentCapture::AllSingleOneMulti);
    assert_eq!(texts(&captured), ["m\nm", "s1", "s2"]);

    Ok(())
}
942
/// With two block comments above a single-line comment, `AllSingleOneMulti`
/// for the statement on line 4 returns only `m2` and `s1`.
#[test]
fn leading_comments_all_single_one_multi_stops_before_second_multiline()
-> Result<(), Box<dyn std::error::Error>> {
    let sql = concat!("/* m1 */\n", "/* m2 */\n", "-- s1\n", "CREATE TABLE t (id INTEGER);\n");
    let scanned = Comments::scan_comments(sql)?;

    let captured = scanned.leading_comments(4, LeadingCommentCapture::AllSingleOneMulti);
    assert_eq!(texts(&captured), ["m2", "s1"]);

    Ok(())
}
958
/// `SingleNearest` returns a multi-line comment when that is the comment
/// directly above the statement on line 3.
#[test]
fn leading_comments_single_nearest_can_return_multiline()
-> Result<(), Box<dyn std::error::Error>> {
    let sql = concat!("/* hello\n", "world */\n", "CREATE TABLE t (id INTEGER);\n");
    let scanned = Comments::scan_comments(sql)?;

    let captured = scanned.leading_comments(3, LeadingCommentCapture::SingleNearest);
    assert_eq!(texts(&captured), ["hello\nworld"]);

    Ok(())
}
973
/// Collapsing an empty [`Comments`] yields `None`.
#[test]
fn collapse_comments_empty_returns_none() {
    let empty = Comments::new(Vec::new());
    let collapsed = empty.collapse_comments(crate::comments::MultiFlatten::NoFlat);
    assert!(collapsed.is_none());
}
979
/// Collapsing a single comment returns a comment with the same text, kind,
/// and span.
#[test]
fn collapse_comments_single_returns_same_comment() {
    let solo_span = Span::new(Location::new(10, 3), Location::new(10, 11));
    let comments =
        Comments::new(vec![Comment::new("solo".to_owned(), CommentKind::SingleLine, solo_span)]);

    let Some(collapsed) = comments.collapse_comments(crate::comments::MultiFlatten::NoFlat) else {
        panic!("should return a comment");
    };

    assert_eq!(collapsed.text(), "solo");
    assert_eq!(collapsed.kind(), &CommentKind::SingleLine);
    assert_eq!(collapsed.span(), &Span::new(Location::new(10, 3), Location::new(10, 11)));
}
996
/// Collapsing three comments joins their text with newlines, reports the
/// `MultiLine` kind, and spans from the first start to the last end.
#[test]
fn collapse_comments_multiple_joins_text_and_expands_span_and_sets_multiline_kind() {
    let comments = Comments::new(vec![
        Comment::new(
            "a".to_owned(),
            CommentKind::SingleLine,
            Span::new(Location::new(1, 1), Location::new(1, 6)),
        ),
        Comment::new(
            "b".to_owned(),
            CommentKind::SingleLine,
            Span::new(Location::new(2, 1), Location::new(2, 6)),
        ),
        Comment::new(
            "c".to_owned(),
            CommentKind::MultiLine,
            Span::new(Location::new(3, 1), Location::new(4, 3)),
        ),
    ]);

    let Some(collapsed) = comments.collapse_comments(crate::comments::MultiFlatten::NoFlat) else {
        panic!("should collapse");
    };

    assert_eq!(collapsed.text(), "a\nb\nc");
    assert_eq!(collapsed.kind(), &CommentKind::MultiLine);
    assert_eq!(collapsed.span(), &Span::new(Location::new(1, 1), Location::new(4, 3)));
}
1024
/// Scans two leading single-line comments, captures them with `AllLeading`,
/// and checks the collapsed text, kind, and span bounds.
#[test]
fn collapse_comments_with_leading_comments_allleading_collapses_correctly()
-> Result<(), Box<dyn std::error::Error>> {
    let sql = concat!("-- c1\n", "-- c2\n", "CREATE TABLE t (id INTEGER);\n");
    let scanned = Comments::scan_comments(sql)?;

    let captured = scanned.leading_comments(3, LeadingCommentCapture::AllLeading);
    assert_eq!(texts(&captured), ["c1", "c2"]);

    let Some(collapsed) = captured.collapse_comments(crate::comments::MultiFlatten::NoFlat) else {
        panic!("should collapse");
    };
    assert_eq!(collapsed.text(), "c1\nc2");
    assert_eq!(collapsed.kind(), &CommentKind::MultiLine);

    // Span sanity: begins where the first comment begins and ends on the
    // second comment's line.
    let merged_span = collapsed.span();
    assert_eq!(merged_span.start(), &Location::new(1, 1));
    assert_eq!(merged_span.end().line(), 2);

    Ok(())
}
1050
/// Captures only the nearest leading comment and checks that collapsing it
/// keeps its text and its `SingleLine` kind.
#[test]
fn collapse_comments_with_leading_comments_single_nearest_preserves_kind()
-> Result<(), Box<dyn std::error::Error>> {
    let sql = concat!("-- c1\n", "-- c2\n", "CREATE TABLE t (id INTEGER);\n");
    let scanned = Comments::scan_comments(sql)?;

    let captured = scanned.leading_comments(3, LeadingCommentCapture::SingleNearest);
    assert_eq!(texts(&captured), ["c2"]);

    let Some(collapsed) = captured.collapse_comments(crate::comments::MultiFlatten::NoFlat) else {
        panic!("should collapse");
    };
    assert_eq!(collapsed.text(), "c2");
    assert_eq!(collapsed.kind(), &CommentKind::SingleLine);

    Ok(())
}
1071    use crate::comments::flatten_lines;
/// Checks `flatten_lines` on three-line input with no separator and with a
/// `" - "` separator, and on single-line input, which comes back unchanged.
#[test]
fn test_flatten_lines_behavior() {
    use crate::comments::MultiFlatten;

    let three_lines = "a\nb\nc";
    assert_eq!(flatten_lines(three_lines, MultiFlatten::FlattenWithNone), "abc");
    assert_eq!(flatten_lines(three_lines, MultiFlatten::Flatten(" - ")), "a - b - c");
    assert_eq!(flatten_lines("solo", MultiFlatten::Flatten("XXX")), "solo");
}
1082}